diff --git a/.github/workflows/ax.yml b/.github/workflows/ax.yml index 0b8c99463f..a37c972f0a 100644 --- a/.github/workflows/ax.yml +++ b/.github/workflows/ax.yml @@ -74,6 +74,10 @@ jobs: - { image: '2021-clang10', cxx: 'g++', build: 'Release', cmake: '-DDISABLE_DEPENDENCY_VERSION_CHECKS=ON' } fail-fast: false steps: + - name: Enable Node 16 + if: contains(matrix.config.image, '2021') || contains(matrix.config.image, '2022') + run: | + echo "ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION=true" >> $GITHUB_ENV - uses: actions/checkout@v3 - name: pybind11 #if: contains(matrix.config.image, '2023') == false @@ -123,14 +127,16 @@ jobs: matrix: config: #@note llvm10 never got its own brew formula... - - { runner: 'macos-latest', cxx: 'clang++', build: 'Release', llvm: '11' } - - { runner: 'macos-latest', cxx: 'clang++', build: 'Release', llvm: '12' } - - { runner: 'macos-latest', cxx: 'clang++', build: 'Release', llvm: '13' } + # Last macos runner before M1 (macos-14) + - { runner: 'macos-13', cxx: 'clang++', build: 'Release', llvm: '12' } + - { runner: 'macos-13', cxx: 'clang++', build: 'Release', llvm: '13' } fail-fast: false steps: - uses: actions/checkout@v3 - name: install_deps - run: ./ci/install_macos.sh ${{ matrix.config.llvm }} + run: | + ./ci/install_macos.sh ${{ matrix.config.llvm }} + ./ci/install_tbb_macos.sh - name: build run: > ./ci/build.sh -v @@ -139,7 +145,8 @@ jobs: --cargs=\" -DOPENVDB_AX_TEST_CMD_DOWNLOADS=ON -DUSE_EXPLICIT_INSTANTIATION=OFF - -DLLVM_DIR=/usr/local/opt/llvm@${{ matrix.config.llvm }}/lib/cmake/llvm + -DCMAKE_INSTALL_PREFIX=${{ github.workspace }}/install + -DLLVM_DIR=/opt/homebrew/opt/llvm@${{ matrix.config.llvm }}/lib/cmake/llvm \" - name: test run: cd build && ctest -V diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c9b36595c9..3a4dd2c621 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -86,6 +86,11 @@ jobs: - { cxx: g++, image: '2022-clang11', abi: '9', build: 'Release', cmake: '-DDISABLE_DEPENDENCY_VERSION_CHECKS=ON' } fail-fast: false steps: + - name: Enable Node 16 + # Solution taken from https://github.blog/changelog/2024-03-07-github-actions-all-actions-will-run-on-node20-instead-of-node16-by-default + if: contains(matrix.config.image, '2022') + run: | + echo "ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION=true" >> $GITHUB_ENV - uses: actions/checkout@v3 - name: pybind11 #if: contains(matrix.config.image, '2023') == false @@ -125,29 +130,16 @@ jobs: run: ccache --evict-older-than 1d windows: - # Windows CI. Tests static and dynamic builds with MT and MD respectively. + # Windows CI. Tests a dynamic build with MD. if: | github.event_name != 'workflow_dispatch' || github.event.inputs.type == 'all' || github.event.inputs.type == 'win' runs-on: ${{ (github.repository_owner == 'AcademySoftwareFoundation' && 'windows-2022-8c-32g-300h') || 'windows-latest' }} - name: windows-vc:${{ matrix.config.vc }}-type:${{ matrix.config.build }} + name: windows env: - VCPKG_DEFAULT_TRIPLET: ${{ matrix.config.vc }} + VCPKG_DEFAULT_TRIPLET: x64-windows strategy: - matrix: - config: - # static build of blosc from vcpkg does not build internal sources. - # USE_STATIC_DEPENDENCIES is required for IlmBase/OpenEXR defines and - # Boost as both shared and static libs are installed. 
- # USE_EXPLICIT_INSTANTIATION is disabled for debug static libraries - # due to disk space constraints - # @note Commented out the static debug build due to linker OOM LNK1102 - - { vc: 'x64-windows-static', components: 'core,bin,view,render,test', build: 'Release', cmake: '-A x64 -G \"Visual Studio 17 2022\" -DOPENVDB_CORE_SHARED=OFF -DUSE_STATIC_DEPENDENCIES=ON -DBLOSC_USE_EXTERNAL_SOURCES=ON' } - #- { vc: 'x64-windows-static', components: 'core,bin,view,render,test', build: 'Debug', cmake: '-A x64 -G \"Visual Studio 17 2022\" -DOPENVDB_CORE_SHARED=OFF -DUSE_STATIC_DEPENDENCIES=ON -DBLOSC_USE_EXTERNAL_SOURCES=ON -DUSE_EXPLICIT_INSTANTIATION=OFF' } - - { vc: 'x64-windows', components: 'core,bin,view,render,python,test', build: 'Release', cmake: '-A x64 -G \"Visual Studio 17 2022\" -DOPENVDB_CORE_STATIC=OFF' } - - { vc: 'x64-windows', components: 'core,bin,view,render,python,test', build: 'Debug', cmake: '-A x64 -G \"Visual Studio 17 2022\" -DOPENVDB_CORE_STATIC=OFF' } - #- { vc: 'x64-windows', build: 'Release', cmake: '-G \"MinGW Makefiles\" -DOPENVDB_CORE_STATIC=OFF' } fail-fast: false steps: - uses: actions/checkout@v3 @@ -155,17 +147,17 @@ jobs: shell: pwsh run: | # note: system path must be modified in a previous step to it's use - echo "$Env:VCPKG_INSTALLATION_ROOT\installed\${{ matrix.config.vc }}\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - echo "${{github.workspace}}\build\openvdb\openvdb\${{ matrix.config.build }}" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append + echo "$Env:VCPKG_INSTALLATION_ROOT\installed\x64-windows\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append + echo "${{github.workspace}}\build\openvdb\openvdb\Release" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - name: install run: ./ci/install_windows.sh - name: build run: > ./ci/build.sh -v - --config=${{ matrix.config.build }} - --components=${{ matrix.config.components }} + --config='Release' + --components='core,bin,view,render,python,test' --cargs=\' - ${{ matrix.config.cmake }} + -A x64 -G \"Visual Studio 17 2022\" -DOPENVDB_CORE_STATIC=OFF -DMSVC_COMPRESS_PDB=ON -DUSE_EXR=ON -DUSE_PNG=ON @@ -176,27 +168,29 @@ jobs: # Print the build directy size (monitor if we're hitting runner limits) run: du -h build - name: test - # Always run tests on weekly builds but skip Debug on commits as they take a while. 
- # https://github.community/t/distinct-job-for-each-schedule/17811/2 - if: contains(github.event.schedule, '0 7 * * 1') || matrix.config.build == 'Release' - run: cd build && ctest -V -C ${{ matrix.config.build }} + run: cd build && ctest -V -C Release macos: if: | github.event_name != 'workflow_dispatch' || github.event.inputs.type == 'all' || github.event.inputs.type == 'mac' - runs-on: macos-latest + runs-on: macos-13 # Last macos runner before M1 (macos-14) env: CXX: clang++ steps: - uses: actions/checkout@v3 - name: install - run: ./ci/install_macos.sh + run: | + ./ci/install_macos.sh + ./ci/install_tbb_macos.sh - name: build run: > ./ci/build.sh -v --build-type=Release --components=\"core,python,bin,view,render,test\" + --cargs=\' + -DCMAKE_INSTALL_PREFIX=${{ github.workspace }}/install + \' - name: test run: cd build && ctest -V diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index d3febe0d76..21a9e2bac8 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -42,6 +42,9 @@ jobs: # need to re-write the python docs to use sphinx image: aswf/ci-openvdb:2022 steps: + - name: Enable Node 16 + run: | + echo "ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION=true" >> $GITHUB_ENV - uses: actions/checkout@v3 - name: install_doxygen run: ./ci/install_doxygen.sh 1_8_11 @@ -51,7 +54,12 @@ jobs: # - name: install_epydoc # run: pip install epydoc - name: install_latex - run: yum -y install texlive-latex-bin texlive-dvips texlive-collection-fontsrecommended texlive-collection-latexrecommended + run: | + # Fix error: Cannot prepare internal mirrorlist: No URLs in mirrorlist. CentOS 8 reached EOL, so the official mirror must be replaced with vault.centos.org + # Comment out mirrorlist and replace #baseurl=...mirror.centos.org with baseurl=...vault.centos.org in files starting with CentOS- in /etc/yum.repos.d folder + sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-* + sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-* + yum -y install texlive-latex-bin texlive-dvips texlive-collection-fontsrecommended texlive-collection-latexrecommended - name: build run: > ./ci/build.sh -v diff --git a/.github/workflows/houdini.yml b/.github/workflows/houdini.yml index 8252cf5473..f995a49704 100644 --- a/.github/workflows/houdini.yml +++ b/.github/workflows/houdini.yml @@ -83,6 +83,9 @@ jobs: steps: # See note on this step in the Houdini weekly.yml job # We can remove this when we no longer use < 2023 images + - name: Enable Node 16 + run: | + echo "ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION=true" >> $GITHUB_ENV - name: remove zstd run: yum -y remove zstd - uses: actions/checkout@v3 @@ -159,21 +162,12 @@ jobs: cp hou/hou.tar.gz $HOME/houdini_install/hou.tar.gz cd $HOME/houdini_install && tar -xzf hou.tar.gz && cd - - name: install_deps - run: | - # Remove Python3 symlinks in /usr/local/bin as workaround to brew update issues - # https://github.com/actions/setup-python/issues/577 - rm /usr/local/bin/2to3* || : - rm /usr/local/bin/idle3* || : - rm /usr/local/bin/pydoc* || : - rm /usr/local/bin/python3* || : - brew update - brew install bash gnu-getopt cmake boost glfw googletest openexr pybind11 llvm@15 cppunit - echo "/usr/local/opt/gnu-getopt/bin" >> $GITHUB_PATH + run: ./ci/install_macos.sh 15 - name: build run: | ./ci/build.sh -v \ --build-type=Release \ - --components="core,hou,bin,view,render,python,test,axcore,axbin,axtest" \ + --components="core,hou,bin,view,render,python,test,axcore,axbin" \ --cargs=\" \ 
-DHOUDINI_ROOT=$HOME/houdini_install/hou \ -DOPENVDB_BUILD_HOUDINI_ABITESTS=OFF \ @@ -181,7 +175,9 @@ -DDISABLE_CMAKE_SEARCH_PATHS=ON \ -DDISABLE_DEPENDENCY_VERSION_CHECKS=ON \ -DUSE_EXPLICIT_INSTANTIATION=OFF \ - -DLLVM_DIR=/usr/local/opt/llvm@15/lib/cmake/llvm \ + -DTbb_INCLUDE_DIR=$HOME/houdini_install/hou/Frameworks/Houdini.framework/Versions/Current/Resources/toolkit/include/tbb \ + -DLLVM_DIR=/opt/homebrew/opt/llvm@15/lib/cmake/llvm \ + -DCMAKE_INSTALL_PREFIX=${{ github.workspace }}/install \ \" - name: test run: cd build && ctest -V diff --git a/.github/workflows/nanovdb.yml b/.github/workflows/nanovdb.yml index 35c25010e4..cfdccb0d05 100644 --- a/.github/workflows/nanovdb.yml +++ b/.github/workflows/nanovdb.yml @@ -65,9 +65,17 @@ jobs: - { cxx: clang++, image: '2022-clang11', build: 'Debug' } fail-fast: false steps: + - name: Enable Node 16 + if: contains(matrix.config.image, '2022') + run: | + echo "ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION=true" >> $GITHUB_ENV - uses: actions/checkout@v3 - name: install_cuda_11 run: | + # Fix error: Cannot prepare internal mirrorlist: No URLs in mirrorlist. CentOS 8 reached EOL, so the official mirror must be replaced with vault.centos.org + # Comment out mirrorlist and replace #baseurl=...mirror.centos.org with baseurl=...vault.centos.org in files starting with CentOS- in /etc/yum.repos.d folder + sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-* + sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-* yum -y install yum-utils yum-config-manager --add-repo http://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo echo "Installing cuda toolkit" @@ -96,43 +104,32 @@ jobs: github.event.inputs.type == 'win' runs-on: ${{ (github.repository_owner == 'AcademySoftwareFoundation' && 'windows-2022-8c-32g-300h') || 'windows-latest' }} env: - VCPKG_DEFAULT_TRIPLET: ${{ matrix.config.vc }} + VCPKG_DEFAULT_TRIPLET: 'x64-windows' visual_studio: "Visual Studio 17 2022" - cuda: "11.6.2" + cuda: "12.4.0" strategy: - matrix: - config: - # static build of blosc from vcpkg does not build internal sources. - # USE_STATIC_DEPENDENCIES is required for IlmBase/OpenEXR defines and - # Boost as both shared and static libs are installed. 
- - { vc: 'x64-windows-static', build: 'Release', cmake: '-A x64 -G \"Visual Studio 17 2022\" -DOPENVDB_CORE_SHARED=OFF -DUSE_STATIC_DEPENDENCIES=ON -DBLOSC_USE_EXTERNAL_SOURCES=ON -DCMAKE_MSVC_RUNTIME_LIBRARY=MultiThreaded' } - - { vc: 'x64-windows-static', build: 'Debug', cmake: '-A x64 -G \"Visual Studio 17 2022\" -DOPENVDB_CORE_SHARED=OFF -DUSE_STATIC_DEPENDENCIES=ON -DBLOSC_USE_EXTERNAL_SOURCES=ON -DCMAKE_MSVC_RUNTIME_LIBRARY=MultiThreadedDebug' } - - { vc: 'x64-windows', build: 'Release', cmake: '-A x64 -G \"Visual Studio 17 2022\" -DOPENVDB_CORE_STATIC=OFF' } - - { vc: 'x64-windows', build: 'Debug', cmake: '-A x64 -G \"Visual Studio 17 2022\" -DOPENVDB_CORE_STATIC=OFF' } fail-fast: false steps: - uses: actions/checkout@v3 - name: path run: | # note: system path must be modified in a previous step to it's use - echo "$Env:VCPKG_INSTALLATION_ROOT\installed\${{ matrix.config.vc }}\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - echo "${{github.workspace}}\build\openvdb\openvdb\${{ matrix.config.build }}" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append + echo "$Env:VCPKG_INSTALLATION_ROOT\installed\x64-windows\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append + echo "${{github.workspace}}\build\openvdb\openvdb\Release" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - name: install_cuda shell: powershell run: .\ci\install_windows_cuda.ps1 - name: install_deps shell: bash - run: | - vcpkg update - vcpkg install zlib tbb gtest blosc boost-iostreams boost-system boost-any boost-uuid boost-interprocess boost-algorithm + run: ./ci/install_windows.sh - name: build shell: bash run: > ./ci/build.sh -v - --config=${{ matrix.config.build }} + --config=Release --components=core,nano,nanotest,nanoexam,nanobench,nanotool --cargs=\' - ${{ matrix.config.cmake }} + -A x64 -G \"Visual Studio 17 2022\" -DOPENVDB_CORE_STATIC=OFF -DMSVC_COMPRESS_PDB=ON -DUSE_EXPLICIT_INSTANTIATION=OFF -DNANOVDB_USE_CUDA=ON @@ -155,13 +152,15 @@ jobs: strategy: matrix: config: - - { runner: 'macos-11', cxx: 'clang++', build: 'Release' } - - { runner: 'macos-11', cxx: 'clang++', build: 'Debug' } + - { runner: 'macos-12', cxx: 'clang++', build: 'Release' } + - { runner: 'macos-12', cxx: 'clang++', build: 'Debug' } fail-fast: false steps: - uses: actions/checkout@v3 - name: install_deps - run: ./ci/install_macos.sh + run: | + ./ci/install_macos.sh + ./ci/install_tbb_macos.sh - name: build run: > ./ci/build.sh -v @@ -188,6 +187,6 @@ jobs: cd nanovdb/nanovdb sudo mkdir .build cd .build - sudo cmake -DUSE_EXPLICIT_INSTANTIATION=OFF -DNANOVDB_BUILD_UNITTESTS=ON ../ + sudo cmake -DUSE_EXPLICIT_INSTANTIATION=OFF -DNANOVDB_BUILD_UNITTESTS=ON -DNANOVDB_USE_OPENVDB=OFF -DNANOVDB_USE_CUDA=OFF ../ sudo make -j8 install sudo ctest -V diff --git a/.github/workflows/weekly.yml b/.github/workflows/weekly.yml index 7e26ca9305..5debe30be5 100644 --- a/.github/workflows/weekly.yml +++ b/.github/workflows/weekly.yml @@ -77,6 +77,9 @@ jobs: container: image: aswf/ci-base:2023 steps: + - name: Enable Node 16 + run: | + echo "ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION=true" >> $GITHUB_ENV - uses: actions/checkout@v3 # We bumped from the 2021 CI image to 2023 here to fix some OpenSSL issues # with the Houdini download script. In so doing we broke some of the caching @@ -111,6 +114,8 @@ jobs: (github.event_name != 'workflow_dispatch' || github.event.inputs.type == 'all' || github.event.inputs.type == 'houdini') + # Note that macos-14 (current macos-latest) switches to M1. 
We could instead test + # the arm build here instead of the x86 one. runs-on: macos-latest name: macos-houdini-20 env: @@ -122,7 +127,7 @@ jobs: id: timestamp run: echo "timestamp=$(date -u +'%Y-%m-%dT%H:%M:%SZ')" >> $GITHUB_OUTPUT - name: download_houdini - run: ./ci/download_houdini.sh 20.0 macosx_x86_64_clang12.0_11 --prod + run: ./ci/download_houdini.sh 20.0 macosx_arm64_clang14.0_13 --prod - name: install_houdini run: | mkdir $HOME/houdini_install @@ -169,6 +174,9 @@ jobs: - { name: 'conf', build: 'Release', components: 'core,python,bin,view,render,test,axcore,axtest', cmake: '-DCMAKE_FIND_PACKAGE_PREFER_CONFIG=ON' } fail-fast: false steps: + - name: Enable Node 16 + run: | + echo "ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION=true" >> $GITHUB_ENV - uses: actions/checkout@v3 - name: pybind11 #if: contains(container.image, '2023') == false @@ -198,7 +206,7 @@ jobs: # Disable the clang job for now. See https://github.com/actions/runner-images/issues/8659 # - { runson: ubuntu-latest, cxx: clang++, cmake: '' } # @todo gcc on macos - - { runson: macos-latest, cxx: '', cmake: '-D CMAKE_CXX_COMPILER=/usr/local/opt/llvm@15/bin/clang++' } + - { runson: macos-latest, cxx: '', cmake: '-D CMAKE_CXX_COMPILER=/opt/homebrew/opt/llvm@15/bin/clang++' } fail-fast: false steps: - uses: actions/checkout@v3 @@ -208,6 +216,7 @@ jobs: sudo apt-get -q install -y libboost-dev libboost-iostreams-dev libtbb-dev libblosc-dev llvm-dev libgtest-dev libcppunit-dev pybind11-dev elif [ "$RUNNER_OS" == "macOS" ]; then ./ci/install_macos.sh 15 + ./ci/install_tbb_macos.sh else echo "$RUNNER_OS not supported"; exit 1 fi @@ -220,6 +229,60 @@ jobs: - name: test run: cd build && ctest -V + windows: + # Windows CI. Tests static and dynamic builds with MT and MD respectively. + if: | + github.event_name != 'workflow_dispatch' || + github.event.inputs.type == 'all' || + github.event.inputs.type == 'win' + runs-on: ${{ (github.repository_owner == 'AcademySoftwareFoundation' && 'windows-2022-8c-32g-300h') || 'windows-latest' }} + name: windows-vc:${{ matrix.config.vc }}-type:${{ matrix.config.build }} + env: + VCPKG_DEFAULT_TRIPLET: ${{ matrix.config.vc }} + strategy: + matrix: + config: + # static build of blosc from vcpkg does not build internal sources. + # USE_STATIC_DEPENDENCIES is required for IlmBase/OpenEXR defines and + # Boost as both shared and static libs are installed. 
+ # USE_EXPLICIT_INSTANTIATION is disabled for debug static libraries + # due to disk space constraints + - { vc: 'x64-windows-static', components: 'core,bin,view,render,test', build: 'Release', cmake: '-A x64 -G \"Visual Studio 17 2022\" -DOPENVDB_CORE_SHARED=OFF -DUSE_STATIC_DEPENDENCIES=ON -DBLOSC_USE_EXTERNAL_SOURCES=ON' } + - { vc: 'x64-windows', components: 'core,bin,view,render,python,test', build: 'Release', cmake: '-A x64 -G \"Visual Studio 17 2022\" -DOPENVDB_CORE_STATIC=OFF' } + - { vc: 'x64-windows', components: 'core,bin,view,render,python,test', build: 'Debug', cmake: '-A x64 -G \"Visual Studio 17 2022\" -DOPENVDB_CORE_STATIC=OFF' } + fail-fast: false + steps: + - uses: actions/checkout@v3 + - name: path + shell: pwsh + run: | + # note: system path must be modified in a previous step to its use + echo "$Env:VCPKG_INSTALLATION_ROOT\installed\${{ matrix.config.vc }}\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append + echo "${{github.workspace}}\build\openvdb\openvdb\${{ matrix.config.build }}" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append + - name: install + run: ./ci/install_windows.sh + - name: build + run: > + ./ci/build.sh -v + --config=${{ matrix.config.build }} + --components=${{ matrix.config.components }} + --cargs=\' + ${{ matrix.config.cmake }} + -DMSVC_COMPRESS_PDB=ON + -DUSE_EXR=ON + -DUSE_PNG=ON + -DVCPKG_TARGET_TRIPLET=${VCPKG_DEFAULT_TRIPLET} + -DCMAKE_TOOLCHAIN_FILE=\"${VCPKG_INSTALLATION_ROOT}\\scripts\\buildsystems\\vcpkg.cmake\" + \' + - name: size + # Print the build directory size (monitor if we're hitting runner limits) + run: du -h build + - name: test + # Always run tests on weekly builds but skip Debug on commits as they take a while. + # https://github.community/t/distinct-job-for-each-schedule/17811/2 + if: contains(github.event.schedule, '0 7 * * 1') || matrix.config.build == 'Release' + run: cd build && ctest -V -C ${{ matrix.config.build }} + ############################################################################# ############################ AX Library Extras ############################## ############################################################################# @@ -251,6 +314,10 @@ jobs: - { image: '2022-clang11', cxx: 'g++', build: 'Release', components: 'core', cmake: '' } fail-fast: false steps: + - name: Enable Node 16 + if: contains(matrix.config.image, '2021') || contains(matrix.config.image, '2022') + run: | + echo "ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION=true" >> $GITHUB_ENV - uses: actions/checkout@v3 - name: pybind11 #if: contains(matrix.config.image, '2023') == false @@ -291,7 +358,7 @@ jobs: github.event_name != 'workflow_dispatch' || github.event.inputs.type == 'all' || github.event.inputs.type == 'ax' - runs-on: macos-latest + runs-on: macos-13 name: macos-cxx:${{ matrix.config.cxx }}-llvm:${{ matrix.config.llvm }}-${{ matrix.config.build }} env: CXX: ${{ matrix.config.cxx }} @@ -305,7 +372,9 @@ jobs: steps: - uses: actions/checkout@v3 - name: install_deps - run: ./ci/install_macos.sh ${{ matrix.config.llvm }} + run: | + ./ci/install_macos.sh ${{ matrix.config.llvm }} + ./ci/install_tbb_macos.sh - name: build run: > ./ci/build.sh -v @@ -314,7 +383,8 @@ jobs: --cargs=\" -DOPENVDB_AX_TEST_CMD_DOWNLOADS=ON -DUSE_EXPLICIT_INSTANTIATION=OFF - -DLLVM_DIR=/usr/local/opt/llvm@${{ matrix.config.llvm }}/lib/cmake/llvm + -DCMAKE_INSTALL_PREFIX=${{ github.workspace }}/install + -DLLVM_DIR=/opt/homebrew/opt/llvm@${{ matrix.config.llvm }}/lib/cmake/llvm \" - name: test run: cd build && ctest -V @@ -380,6 
+450,64 @@ jobs: ################################## Blosc #################################### ############################################################################# + windows-nanovdb: + if: | + github.event_name != 'workflow_dispatch' || + github.event.inputs.type == 'all' || + github.event.inputs.type == 'win' + runs-on: ${{ (github.repository_owner == 'AcademySoftwareFoundation' && 'windows-2022-8c-32g-300h') || 'windows-latest' }} + env: + VCPKG_DEFAULT_TRIPLET: ${{ matrix.config.vc }} + visual_studio: "Visual Studio 17 2022" + cuda: "12.4.0" + strategy: + matrix: + config: + # static build of blosc from vcpkg does not build internal sources. + # USE_STATIC_DEPENDENCIES is required for IlmBase/OpenEXR defines and + # Boost as both shared and static libs are installed. + - { vc: 'x64-windows-static', build: 'Release', cmake: '-A x64 -G \"Visual Studio 17 2022\" -DOPENVDB_CORE_SHARED=OFF -DUSE_STATIC_DEPENDENCIES=ON -DBLOSC_USE_EXTERNAL_SOURCES=ON -DCMAKE_MSVC_RUNTIME_LIBRARY=MultiThreaded' } + - { vc: 'x64-windows-static', build: 'Debug', cmake: '-A x64 -G \"Visual Studio 17 2022\" -DOPENVDB_CORE_SHARED=OFF -DUSE_STATIC_DEPENDENCIES=ON -DBLOSC_USE_EXTERNAL_SOURCES=ON -DCMAKE_MSVC_RUNTIME_LIBRARY=MultiThreadedDebug' } + - { vc: 'x64-windows', build: 'Release', cmake: '-A x64 -G \"Visual Studio 17 2022\" -DOPENVDB_CORE_STATIC=OFF' } + - { vc: 'x64-windows', build: 'Debug', cmake: '-A x64 -G \"Visual Studio 17 2022\" -DOPENVDB_CORE_STATIC=OFF' } + fail-fast: false + steps: + - uses: actions/checkout@v3 + - name: path + shell: powershell + run: | + # note: system path must be modified in a previous step to it's use + echo "$Env:VCPKG_INSTALLATION_ROOT\installed\${{ matrix.config.vc }}\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append + echo "${{github.workspace}}\build\openvdb\openvdb\${{ matrix.config.build }}" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append + - name: install_cuda + shell: powershell + run: .\ci\install_windows_cuda.ps1 + - name: install_deps + shell: bash + run: ./ci/install_windows.sh + - name: build + shell: bash + run: > + ./ci/build.sh -v + --config=${{ matrix.config.build }} + --components=core,nano,nanotest,nanoexam,nanobench,nanotool + --cargs=\' + ${{ matrix.config.cmake }} + -DMSVC_COMPRESS_PDB=ON + -DUSE_EXPLICIT_INSTANTIATION=OFF + -DNANOVDB_USE_CUDA=ON + -DNANOVDB_USE_OPENVDB=ON + -DVCPKG_TARGET_TRIPLET=${VCPKG_DEFAULT_TRIPLET} + -DCMAKE_TOOLCHAIN_FILE=\"${VCPKG_INSTALLATION_ROOT}\\scripts\\buildsystems\\vcpkg.cmake\" + \' + - name: test + shell: bash + run: cd build && ctest -V -E ".*cuda.*" + + ############################################################################# + ################################## Blosc #################################### + ############################################################################# + linux-blosc: if: | github.event_name != 'workflow_dispatch' || @@ -394,6 +522,9 @@ jobs: blosc: ['1.18.0','1.19.0','1.20.0','1.21.0'] fail-fast: false steps: + - name: Enable Node 16 + run: | + echo "ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION=true" >> $GITHUB_ENV - uses: actions/checkout@v3 - name: install_blosc run: sudo ./ci/install_blosc.sh ${{ matrix.blosc }} @@ -420,6 +551,9 @@ jobs: # may not have this build type. 
See OpenVDBCXX.cmake CXXFLAGS: "-gdwarf-4 -g3 -ggdb -Og" steps: + - name: Enable Node 16 + run: | + echo "ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION=true" >> $GITHUB_ENV - uses: actions/checkout@v3 with: fetch-depth: 0 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1ee6eda8af..e0838e682c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -57,18 +57,20 @@ code and has the right to release it under the [Mozilla Public License, version 2.0](LICENSE.md) license. See the [TAC documentation on contribution sign off](https://github.com/AcademySoftwareFoundation/tac/blob/master/process/contributing.md#contribution-sign-off) for more information on this requirement. -## Committer +## Maintainer -The committer role enables the participant to commit code directly to the repository, but also comes with the obligation to be a responsible leader in the community. +The maintainer role is the equivalent of the "Committer" role in the charter. -### Process for becoming a committer +This role enables the participant to commit code directly to the repository, but also comes with the obligation to be a responsible leader in the community. + +### Process for becoming a maintainer * Show your experience with the codebase through contributions and engagement on the community channels. -* Request to become a committer. -* Have the majority of committers approve you becoming a committer. +* Request to become a maintainer. +* Have the majority of maintainers approve you becoming a maintainer. * Your name and email is added to the MAINTAINERS.md file for the project. -### Committer responsibilities +### Maintainer responsibilities * Monitor email aliases. * Monitor Slack (delayed response is perfectly acceptable). @@ -76,28 +78,29 @@ The committer role enables the participant to commit code directly to the reposi * Make sure that ongoing PRs are moving forward at the right pace or close them. * Remain an active contributor to the project in general and the code base in particular. -### When does a committer lose committer status? +### When does a maintainer lose maintainer status? -If a committer is no longer interested or cannot perform the committer duties listed above, they +If a maintainer is no longer interested or cannot perform the maintainer duties listed above, they should volunteer to be moved to emeritus status. In extreme cases this can also occur by a vote of -the committers per the voting process below. +the maintainers per the voting process below. ## Technical Steering Committee (TSC) member The Technical Steering Committee (TSC) oversees the overall technical direction of OpenVDB, as defined in the [charter](charter.md). -TSC voting members consist of committers that have been nominated by the committers, with a supermajority of voting members required to have a committer elected to be a TSC voting member. TSC voting members term and succession is defined in the [charter](charter.md). +TSC voting members consist of maintainers that have been nominated by the TSC, with a supermajority of voting members required to have a maintainer elected to be a TSC voting member. TSC voting members term and succession is defined in the [charter](charter.md). All meetings of the TSC are open to participation by any member of the OpenVDB community. Meeting times are listed in the [ASWF technical community calendar](https://lists.aswf.io/g/tac/calendar). 
## Current TSC members -* Ken Museth, Chair / NVIDIA -* Andre Pradhana, DreamWorks +* Ken Museth (Chair), Nvidia +* Andre Pradhana, Nvidia * Jeff Lait, SideFX -* Nick Avramoussis, WETA +* Nick Avramoussis, WETA FX * Dan Bailey, ILM -* Richard Jones, DNEG +* Richard Jones, ILM +* Gregory Hurst, United Therapeutics # Release Process diff --git a/MAINTAINERS.md b/MAINTAINERS.md index df279e587e..1d9752fb24 100644 --- a/MAINTAINERS.md +++ b/MAINTAINERS.md @@ -14,3 +14,5 @@ The current OpenVDB maintainers are: | Ken Museth | ken.museth@gmail.com | Andre Pradhana | andre.pradhana@gmail.com | Richard Jones | richardj@ilm.com +| Gregory Hurst | tbd +| Jonathan Swartz | tbd diff --git a/RE-LICENSE_NOTE.txt b/RE-LICENSE_NOTE.txt new file mode 100644 index 0000000000..47c3c73623 --- /dev/null +++ b/RE-LICENSE_NOTE.txt @@ -0,0 +1,33 @@ +The following copyright holders agree that all of their contributions +originally submitted to this project under the Mozilla Public License +Version 2.0, are hereby relicensed to the Apache License, Version 2.0, +and are submitted pursuant to the Developer Certificate of Origin, version 1.1: + +Ken Museth +Mehdi Chinoune +DreamWorks Animation +Side Effects Software Inc. +Blender Foundation +NVIDIA Corporation +United Therapeutics Corporation +Digital Domain 3.0, Inc. +Double Negative +Ubisoft Entertainment SA +Adobe Inc. +Mathieu Malaterre +Brecht Sanders +Ignacio Vizzo +Ben FrantzDale +Sebastian Gaida +Alessio Quaglino +Benedikt Mersch +David Aguilar +Brian Sharpe +Kartik Shrivastava +Michael Lackner +Lucas Baraya +Kuba Roth +Tom Cnops +Walt Disney Pictures (Walt Disney Animation Studios) +The Linux Foundation +Industrial Light & Magic (ILM) diff --git a/README.md b/README.md index f16e03c451..32b95c3b20 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,8 @@ Mozilla Foundation. The trademarks of any contributor to this project may not be used in association with the project without the contributor's express permission. +NOTE: OpenVDB is in the process of changing its license from [Mozilla Public License Version 2.0](https://www.mozilla.org/MPL/2.0/) to [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0)! Please see the file RE-LICENSE_NOTE.txt for more details. + ### Contributing OpenVDB welcomes contributions to the OpenVDB project. Please refer to the diff --git a/ci/install_macos.sh b/ci/install_macos.sh index 39d89ba191..0d7899687d 100755 --- a/ci/install_macos.sh +++ b/ci/install_macos.sh @@ -19,7 +19,6 @@ brew install googletest brew install jq # for trivial parsing of brew json brew install openexr brew install pybind11 # also installs the dependent python version -brew install tbb brew install zlib brew install jemalloc @@ -31,8 +30,13 @@ echo "Using python $py_version" echo "Python_ROOT_DIR=/usr/local/opt/$py_version" >> $GITHUB_ENV echo "/usr/local/opt/$py_version/bin" >> $GITHUB_PATH +# use bash +echo "/usr/local/opt/bash/bin" >> $GITHUB_PATH +echo "/opt/homebrew/opt/bash/bin" >> $GITHUB_PATH + # use gnu-getopt echo "/usr/local/opt/gnu-getopt/bin" >> $GITHUB_PATH +echo "/opt/homebrew/opt/gnu-getopt/bin" >> $GITHUB_PATH LLVM_VERSION=$1 if [ ! 
-z "$LLVM_VERSION" ]; then diff --git a/ci/install_tbb_macos.sh b/ci/install_tbb_macos.sh new file mode 100755 index 0000000000..4b71fe9e93 --- /dev/null +++ b/ci/install_tbb_macos.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash + +set -x + +brew update +brew install tbb diff --git a/ci/install_windows.sh b/ci/install_windows.sh index 939668c4b3..9620ee42a7 100755 --- a/ci/install_windows.sh +++ b/ci/install_windows.sh @@ -1,8 +1,44 @@ #!/usr/bin/env bash -set -ex +set -x +set -e +# Required dependencies +VCPKG_INSTALL_CMD="vcpkg install + zlib + libpng + openexr + tbb + gtest + cppunit + blosc + glfw3 + glew + python3 + jemalloc + boost-iostreams + boost-interprocess + boost-algorithm + pybind11 + --clean-after-build" + +# Update vcpkg vcpkg update -vcpkg install zlib libpng openexr tbb gtest cppunit blosc glfw3 glew python3 jemalloc \ - boost-iostreams boost-interprocess boost-algorithm pybind11 \ - --clean-after-build + +# Allow the vcpkg command to fail once so we can retry with the latest +set +e +$VCPKG_INSTALL_CMD +STATUS=$? + +# Subsequent commands cannot fail +set -x + +if [ $STATUS -ne 0 ]; then + # Try once more with latest ports + echo "vcpkg install failed, retrying with latest ports..." + cd $VCPKG_INSTALLATION_ROOT && git pull && cd- + vcpkg update + $VCPKG_INSTALL_CMD +fi + +echo "vcpkg install completed successfully" diff --git a/ci/install_windows_cuda.ps1 b/ci/install_windows_cuda.ps1 index db8b49c79a..f365cca7e7 100644 --- a/ci/install_windows_cuda.ps1 +++ b/ci/install_windows_cuda.ps1 @@ -26,12 +26,13 @@ $CUDA_KNOWN_URLS = @{ "11.2.1" = "https://developer.download.nvidia.com/compute/cuda/11.2.1/network_installers/cuda_11.2.1_win10_network.exe"; "11.2.2" = "https://developer.download.nvidia.com/compute/cuda/11.2.2/network_installers/cuda_11.2.2_win10_network.exe"; "11.3.0" = "https://developer.download.nvidia.com/compute/cuda/11.3.0/network_installers/cuda_11.3.0_win10_network.exe"; - "11.6.2" = "https://developer.download.nvidia.com/compute/cuda/11.6.2/network_installers/cuda_11.6.2_windows_network.exe" + "11.6.2" = "https://developer.download.nvidia.com/compute/cuda/11.6.2/network_installers/cuda_11.6.2_windows_network.exe"; + "12.4.0" = "https://developer.download.nvidia.com/compute/cuda/12.4.0/network_installers/cuda_12.4.0_windows_network.exe" } # @todo - change this to be based on _MSC_VER intead, or invert it to be CUDA keyed instead? $VISUAL_STUDIO_MIN_CUDA = @{ - "2022" = "11.6"; + "2022" = "12.4"; "2019" = "10.1"; "2017" = "10.0"; # Depends on which version of 2017! 9.0 to 10.0 depending on version "2015" = "8.0"; # might support older, unsure. 
diff --git a/cmake/FindBlosc.cmake b/cmake/FindBlosc.cmake index a9c77ae5e7..9b873cfb63 100644 --- a/cmake/FindBlosc.cmake +++ b/cmake/FindBlosc.cmake @@ -188,22 +188,6 @@ list(APPEND _BLOSC_LIBRARYDIR_SEARCH_DIRS ${SYSTEM_LIBRARY_PATHS} ) -# Library suffix handling - -set(_BLOSC_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES}) -set(_BLOSC_ORIG_CMAKE_FIND_LIBRARY_PREFIXES ${CMAKE_FIND_LIBRARY_PREFIXES}) - -if(MSVC) - if(BLOSC_USE_STATIC_LIBS) - set(CMAKE_FIND_LIBRARY_SUFFIXES ".lib") - set(CMAKE_FIND_LIBRARY_PREFIXES "${CMAKE_FIND_LIBRARY_PREFIXES};lib") - endif() -else() - if(BLOSC_USE_STATIC_LIBS) - set(CMAKE_FIND_LIBRARY_SUFFIXES ".a") - endif() -endif() - set(Blosc_LIB_COMPONENTS "") # NOTE: Search for debug version first (see vcpkg hack) list(APPEND BLOSC_BUILD_TYPES DEBUG RELEASE) @@ -246,13 +230,6 @@ foreach(BUILD_TYPE ${BLOSC_BUILD_TYPES}) set(CMAKE_IGNORE_PATH ${_BLOSC_CMAKE_IGNORE_PATH}) endforeach() -# Reset library suffix - -set(CMAKE_FIND_LIBRARY_SUFFIXES ${_BLOSC_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES}) -set(CMAKE_FIND_LIBRARY_PREFIXES ${_BLOSC_ORIG_CMAKE_FIND_LIBRARY_PREFIXES}) -unset(_BLOSC_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES) -unset(_BLOSC_ORIG_CMAKE_FIND_LIBRARY_PREFIXES) - if(Blosc_LIBRARY_DEBUG AND Blosc_LIBRARY_RELEASE) # if the generator is multi-config or if CMAKE_BUILD_TYPE is set for # single-config generators, set optimized and debug libraries diff --git a/doc/nanovdb/SourceTree.md b/doc/nanovdb/SourceTree.md index 6eb0a1dcc9..49bb8dc4b7 100644 --- a/doc/nanovdb/SourceTree.md +++ b/doc/nanovdb/SourceTree.md @@ -5,12 +5,12 @@ * [NanoVDB.h](https://github.com/AcademySoftwareFoundation/openvdb/blob/master/nanovdb/nanovdb/NanoVDB.h) C++11 implementation of the core data structure and its access methods. * [CNanoVDB.h](https://github.com/AcademySoftwareFoundation/openvdb/blob/master/nanovdb/nanovdb/CNanoVDB.h) Incomplete but useable C99 implementation of the core data structure and its access methods. Designed in particular for use in OpenCL kernels. Note that this relies on zero-sized arrays for the _reserved padding, so will not work on all compilers (with MSVC being a particular example) * [PNanoVDB.h](https://github.com/AcademySoftwareFoundation/openvdb/blob/master/nanovdb/nanovdb/PNanoVDB.h) C99 implementation of the core data structure and its access methods. More complete coverage than CNanoVDB. This version is pointer-less and supports virtually all graphics APIs. -* [util/GridHandle.h](https://github.com/AcademySoftwareFoundation/openvdb/blob/master/nanovdb/nanovdb/util/GridHandle.h) defines a handler for the memory allocated to a NanoVDB grid. -* [util/IO.h](https://github.com/AcademySoftwareFoundation/openvdb/blob/master/nanovdb/nanovdb/util/IO.h) implements I/O support. -* [util/OpenToNanoVDB.h](https://github.com/AcademySoftwareFoundation/openvdb/blob/master/nanovdb/nanovdb/util/OpenToNanoVDB.h) defines the converter from OpenVDB to NanoVDB and obviously depends on the OpenVDB library (as the only header file). -* [Ray.h](https://github.com/AcademySoftwareFoundation/openvdb/blob/master/nanovdb/nanovdb/util/Ray.h) Ray class. -* [HDDA.h](https://github.com/AcademySoftwareFoundation/openvdb/blob/master/nanovdb/nanovdb/util/HDDA.h) HDDA related. -* [SampleFromVoxels.h](https://github.com/AcademySoftwareFoundation/openvdb/blob/master/nanovdb/nanovdb/util/SampleFromVoxels.h) interpolation. 
+* [GridHandle.h](https://github.com/AcademySoftwareFoundation/openvdb/blob/master/nanovdb/nanovdb/GridHandle.h) defines a handler for the memory allocated to a NanoVDB grid. +* [io/IO.h](https://github.com/AcademySoftwareFoundation/openvdb/blob/master/nanovdb/nanovdb/io/IO.h) implements I/O support. +* [tools/CreateNanoGrid.h](https://github.com/AcademySoftwareFoundation/openvdb/blob/master/nanovdb/nanovdb/tools/CreateNanoGrid.h) defines the converter from OpenVDB to NanoVDB and obviously depends on the OpenVDB library (as the only header file). +* [math/Ray.h](https://github.com/AcademySoftwareFoundation/openvdb/blob/master/nanovdb/nanovdb/math/Ray.h) Ray class. +* [math/HDDA.h](https://github.com/AcademySoftwareFoundation/openvdb/blob/master/nanovdb/nanovdb/math/HDDA.h) HDDA related. +* [math/SampleFromVoxels.h](https://github.com/AcademySoftwareFoundation/openvdb/blob/master/nanovdb/nanovdb/math/SampleFromVoxels.h) interpolation. ```bash foo@bar:~$ tree @@ -22,25 +22,19 @@ foo@bar:~$ tree │ │ └── nanovdb_convert.cc │ ├── print │ │ └── nanovdb_print.cc +│ ├── updateFiles.sh │ └── validate │ └── nanovdb_validate.cc ├── CNanoVDB.h +├── cuda +│ ├── DeviceBuffer.h +│ ├── GridHandle.cuh +│ └── NodeManager.cuh ├── docs │ ├── CMakeLists.txt │ ├── codingstyle.txt │ └── doxygen-config ├── examples -│ ├── benchmark -│ │ ├── BenchKernels_dense.cu -│ │ ├── BenchKernels_nano.cu -│ │ ├── Benchmark_dense.cu -│ │ ├── Benchmark_nano.cu -│ │ ├── Camera.h -│ │ ├── CMakeLists.txt -│ │ ├── DenseGrid.h -│ │ ├── Image.h -│ │ ├── TestBenchmark.cc -│ │ └── TestBenchmark.cu │ ├── CMakeLists.txt │ ├── ex_bump_pool_buffer │ │ └── bump_pool_buffer.cc @@ -50,7 +44,7 @@ foo@bar:~$ tree │ │ ├── nanovdb.cu │ │ └── openvdb.cc │ ├── ex_index_grid_cuda -│ │ ├── index_grid_cuda.cu +│ │ ├── index_grid_cuda.cc │ │ └── index_grid_cuda_kernel.cu │ ├── ex_make_custom_nanovdb │ │ └── make_custom_nanovdb.cc @@ -66,6 +60,7 @@ foo@bar:~$ tree │ ├── ex_map_pool_buffer │ │ └── map_pool_buffer.cc │ ├── ex_modify_nanovdb_thrust +│ │ ├── modify_nanovdb_thrust.cc │ │ └── modify_nanovdb_thrust.cu │ ├── ex_nodemanager_cuda │ │ ├── nodemanager_cuda.cc @@ -103,9 +98,38 @@ foo@bar:~$ tree │ │ └── VoxToNanoVDB.h │ └── ex_write_nanovdb_grids │ └── write_nanovdb_grids.cc +├── GridHandle.h +├── HostBuffer.h +├── io +│ └── IO.h +├── math +│ ├── CSampleFromVoxels.h +│ ├── DitherLUT.h +│ ├── HDDA.h +│ ├── Math.h +│ ├── Ray.h +│ ├── SampleFromVoxels.h +│ └── Stencils.h ├── NanoVDB.h +├── NodeManager.h ├── PNanoVDB.h ├── Readme.md +├── tools +│ ├── CreateNanoGrid.h +│ ├── CreatePrimitives.h +│ ├── cuda +│ │ ├── AddBlindData.cuh +│ │ ├── GridChecksum.cuh +│ │ ├── GridStats.cuh +│ │ ├── GridValidator.cuh +│ │ ├── IndexToGrid.cuh +│ │ ├── PointsToGrid.cuh +│ │ └── SignedFloodFill.cuh +│ ├── GridBuilder.h +│ ├── GridChecksum.h +│ ├── GridStats.h +│ ├── GridValidator.h +│ └── NanoToOpenVDB.h ├── unittest │ ├── CMakeLists.txt │ ├── pnanovdb_validate_strides.h @@ -115,21 +139,25 @@ foo@bar:~$ tree └── util ├── CpuTimer.h ├── CreateNanoGrid.h - ├── CSampleFromVoxels.h ├── cuda │ ├── CudaAddBlindData.cuh │ ├── CudaDeviceBuffer.h + │ ├── CudaGridChecksum.cuh │ ├── CudaGridHandle.cuh + │ ├── CudaGridStats.cuh + │ ├── CudaGridValidator.cuh │ ├── CudaIndexToGrid.cuh + │ ├── CudaNodeManager.cuh │ ├── CudaPointsToGrid.cuh │ ├── CudaSignedFloodFill.cuh │ ├── CudaUtils.h - │ └── GpuTimer.cuh + │ ├── GpuTimer.h + │ ├── Timer.h + │ └── Util.h ├── DitherLUT.h ├── ForEach.h ├── GridBuilder.h ├── GridChecksum.h - ├── GridHandle.h ├── GridStats.h ├── GridValidator.h ├── 
HDDA.h @@ -145,5 +173,6 @@ foo@bar:~$ tree ├── Ray.h ├── Reduce.h ├── SampleFromVoxels.h - └── Stencils.h -``` + ├── Stencils.h + ├── Timer.h + └── Util.h \ No newline at end of file diff --git a/nanovdb/nanovdb/CMakeLists.txt b/nanovdb/nanovdb/CMakeLists.txt index 7bb3ab862d..5ef70a9fc1 100644 --- a/nanovdb/nanovdb/CMakeLists.txt +++ b/nanovdb/nanovdb/CMakeLists.txt @@ -160,31 +160,66 @@ endif() # NanoVDB header files set(NANOVDB_INCLUDE_FILES CNanoVDB.h + GridHandle.h + HostBuffer.h NanoVDB.h + NodeManager.h PNanoVDB.h ) +# NanoVDB cuda header files +set(NANOVDB_INCLUDE_CUDA_FILES + cuda/DeviceBuffer.h + cuda/GridHandle.cuh + cuda/NodeManager.cuh +) + +# NanoVDB io header files +set(NANOVDB_INCLUDE_IO_FILES + io/IO.h +) + +# NanoVDB math header files +set(NANOVDB_INCLUDE_MATH_FILES + math/CSampleFromVoxels.h + math/DitherLUT.h + math/HDDA.h + math/Math.h + math/Ray.h + math/SampleFromVoxels.h + math/Stencils.h +) + +# NanoVDB tools header files +set(NANOVDB_INCLUDE_TOOLS_FILES + tools/CreateNanoGrid.h + tools/CreatePrimitives.h + tools/GridBuilder.h + tools/GridChecksum.h + tools/GridStats.h + tools/GridValidator.h + tools/NanoToOpenVDB.h +) + +# NanoVDB tools/cuda header files +set(NANOVDB_INCLUDE_TOOLS_CUDA_FILES + tools/cuda/AddBlindData.cuh + tools/cuda/GridChecksum.cuh + tools/cuda/GridStats.cuh + tools/cuda/GridValidator.cuh + tools/cuda/IndexToGrid.cuh + tools/cuda/PointsToGrid.cuh + tools/cuda/SignedFloodFill.cuh +) + # NanoVDB util header files -set(NANOVDB_INCLUDE_UTILFILES +set(NANOVDB_INCLUDE_UTIL_FILES util/CpuTimer.h util/CreateNanoGrid.h - util/CSampleFromVoxels.h - util/cuda/CudaAddBlindData.cuh - util/cuda/CudaDeviceBuffer.h - util/cuda/CudaGridChecksum.cuh - util/cuda/CudaGridHandle.cuh - util/cuda/CudaGridStats.cuh - util/cuda/CudaIndexToGrid.cuh - util/cuda/CudaNodeManager.cuh - util/cuda/CudaPointsToGrid.cuh - util/cuda/CudaSignedFloodFill.cuh - util/cuda/CudaUtils.h - util/cuda/GpuTimer.h util/DitherLUT.h util/ForEach.h util/GridBuilder.h util/GridChecksum.h - util/GridHandle.h util/GridStats.h util/GridValidator.h util/HDDA.h @@ -201,6 +236,26 @@ set(NANOVDB_INCLUDE_UTILFILES util/Reduce.h util/SampleFromVoxels.h util/Stencils.h + util/Timer.h + util/Util.h +) + +# NanoVDB util/cuda header files +set(NANOVDB_INCLUDE_UTIL_CUDA_FILES + util/cuda/CudaAddBlindData.cuh + util/cuda/CudaGridHandle.cuh + util/cuda/CudaIndexToGrid.cuh + util/cuda/CudaSignedFloodFill.cuh + util/cuda/Timer.h + util/cuda/CudaDeviceBuffer.h + util/cuda/CudaGridStats.cuh + util/cuda/CudaNodeManager.cuh + util/cuda/CudaUtils.h + util/cuda/Util.h + util/cuda/CudaGridChecksum.cuh + util/cuda/CudaGridValidator.cuh + util/cuda/CudaPointsToGrid.cuh + util/cuda/GpuTimer.h ) add_library(nanovdb INTERFACE) @@ -266,11 +321,23 @@ if(TARGET Threads::Threads) target_link_libraries(nanovdb INTERFACE Threads::Threads) endif() -set(NANOVDB_INSTALL_INCLUDEDIR ${CMAKE_INSTALL_INCLUDEDIR}/nanovdb) -set(NANOVDB_INSTALL_UTILDIR ${NANOVDB_INSTALL_INCLUDEDIR}/util) - -install(FILES ${NANOVDB_INCLUDE_FILES} DESTINATION ${NANOVDB_INSTALL_INCLUDEDIR}) -install(FILES ${NANOVDB_INCLUDE_UTILFILES} DESTINATION ${NANOVDB_INSTALL_UTILDIR}) +set(NANOVDB_INSTALL_INCLUDE_DIR ${CMAKE_INSTALL_INCLUDEDIR}/nanovdb) +set(NANOVDB_INSTALL_CUDA_DIR ${NANOVDB_INSTALL_INCLUDE_DIR}/cuda) +set(NANOVDB_INSTALL_IO_DIR ${NANOVDB_INSTALL_INCLUDE_DIR}/io) +set(NANOVDB_INSTALL_MATH_DIR ${NANOVDB_INSTALL_INCLUDE_DIR}/math) +set(NANOVDB_INSTALL_TOOLS_DIR ${NANOVDB_INSTALL_INCLUDE_DIR}/tools) +set(NANOVDB_INSTALL_TOOLS_CUDA_DIR 
${NANOVDB_INSTALL_TOOLS_DIR}/cuda) +set(NANOVDB_INSTALL_UTIL_DIR ${NANOVDB_INSTALL_INCLUDE_DIR}/util) +set(NANOVDB_INSTALL_UTIL_CUDA_DIR ${NANOVDB_INSTALL_UTIL_DIR}/cuda) + +install(FILES ${NANOVDB_INCLUDE_FILES} DESTINATION ${NANOVDB_INSTALL_INCLUDE_DIR}) +install(FILES ${NANOVDB_INCLUDE_CUDA_FILES} DESTINATION ${NANOVDB_INSTALL_CUDA_DIR}) +install(FILES ${NANOVDB_INCLUDE_IO_FILES} DESTINATION ${NANOVDB_INSTALL_IO_DIR}) +install(FILES ${NANOVDB_INCLUDE_MATH_FILES} DESTINATION ${NANOVDB_INSTALL_MATH_DIR}) +install(FILES ${NANOVDB_INCLUDE_TOOLS_FILES} DESTINATION ${NANOVDB_INSTALL_TOOLS_DIR}) +install(FILES ${NANOVDB_INCLUDE_TOOLS_CUDA_FILES} DESTINATION ${NANOVDB_INSTALL_TOOLS_CUDA_DIR}) +install(FILES ${NANOVDB_INCLUDE_UTIL_FILES} DESTINATION ${NANOVDB_INSTALL_UTIL_DIR}) +install(FILES ${NANOVDB_INCLUDE_UTIL_CUDA_FILES} DESTINATION ${NANOVDB_INSTALL_UTIL_CUDA_DIR}) ############################################################################### # Options diff --git a/nanovdb/nanovdb/util/GridHandle.h b/nanovdb/nanovdb/GridHandle.h similarity index 89% rename from nanovdb/nanovdb/util/GridHandle.h rename to nanovdb/nanovdb/GridHandle.h index 14094fbe69..a3d868e8be 100644 --- a/nanovdb/nanovdb/util/GridHandle.h +++ b/nanovdb/nanovdb/GridHandle.h @@ -2,7 +2,7 @@ // SPDX-License-Identifier: MPL-2.0 /*! - \file GridHandle.h + \file nanovdb/GridHandle.h \author Ken Museth @@ -20,9 +20,9 @@ #include #include -#include // for mapToGridType -#include -#include // for updateGridCount +#include // for toGridType +#include +#include // for updateGridCount namespace nanovdb { @@ -48,13 +48,13 @@ class GridHandle /// @brief Move constructor from a host buffer /// @param buffer buffer containing one or more NanoGrids that will be moved into this GridHandle /// @throw Will throw and error with the buffer does not contain a valid NanoGrid! - template::hasDeviceDual, int>::type = 0> + template::hasDeviceDual, int>::type = 0> GridHandle(T&& buffer); /// @brief Move constructor from a dual host-device buffer /// @param buffer buffer containing one or more NanoGrids that will be moved into this GridHandle /// @throw Will throw and error with the buffer does not contain a valid NanoGrid! - template::hasDeviceDual, int>::type = 0> + template::hasDeviceDual, int>::type = 0> GridHandle(T&& buffer); /// @brief Constructs an empty GridHandle @@ -100,17 +100,17 @@ class GridHandle /// @brief Returns a non-const pointer to the data. /// @warning Note that the return pointer can be NULL if the GridHandle was not initialized - uint8_t* data() { return mBuffer.data(); } + void* data() { return mBuffer.data(); } /// @brief Returns a const pointer to the data. /// @warning Note that the return pointer can be NULL if the GridHandle was not initialized - const uint8_t* data() const { return mBuffer.data(); } + const void* data() const { return mBuffer.data(); } template - typename enable_if::hasDeviceDual, const uint8_t*>::type + typename util::enable_if::hasDeviceDual, const void*>::type deviceData() const { return mBuffer.deviceData(); } template - typename enable_if::hasDeviceDual, uint8_t*>::type + typename util::enable_if::hasDeviceDual, void*>::type deviceData() { return mBuffer.deviceData(); } /// @brief Returns the size in bytes of the raw memory buffer managed by this GridHandle. @@ -147,7 +147,7 @@ class GridHandle /// @warning Note that the return pointer can be NULL if the GridHandle has no device grid, @a n is invalid, /// or if the template parameter does not match the specified grid. 
template - typename enable_if::hasDeviceDual, const NanoGrid*>::type + typename util::enable_if::hasDeviceDual, const NanoGrid*>::type deviceGrid(uint32_t n=0) const; /// @brief Return a const pointer to the @a n'th grid encoded in this GridHandle on the device, e.g. GPU @@ -157,19 +157,19 @@ class GridHandle /// @warning Note that the return pointer can be NULL if the GridHandle was not initialized, @a n is invalid, /// or if the template parameter does not match the specified grid. template - typename enable_if::hasDeviceDual, NanoGrid*>::type + typename util::enable_if::hasDeviceDual, NanoGrid*>::type deviceGrid(uint32_t n=0){return const_cast*>(static_cast(this)->template deviceGrid(n));} /// @brief Upload the grid to the device, e.g. from CPU to GPU /// @note This method is only available if the buffer supports devices template - typename enable_if::hasDeviceDual, void>::type + typename util::enable_if::hasDeviceDual, void>::type deviceUpload(void* stream = nullptr, bool sync = true) { mBuffer.deviceUpload(stream, sync); } /// @brief Download the grid to from the device, e.g. from GPU to CPU /// @note This method is only available if the buffer supports devices template - typename enable_if::hasDeviceDual, void>::type + typename util::enable_if::hasDeviceDual, void>::type deviceDownload(void* stream = nullptr, bool sync = true) { mBuffer.deviceDownload(stream, sync); } /// @brief Check if the buffer is this handle has any padding, i.e. if the buffer is larger than the combined size of all its grids @@ -292,41 +292,39 @@ class GridHandle template inline const GridData* GridHandle::gridData(uint32_t n) const { - const uint8_t *data = this->data(); + const void *data = this->data(); if (data == nullptr || n >= mMetaData.size()) return nullptr; - return reinterpret_cast(data + mMetaData[n].offset); + return util::PtrAdd(data, mMetaData[n].offset); }// const GridData* GridHandle::gridData(uint32_t n) const template inline const GridMetaData* GridHandle::gridMetaData(uint32_t n) const { - const uint8_t *data = this->data(); + const auto *data = this->data(); if (data == nullptr || n >= mMetaData.size()) return nullptr; - return reinterpret_cast(data + mMetaData[n].offset); + return util::PtrAdd(data, mMetaData[n].offset); }// const GridMetaData* GridHandle::gridMetaData(uint32_t n) const -namespace {// anonymous namespace -inline __hostdev__ void cpyMetaData(const GridData *data, GridHandleMetaData *meta) +inline __hostdev__ void cpyGridHandleMeta(const GridData *data, GridHandleMetaData *meta) { uint64_t offset = 0; for (auto *p=meta, *q=p+data->mGridCount; p!=q; ++p) { *p = {offset, data->mGridSize, data->mGridType}; offset += p->size; - data = PtrAdd(data, p->size); + data = util::PtrAdd(data, p->size); } -}// void cpyMetaData(const GridData *data, GridHandleMetaData *meta) -}// anonymous namespace +}// void cpyGridHandleMeta(const GridData *data, GridHandleMetaData *meta) template -template::hasDeviceDual, int>::type> +template::hasDeviceDual, int>::type> GridHandle::GridHandle(T&& buffer) { - static_assert(is_same::value, "Expected U==BufferT"); + static_assert(util::is_same::value, "Expected U==BufferT"); mBuffer = std::move(buffer); if (auto *data = reinterpret_cast(mBuffer.data())) { if (!data->isValid()) throw std::runtime_error("GridHandle was constructed with an invalid host buffer"); mMetaData.resize(data->mGridCount); - cpyMetaData(data, mMetaData.data()); + cpyGridHandleMeta(data, mMetaData.data()); } }// GridHandle::GridHandle(T&& buffer) @@ -344,19 +342,19 @@ template 
template inline const NanoGrid* GridHandle::grid(uint32_t n) const { - const uint8_t *data = mBuffer.data(); - if (data == nullptr || n >= mMetaData.size() || mMetaData[n].gridType != mapToGridType()) return nullptr; - return reinterpret_cast*>(data + mMetaData[n].offset); + const void *data = mBuffer.data(); + if (data == nullptr || n >= mMetaData.size() || mMetaData[n].gridType != toGridType()) return nullptr; + return util::PtrAdd>(data, mMetaData[n].offset); }// const NanoGrid* GridHandle::grid(uint32_t n) const template template -inline typename enable_if::hasDeviceDual, const NanoGrid*>::type +inline typename util::enable_if::hasDeviceDual, const NanoGrid*>::type GridHandle::deviceGrid(uint32_t n) const { - const uint8_t *data = mBuffer.deviceData(); - if (data == nullptr || n >= mMetaData.size() || mMetaData[n].gridType != mapToGridType()) return nullptr; - return reinterpret_cast*>(data + mMetaData[n].offset); + const void *data = mBuffer.deviceData(); + if (data == nullptr || n >= mMetaData.size() || mMetaData[n].gridType != toGridType()) return nullptr; + return util::PtrAdd>(data, mMetaData[n].offset); }// GridHandle::deviceGrid(uint32_t n) cons template @@ -395,7 +393,7 @@ void GridHandle::read(std::istream& is, uint32_t n, const BufferT& pool auto buffer = BufferT::create(data.mGridSize, &pool); is.seekg(-sizeof(GridData), std::ios::cur);// rewind is.read((char*)(buffer.data()), data.mGridSize); - updateGridCount((GridData*)buffer.data(), 0u, 1u); + tools::updateGridCount((GridData*)buffer.data(), 0u, 1u); *this = GridHandle(std::move(buffer)); } else { is.seekg(-sizeof(GridData), std::ios::cur);// rewind sizeof(GridData) bytes to undo initial read @@ -420,7 +418,7 @@ void GridHandle::read(std::istream& is, const std::string &gridName, co if (n>data.mGridCount) throw std::runtime_error("No raw grid named \""+gridName+"\""); auto buffer = BufferT::create(data.mGridSize, &pool); is.read((char*)(buffer.data()), data.mGridSize); - updateGridCount((GridData*)buffer.data(), 0u, 1u); + tools::updateGridCount((GridData*)buffer.data(), 0u, 1u); *this = GridHandle(std::move(buffer)); } else { throw std::logic_error("This file does not contain a valid raw buffer"); @@ -439,7 +437,7 @@ inline VectorT> splitGrids(const GridHandle &handle, const BufferT* other = nullptr) { using HandleT = GridHandle; - const uint8_t *ptr = handle.data(); + const void *ptr = handle.data(); if (ptr == nullptr) return VectorT(); VectorT handles(handle.gridCount()); for (auto &h : handles) { @@ -448,9 +446,9 @@ splitGrids(const GridHandle &handle, const BufferT* other = nullptr) auto buffer = BufferT::create(src->mGridSize, other); GridData *dst = reinterpret_cast(buffer.data()); std::memcpy(dst, src, src->mGridSize); - updateGridCount(dst, 0u, 1u); + tools::updateGridCount(dst, 0u, 1u); h = HandleT(std::move(buffer)); - ptr += src->mGridSize; + ptr = util::PtrAdd(ptr, src->mGridSize); } return std::move(handles); }// splitGrids @@ -471,16 +469,16 @@ mergeGrids(const VectorT> &handles, const BufferT* pool = nu for (uint32_t n=0; n(dst); NANOVDB_ASSERT(data->isValid()); - updateGridCount(data, counter++, gridCount); - dst += data->mGridSize; - src += data->mGridSize; + tools::updateGridCount(data, counter++, gridCount); + dst = util::PtrAdd(dst, data->mGridSize); + src = util::PtrAdd(src, data->mGridSize); } } return GridHandle(std::move(buffer)); @@ -489,7 +487,7 @@ mergeGrids(const VectorT> &handles, const BufferT* pool = nu } // namespace nanovdb #if defined(__CUDACC__) -#include +#include #endif// 
defined(__CUDACC__) #endif // NANOVDB_GRID_HANDLE_H_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/HostBuffer.h b/nanovdb/nanovdb/HostBuffer.h new file mode 100644 index 0000000000..c664856a07 --- /dev/null +++ b/nanovdb/nanovdb/HostBuffer.h @@ -0,0 +1,590 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/*! + @file nanovdb/HostBuffer.h + + @date April 20, 2021 + + @brief HostBuffer - a buffer that contains a shared or private bump + pool to either externally or internally managed host memory. + + @details This HostBuffer can be used in multiple ways, most of which are + demonstrated in the examples below. Memory in the pool can + be managed or unmanged (e.g. internal or external) and can + be shared between multiple buffers or belong to a single buffer. + + Example that uses HostBuffer::create inside io::readGrids to create a + full self-managed buffer, i.e. not shared and without padding, per grid in the file. + @code + auto handles = nanovdb::io::readGrids("file.nvdb"); + @endcode + + Example that uses HostBuffer::createFull. Assuming you have a raw pointer + to a NanoVDB grid of unknown type, this examples shows how to create its + GridHandle which can be used to enquire about the grid type and meta data. + @code + void *data;// pointer to a NanoVDB grid of unknown type + uint64_t size;// byte size of NanoVDB grid of unknown type + auto buffer = nanovdb::HostBuffer::createFull(size, data); + nanovdb::GridHandle<> gridHandle(std::move(buffer)); + @endcode + + Example that uses HostBuffer::createPool for internally managed host memory. + Suppose you want to read multiple grids in multiple files, but reuse the same + fixed sized memory buffer to both avoid memory fragmentation as well as + exceeding the fixed memory ceiling! + @code + auto pool = nanovdb::HostBuffer::createPool(1 << 30);// 1 GB memory pool + std::vector> frames;// vector of grid names + for (int i=0; i array(new char[size + NANOVDB_DATA_ALIGNMENT]);// scoped pool of 1 GB with padding + void *buffer = nanovdb::alignPtr(array.get());// 32B aligned buffer + auto pool = nanovdb::HostBuffer::createPool(poolSize, buffer); + auto handles = nanovdb::io::readGrids("file.nvdb", 0, pool); + @endcode +*/ + +#ifndef NANOVDB_HOSTBUFFER_H_HAS_BEEN_INCLUDED +#define NANOVDB_HOSTBUFFER_H_HAS_BEEN_INCLUDED + +#include // for NANOVDB_DATA_ALIGNMENT; +#include // for types like int32_t etc +#include // for fprintf +#include // for std::malloc/std::realloc/std::free +#include // for std::make_shared +#include // for std::mutex +#include // for std::unordered_set +#include // for assert +#include // for std::stringstream +#include // for memcpy + +#define checkPtr(ptr, msg) \ + { \ + ptrAssert((ptr), (msg), __FILE__, __LINE__); \ + } + +namespace nanovdb { + +template +struct BufferTraits +{ + static constexpr bool hasDeviceDual = false; +}; + +// ----------------------------> HostBuffer <-------------------------------------- + +/// @brief This is a buffer that contains a shared or private pool +/// to either externally or internally managed host memory. +/// +/// @note Terminology: +/// Pool: 0 = buffer.size() < buffer.poolSize() +/// Buffer: 0 < buffer.size() < buffer.poolSize() +/// Full: 0 < buffer.size() = buffer.poolSize() +/// Empty: 0 = buffer.size() = buffer.poolSize() +class HostBuffer +{ + struct Pool;// forward declaration of private pool struct + std::shared_ptr mPool; + uint64_t mSize; // total number of bytes for the NanoVDB grid. + void* mData; // raw buffer for the NanoVDB grid. 
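+ // Invariants, cf. the createPool/createFull documentation above: a pool-only buffer has mSize == 0 and mData == nullptr, + // while a full buffer has mSize == poolSize() and mData pointing at the (aligned) pool memory.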
+
+#if defined(DEBUG) || defined(_DEBUG)
+    static inline void ptrAssert(void* ptr, const char* msg, const char* file, int line, bool abort = true)
+    {
+        if (ptr == nullptr) {
+            fprintf(stderr, "NULL pointer error: %s %s %d\n", msg, file, line);
+            if (abort)
+                exit(1);
+        }
+        if (uint64_t(ptr) % NANOVDB_DATA_ALIGNMENT) {
+            fprintf(stderr, "Alignment pointer error: %s %s %d\n", msg, file, line);
+            if (abort)
+                exit(1);
+        }
+    }
+#else
+    static inline void ptrAssert(void*, const char*, const char*, int, bool = true)
+    {
+    }
+#endif
+
+public:
+    /// @brief Return a full buffer or an empty buffer
+    HostBuffer(uint64_t bufferSize = 0);
+
+    /// @brief Move constructor
+    HostBuffer(HostBuffer&& other);
+
+    /// @brief Custom destructor
+    ~HostBuffer() { this->clear(); }
+
+    /// @brief Move assignment operator
+    HostBuffer& operator=(HostBuffer&& other);
+
+    /// @brief Disallow copy-construction
+    HostBuffer(const HostBuffer&) = delete;
+
+    /// @brief Disallow copy assignment operator
+    HostBuffer& operator=(const HostBuffer&) = delete;
+
+    /// @brief Return a pool buffer which satisfies: buffer.size == 0,
+    ///        buffer.poolSize() == poolSize, and buffer.data() == nullptr.
+    ///        If data==nullptr, memory for the pool will be allocated.
+    ///
+    /// @throw If poolSize is zero.
+    static HostBuffer createPool(uint64_t poolSize, void *data = nullptr);
+
+    /// @brief Return a full buffer which satisfies: buffer.size == bufferSize,
+    ///        buffer.poolSize() == bufferSize, and buffer.data() == data.
+    ///        If data==nullptr, memory for the pool will be allocated.
+    ///
+    /// @throw If bufferSize is zero.
+    static HostBuffer createFull(uint64_t bufferSize, void *data = nullptr);
+
+    /// @brief Return a buffer with @c bufferSize bytes managed by
+    ///        the specified memory @c pool. If none is provided, i.e.
+    ///        @c pool == nullptr or @c pool->poolSize() == 0, one is
+    ///        created with size @c bufferSize, i.e. a full buffer is returned.
+    ///
+    /// @throw If the specified @c pool has insufficient memory for
+    ///        the requested buffer size.
+    static HostBuffer create(uint64_t bufferSize, const HostBuffer* pool = nullptr);
+
+    /// @brief Initialize as a full buffer with the specified size. If data is NULL
+    ///        the memory is internally allocated.
+    void init(uint64_t bufferSize, void *data = nullptr);
+
+    //@{
+    /// @brief Returns a pointer to the raw memory buffer managed by this allocator.
+    ///
+    /// @warning Note that the pointer can be NULL if the allocator was not initialized!
+    const void* data() const { return mData; }
+    void* data() { return mData; }
+    //@}
+
+    //@{
+    /// @brief Returns the size in bytes associated with this buffer.
+    uint64_t bufferSize() const { return mSize; }
+    uint64_t size() const { return this->bufferSize(); }
+    //@}
+
+    /// @brief Returns the size in bytes of the memory pool shared with this instance.
+    uint64_t poolSize() const;
+
+    /// @brief Return true if memory is managed (using std::malloc and std::free) by the
+    ///        shared pool in this buffer. Else memory is assumed to be managed externally.
+    bool isManaged() const;
+
+    //@{
+    /// @brief Returns true if this buffer has no memory associated with it
+    bool isEmpty() const { return !mPool || mSize == 0 || mData == nullptr; }
+    bool empty() const { return this->isEmpty(); }
+    //@}
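    // Editorial note: a minimal usage sketch of the pool API declared above, assuming
    // nanovdb::io::readGrids as in the file-header examples; "frame0.nvdb" and the
    // 64 MB size are hypothetical.
    //
    // @code
    //     auto pool = nanovdb::HostBuffer::createPool(1 << 26);// shared 64 MB pool
    //     {
    //         auto handles = nanovdb::io::readGrids("frame0.nvdb", 0, pool);// sub-allocates from pool
    //         // ... use handles; destroying them releases their registration, not the pool memory
    //     }
    //     pool.reset();// rewind the bump pool for reuse (only valid on empty buffers)
    // @endcode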
+    /// @brief Return true if this is a pool, i.e. an empty buffer with a nonempty
+    ///        internal pool, i.e. this->size() == 0 and this->poolSize() != 0
+    bool isPool() const { return mSize == 0 && this->poolSize() > 0; }
+
+    /// @brief Return true if the pool exists, is nonempty but has no more available memory
+    bool isFull() const;
+
+    /// @brief Clear this buffer so it is empty.
+    void clear();
+
+    /// @brief Clears all existing buffers that are registered against the memory pool
+    ///        and resets the pool so it can be reused to create new buffers.
+    ///
+    /// @throw If this instance is not empty or contains no pool.
+    ///
+    /// @warning This method is not thread-safe!
+    void reset();
+
+    /// @brief Total number of bytes from the pool currently in use by buffers
+    uint64_t poolUsage() const;
+
+    /// @brief Resize the pool. It will attempt to resize the existing
+    ///        memory block, but if that fails a deep copy is performed.
+    ///        If @c data is not NULL it will be used as new externally
+    ///        managed memory for the pool. All registered buffers are
+    ///        updated so GridHandle::grid might return a new address (if
+    ///        a deep copy was performed).
+    ///
+    /// @note This method can be used to resize the memory pool and even
+    ///       change it from internally to externally managed memory or vice versa.
+    ///
+    /// @throw If @c poolSize is less than this->poolUsage(), i.e. the memory
+    ///        currently in use, or if allocations fail.
+    void resizePool(uint64_t poolSize, void *data = nullptr);
+
+}; // HostBuffer class
+
+// --------------------------> Implementation of HostBuffer::Pool <------------------------------------
+
+// This is a private struct of HostBuffer so you can safely ignore the API
+struct HostBuffer::Pool
+{
+    using HashTableT = std::unordered_set<HostBuffer*>;
+    std::mutex mMutex; // mutex for updating mRegister and mFree
+    HashTableT mRegister;
+    void      *mData, *mFree;
+    uint64_t   mSize, mPadding;
+    bool       mManaged;
+
+    /// @brief External memory ctor
+    Pool(uint64_t size = 0, void* data = nullptr)
+        : mData(data)
+        , mFree(mData)
+        , mSize(size)
+        , mPadding(0)
+        , mManaged(data == nullptr)
+    {
+        if (mManaged) {
+            mData = Pool::alloc(mSize);
+            if (mData == nullptr) throw std::runtime_error("Pool::Pool malloc failed");
+        }
+        mPadding = alignmentPadding(mData);
+        if (!mManaged && mPadding != 0) {
+            throw std::runtime_error("Pool::Pool: external memory buffer is not aligned to " +
+                                     std::to_string(NANOVDB_DATA_ALIGNMENT) +
+                                     " bytes.\nHint: use nanovdb::alignPtr or std::aligned_alloc (C++17 only)");
+        }
+        mFree = util::PtrAdd(mData, mPadding);
+    }
+
+    /// @brief Custom destructor
+    ~Pool()
+    {
+        assert(mRegister.empty());
+        if (mManaged) std::free(mData);
+    }
+
+    /// @brief Disallow copy-construction
+    Pool(const Pool&) = delete;
+
+    /// @brief Disallow move-construction
+    Pool(const Pool&&) = delete;
+
+    /// @brief Disallow copy assignment operator
+    Pool& operator=(const Pool&) = delete;
+
+    /// @brief Disallow move assignment operator
+    Pool& operator=(const Pool&&) = delete;
+
+    /// @brief Return the total number of bytes used from this Pool by buffers
+    uint64_t usage() const { return util::PtrDiff(mFree, mData) - mPadding; }
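    // Editorial note: a quick worked example of the 32-byte alignment arithmetic that
    // add() below relies on (alignmentPadding() is defined in NanoVDB.h; the addresses
    // are illustrative):
    //
    //   padding = (32 - addr % 32) % 32
    //   addr = 0x1000 -> padding = 0  (already 32B aligned)
    //   addr = 0x1004 -> padding = 28 (next aligned address is 0x1020)
    //
    // add() first aligns the bump pointer mFree, then checks that the aligned request
    // still fits inside [mData + mPadding, mData + mPadding + mSize) before registering
    // the buffer.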
+    /// @brief Allocate a buffer of the specified size and add it to the register
+    void add(HostBuffer* buffer, uint64_t size)
+    {
+        void *alignedFree = util::PtrAdd(mFree, alignmentPadding(mFree));
+
+        if (util::PtrAdd(alignedFree, size) > util::PtrAdd(mData, mPadding + mSize)) {
+            std::stringstream ss;
+            ss << "HostBuffer::Pool: insufficient memory\n"
+               << "\tA buffer requested " << size << " bytes with " << NANOVDB_DATA_ALIGNMENT
+               << "-bytes alignment from a pool with "
+               << mSize << " bytes of which\n\t" << (util::PtrDiff(alignedFree, mData) - mPadding)
+               << " bytes are used by " << mRegister.size() << " other buffer(s). "
+               << "Pool is " << (mManaged ? "internally" : "externally") << " managed.\n";
+            //std::cerr << ss.str();
+            throw std::runtime_error(ss.str());
+        }
+        buffer->mSize = size;
+        const std::lock_guard<std::mutex> lock(mMutex);
+        mRegister.insert(buffer);
+        buffer->mData = alignedFree;
+        mFree = util::PtrAdd(alignedFree, size);
+    }
+
+    /// @brief Remove the specified buffer from the register
+    void remove(HostBuffer *buffer)
+    {
+        const std::lock_guard<std::mutex> lock(mMutex);
+        mRegister.erase(buffer);
+    }
+
+    /// @brief Replaces buffer1 with buffer2 in the register
+    void replace(HostBuffer *buffer1, HostBuffer *buffer2)
+    {
+        const std::lock_guard<std::mutex> lock(mMutex);
+        mRegister.erase(buffer1);
+        mRegister.insert(buffer2);
+    }
+
+    /// @brief Reset the register and all its buffers
+    void reset()
+    {
+        for (HostBuffer *buffer : mRegister) {
+            buffer->mPool.reset();
+            buffer->mSize = 0;
+            buffer->mData = nullptr;
+        }
+        mRegister.clear();
+        mFree = util::PtrAdd(mData, mPadding);
+    }
+
+    /// @brief Resize this Pool and update registered buffers as needed. If data is not NULL
+    ///        it is used as externally managed memory.
+    void resize(uint64_t size, void *data = nullptr)
+    {
+        const uint64_t memUsage = this->usage();
+
+        const bool managed = (data == nullptr);
+
+        if (!managed && alignmentPadding(data) != 0) {
+            throw std::runtime_error("Pool::resize: external memory buffer is not aligned to " +
+                                     std::to_string(NANOVDB_DATA_ALIGNMENT) + " bytes");
+        }
+
+        if (memUsage > size) {
+            throw std::runtime_error("Pool::resize: insufficient memory");
+        }
+
+        uint64_t padding = 0;
+        if (mManaged && managed && size != mSize) { // managed -> managed
+            padding = mPadding;
+            data = Pool::realloc(mData, memUsage, size, padding); // performs both copy and free of mData
+        } else if (!mManaged && managed) { // un-managed -> managed
+            data = Pool::alloc(size);
+            padding = alignmentPadding(data);
+        }
+
+        if (data == nullptr) {
+            throw std::runtime_error("Pool::resize: allocation failed");
+        } else if (data != mData) {
+            void* paddedData = util::PtrAdd(data, padding);
+
+            if (!(mManaged && managed)) { // no need to copy if managed -> managed
+                memcpy(paddedData, util::PtrAdd(mData, mPadding), memUsage);
+            }
+
+            for (HostBuffer* buffer : mRegister) { // update registered buffers
+                //buffer->mData = paddedData + ptrdiff_t(buffer->mData - (mData + mPadding));
+                buffer->mData = util::PtrAdd(paddedData, util::PtrDiff(buffer->mData, util::PtrAdd(mData, mPadding)));
+            }
+            mFree = util::PtrAdd(paddedData, memUsage); // update the free pointer
+            if (mManaged && !managed) {// only free if managed -> un-managed
+                std::free(mData);
+            }
+
+            mData = data;
+            mPadding = padding;
+        }
+        mSize = size;
+        mManaged = managed;
+    }
+    /// @brief Return true if all the memory in this pool is in use.
+    bool isFull() const
+    {
+        assert(mFree <= util::PtrAdd(mData, mPadding + mSize));
+        return mSize > 0 ? 
mFree == util::PtrAdd(mData, mPadding + mSize) : false; + } + +private: + + static void* alloc(uint64_t size) + { +//#if (__cplusplus >= 201703L) +// return std::aligned_alloc(NANOVDB_DATA_ALIGNMENT, size);//C++17 or newer +//#else + // make sure we alloc enough space to align the result + return std::malloc(size + NANOVDB_DATA_ALIGNMENT); +//#endif + } + + static void* realloc(void* const origData, + uint64_t origSize, + uint64_t desiredSize, + uint64_t& padding) + { + // make sure we alloc enough space to align the result + void* data = std::realloc(origData, desiredSize + NANOVDB_DATA_ALIGNMENT); + + if (data != nullptr && data != origData) { + uint64_t newPadding = alignmentPadding(data); + // Number of padding bytes may have changed -- move data if that's the case + if (newPadding != padding) { + // Realloc should not happen when shrinking down buffer, but let's be safe + std::memmove(util::PtrAdd(data, newPadding), + util::PtrAdd(data, padding), + math::Min(origSize, desiredSize)); + padding = newPadding; + } + } + + return data; + } + +};// struct HostBuffer::Pool + +// --------------------------> Implementation of HostBuffer <------------------------------------ + +inline HostBuffer::HostBuffer(uint64_t size) : mPool(nullptr), mSize(size), mData(nullptr) +{ + if (size>0) { + mPool = std::make_shared(size); + mData = mPool->mFree; + mPool->mRegister.insert(this); + mPool->mFree = util::PtrAdd(mPool->mFree, size); + } +} + +inline HostBuffer::HostBuffer(HostBuffer&& other) : mPool(other.mPool), mSize(other.mSize), mData(other.mData) +{ + if (mPool && mSize != 0) { + mPool->replace(&other, this); + } + other.mPool.reset(); + other.mSize = 0; + other.mData = nullptr; +} + +inline void HostBuffer::init(uint64_t bufferSize, void *data) +{ + if (bufferSize == 0) { + throw std::runtime_error("HostBuffer: invalid buffer size"); + } + if (mPool) { + mPool.reset(); + } + if (!mPool || mPool->mSize != bufferSize) { + mPool = std::make_shared(bufferSize, data); + } + mPool->add(this, bufferSize); +} + +inline HostBuffer& HostBuffer::operator=(HostBuffer&& other) +{ + if (mPool) { + mPool->remove(this); + } + mPool = other.mPool; + mSize = other.mSize; + mData = other.mData; + if (mPool && mSize != 0) { + mPool->replace(&other, this); + } + other.mPool.reset(); + other.mSize = 0; + other.mData = nullptr; + return *this; +} + +inline uint64_t HostBuffer::poolSize() const +{ + return mPool ? mPool->mSize : 0u; +} + +inline uint64_t HostBuffer::poolUsage() const +{ + return mPool ? mPool->usage(): 0u; +} + +inline bool HostBuffer::isManaged() const +{ + return mPool ? mPool->mManaged : false; +} + +inline bool HostBuffer::isFull() const +{ + return mPool ? 
mPool->isFull() : false;
+}
+
+inline HostBuffer HostBuffer::createPool(uint64_t poolSize, void *data)
+{
+    if (poolSize == 0) {
+        throw std::runtime_error("HostBuffer: invalid pool size");
+    }
+    HostBuffer buffer;
+    buffer.mPool = std::make_shared<Pool>(poolSize, data);
+    // note the buffer is NOT registered by its pool since it is not using its memory
+    buffer.mSize = 0;
+    buffer.mData = nullptr;
+    return buffer;
+}
+
+inline HostBuffer HostBuffer::createFull(uint64_t bufferSize, void *data)
+{
+    if (bufferSize == 0) {
+        throw std::runtime_error("HostBuffer: invalid buffer size");
+    }
+    HostBuffer buffer;
+    buffer.mPool = std::make_shared<Pool>(bufferSize, data);
+    buffer.mPool->add(&buffer, bufferSize);
+    return buffer;
+}
+
+inline HostBuffer HostBuffer::create(uint64_t bufferSize, const HostBuffer* pool)
+{
+    HostBuffer buffer;
+    if (pool == nullptr || !pool->mPool) {
+        buffer.mPool = std::make_shared<Pool>(bufferSize);
+    } else {
+        buffer.mPool = pool->mPool;
+    }
+    buffer.mPool->add(&buffer, bufferSize);
+    return buffer;
+}
+
+inline void HostBuffer::clear()
+{
+    if (mPool) {// remove self from the buffer register in the pool
+        mPool->remove(this);
+    }
+    mPool.reset();
+    mSize = 0;
+    mData = nullptr;
+}
+
+inline void HostBuffer::reset()
+{
+    if (this->size()>0) {
+        throw std::runtime_error("HostBuffer: only empty buffers can call reset");
+    }
+    if (!mPool) {
+        throw std::runtime_error("HostBuffer: this buffer contains no pool to reset");
+    }
+    mPool->reset();
+}
+
+inline void HostBuffer::resizePool(uint64_t size, void *data)
+{
+    if (!mPool) {
+        throw std::runtime_error("HostBuffer: this buffer contains no pool to resize");
+    }
+    mPool->resize(size, data);
+}
+
+} // namespace nanovdb
+
+#endif // end of NANOVDB_HOSTBUFFER_H_HAS_BEEN_INCLUDED
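// Editorial note: a small sketch of HostBuffer::resizePool as defined above. Everything
// here follows the doc comments; the 64 MB/128 MB sizes are arbitrary.
//
// @code
//     auto pool = nanovdb::HostBuffer::createPool(64 << 20);    // 64 MB, internally managed
//     auto buf  = nanovdb::HostBuffer::create(32 << 20, &pool); // bump-allocate 32 MB from it
//     pool.resizePool(128 << 20);                               // grow; registered buffers are
//                                                               // re-based if a deep copy occurred
//     // NB: any raw pointer previously obtained from buf.data() may now be stale.
// @endcode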
diff --git a/nanovdb/nanovdb/NanoVDB.h b/nanovdb/nanovdb/NanoVDB.h
index 2e37c46ac0..5e912a7868 100644
--- a/nanovdb/nanovdb/NanoVDB.h
+++ b/nanovdb/nanovdb/NanoVDB.h
@@ -2,7 +2,7 @@
 // SPDX-License-Identifier: MPL-2.0

 /*!
-    \file NanoVDB.h
+    \file nanovdb/NanoVDB.h

     \author Ken Museth

@@ -118,20 +118,31 @@
 #ifndef NANOVDB_NANOVDB_H_HAS_BEEN_INCLUDED
 #define NANOVDB_NANOVDB_H_HAS_BEEN_INCLUDED

-// NANOVDB_MAGIC_NUMBER is currently used for both grids and files (starting with v32.6.0)
-// NANOVDB_MAGIC_GRID will soon be used exclusively for grids
+// The following two header files are the only mandatory dependencies
+#include <nanovdb/util/Util.h> // for __hostdev__ and lots of other utility functions
+#include <nanovdb/math/Math.h> // for Coord, BBox, Vec3, Vec4 etc
+
+// Do not change this value! 32 byte alignment is fixed in NanoVDB
+#define NANOVDB_DATA_ALIGNMENT 32
+
+// NANOVDB_MAGIC_NUMB is currently used for both grids and files (starting with v32.6.0)
+// NANOVDB_MAGIC_GRID will soon be used exclusively for grids (serialized to a single buffer)
 // NANOVDB_MAGIC_FILE will soon be used exclusively for files
 // NANOVDB_MAGIC_NODE will soon be used exclusively for NodeManager
+// NANOVDB_MAGIC_FRAG will soon be used exclusively for a fragmented grid, i.e. a grid that is not serialized
 // |     : 0 in 30 corresponds to 0 in NanoVDB0
-#define NANOVDB_MAGIC_NUMBER 0x304244566f6e614eUL // "NanoVDB0" in hex - little endian (uint64_t)
-#define NANOVDB_MAGIC_GRID 0x314244566f6e614eUL // "NanoVDB1" in hex - little endian (uint64_t)
-#define NANOVDB_MAGIC_FILE 0x324244566f6e614eUL // "NanoVDB2" in hex - little endian (uint64_t)
-#define NANOVDB_MAGIC_NODE 0x334244566f6e614eUL // "NanoVDB3" in hex - little endian (uint64_t)
-#define NANOVDB_MAGIC_MASK 0x00FFFFFFFFFFFFFFUL // use this mask to remove the number
+#define NANOVDB_MAGIC_NUMB 0x304244566f6e614eUL // "NanoVDB0" in hex - little endian (uint64_t)
+#define NANOVDB_MAGIC_GRID 0x314244566f6e614eUL // "NanoVDB1" in hex - little endian (uint64_t)
+#define NANOVDB_MAGIC_FILE 0x324244566f6e614eUL // "NanoVDB2" in hex - little endian (uint64_t)
+#define NANOVDB_MAGIC_NODE 0x334244566f6e614eUL // "NanoVDB3" in hex - little endian (uint64_t)
+#define NANOVDB_MAGIC_FRAG 0x344244566f6e614eUL // "NanoVDB4" in hex - little endian (uint64_t)
+#define NANOVDB_MAGIC_MASK 0x00FFFFFFFFFFFFFFUL // use this mask to remove the number
+
+//#define NANOVDB_MAGIC_NUMBER 0x304244566f6e614eUL
 //#define NANOVDB_USE_NEW_MAGIC_NUMBERS// used to enable use of the new magic numbers described above

 #define NANOVDB_MAJOR_VERSION_NUMBER 32 // reflects changes to the ABI and hence also the file format
-#define NANOVDB_MINOR_VERSION_NUMBER 6 // reflects changes to the API but not ABI
+#define NANOVDB_MINOR_VERSION_NUMBER 7 // reflects changes to the API but not ABI
 #define NANOVDB_PATCH_VERSION_NUMBER 0 // reflects changes that do not affect the ABI or API

 #define TBB_SUPPRESS_DEPRECATED_MESSAGES 1
@@ -150,101 +161,11 @@

 #define NANOVDB_FPN_BRANCHLESS

-// Do not change this value! 32 byte alignment is fixed in NanoVDB
-#define NANOVDB_DATA_ALIGNMENT 32
-
 #if !defined(NANOVDB_ALIGN)
 #define NANOVDB_ALIGN(n) alignas(n)
 #endif // !defined(NANOVDB_ALIGN)

-#ifdef __CUDACC_RTC__
-
-typedef signed char int8_t;
-typedef short int16_t;
-typedef int int32_t;
-typedef long long int64_t;
-typedef unsigned char uint8_t;
-typedef unsigned int uint32_t;
-typedef unsigned short uint16_t;
-typedef unsigned long long uint64_t;
-
-#define NANOVDB_ASSERT(x)
-
-#define UINT64_C(x) (x ## ULL)
-
-#else // !__CUDACC_RTC__
-
-#include <cstdlib> // for abs in clang7
-#include <cstdint> // for types like int32_t etc
-#include <cstddef> // for size_t type
-#include <cassert> // for assert
-#include <cstdio> // for snprintf
-#include <cmath> // for sqrt and fma
-#include <limits> // for numeric_limits
-#include <utility> // for std::move
-#ifdef NANOVDB_USE_IOSTREAMS
-#include <fstream> // for read/writeUncompressedGrids
-#endif
-// All asserts can be disabled here, even for debug builds
-#if 1
-#define NANOVDB_ASSERT(x) assert(x)
-#else
-#define NANOVDB_ASSERT(x)
-#endif
-
-#if defined(NANOVDB_USE_INTRINSICS) && defined(_MSC_VER)
-#include <intrin.h>
-#pragma intrinsic(_BitScanReverse)
-#pragma intrinsic(_BitScanForward)
-#pragma intrinsic(_BitScanReverse64)
-#pragma intrinsic(_BitScanForward64)
-#endif
-
-#endif // __CUDACC_RTC__
-
-#if defined(__CUDACC__) || defined(__HIP__)
-// Only define __hostdev__ when using NVIDIA CUDA or HIP compilers
-#ifndef __hostdev__
-#define __hostdev__ __host__ __device__ // Runs on the CPU and GPU, called from the CPU or the GPU
-#endif
-#else
-// Dummy definitions of macros only defined by CUDA and HIP compilers
-#ifndef __hostdev__
-#define __hostdev__ // Runs on the CPU and GPU, called from the CPU or the GPU
-#endif
-#ifndef __global__
-#define __global__ // Runs on the GPU, called from the CPU or the GPU
-#endif
-#ifndef __device__
-#define __device__ // Runs on the GPU, called from the GPU
-#endif
-#ifndef __host__
-#define __host__ // Runs on the CPU, called from the CPU
-#endif
-
-#endif // if defined(__CUDACC__) || defined(__HIP__)
-
-// The following macro will suppress annoying warnings when nvcc
-// compiles functions that call (host) intrinsics (which is perfectly valid)
-#if defined(_MSC_VER) && defined(__CUDACC__)
-#define NANOVDB_HOSTDEV_DISABLE_WARNING __pragma("hd_warning_disable")
-#elif defined(__GNUC__) && defined(__CUDACC__)
-#define NANOVDB_HOSTDEV_DISABLE_WARNING _Pragma("hd_warning_disable")
-#else
-#define NANOVDB_HOSTDEV_DISABLE_WARNING
-#endif
-
-// Define compiler warnings that work with all compilers
-//#if defined(_MSC_VER)
-//#define NANO_WARNING(msg) _pragma("message" #msg)
-//#else
-//#define NANO_WARNING(msg) _Pragma("message" #msg)
-//#endif
-
-// A portable implementation of offsetof - unfortunately it doesn't work with static_assert
-#define NANOVDB_OFFSETOF(CLASS, MEMBER) ((int)(size_t)((char*)&((CLASS*)0)->MEMBER - (char*)0))
-
-namespace nanovdb {
+namespace nanovdb {// =================================================================

 // --------------------------> Build types <------------------------------------

@@ -283,13 +204,17 @@ class Point{};

 // --------------------------> GridType <------------------------------------

+/// @brief return the number of characters (including null termination) required to convert enum type to a string
+template<typename EnumT>
+__hostdev__ inline constexpr uint32_t strlen(){return (uint32_t)EnumT::StrLen - (uint32_t)EnumT::End;}
+
 /// @brief List of types that are currently supported by NanoVDB
 ///
 /// @note To expand on this list do:
 ///       1) Add the new type between Unknown and End in the enum below
 ///       2) Add the new type to OpenToNanoVDB::processGrid that maps OpenVDB types to GridType
 ///       3) Verify that the ConvertTrait in NanoToOpenVDB.h works correctly with the new type
-///       4) Add the new type to mapToGridType (defined below) that maps NanoVDB types to GridType
+///       4) Add the new type to toGridType (defined below) that maps NanoVDB types to GridType
 ///       5) Add the new type to toStr (defined below)
 enum class GridType : uint32_t { Unknown = 0, // unknown value type - should rarely be used
                                  Float = 1, // single precision floating point value
@@ -317,21 +242,47 @@ enum class GridType : uint32_t { Unknown = 0, // unknown value type - should ra
                                  PointIndex = 23, // voxels encode indices to co-located points
                                  Vec3u8 = 24, // 8bit quantization of floating point 3D vector (only as blind data)
                                  Vec3u16 = 25, // 16bit quantization of floating point 3D vector (only as blind data)
-                                 End = 26 }; // should never be used
+                                 UInt8 = 26, // 8 bit unsigned integer values (eg 0 -> 255 gray scale)
+                                 End = 27,// total number of types in this enum (excluding StrLen since it's not a type)
+                                 StrLen = End + 12};// this entry is used to determine the minimum size of c-string

-#ifndef __CUDACC_RTC__
 /// @brief Maps a GridType to a c-string
-/// @param gridType GridType to be mapped to a string
+/// @param dst destination string of size 12 or larger
+/// @param gridType GridType enum to be mapped to a string
 /// @return Returns a c-string used to describe a GridType
-inline const char* toStr(GridType gridType)
-{
-    static const char* LUT[] = {"?", "float", "double", "int16", "int32", "int64", "Vec3f", "Vec3d", "Mask", "Half",
-                                "uint32", "bool", "RGBA8", "Float4", "Float8", "Float16", "FloatN", "Vec4f", "Vec4d",
-                                "Index", "OnIndex", "IndexMask", "OnIndexMask", "PointIndex", "Vec3u8", "Vec3u16", "End"};
-    static_assert(sizeof(LUT) / sizeof(char*) - 1 == int(GridType::End), "Unexpected size of LUT");
-    return LUT[static_cast<int>(gridType)];
+__hostdev__ inline char* toStr(char *dst, GridType gridType)
+{
+    switch (gridType){
+        case GridType::Unknown: return util::strcpy(dst, "?");
+        case GridType::Float: return util::strcpy(dst, "float");
+        case GridType::Double: return util::strcpy(dst, "double");
+        case GridType::Int16: return util::strcpy(dst, "int16");
+        case GridType::Int32: return util::strcpy(dst, "int32");
+        case GridType::Int64: return util::strcpy(dst, "int64");
+        case GridType::Vec3f: return util::strcpy(dst, "Vec3f");
+        case GridType::Vec3d: return util::strcpy(dst, "Vec3d");
+        case GridType::Mask: return util::strcpy(dst, "Mask");
+        case GridType::Half: return util::strcpy(dst, "Half");
+        case GridType::UInt32: return util::strcpy(dst, "uint32");
+        case GridType::Boolean: return util::strcpy(dst, "bool");
+        case GridType::RGBA8: return util::strcpy(dst, "RGBA8");
+        case GridType::Fp4: return util::strcpy(dst, "Float4");
+        case GridType::Fp8: return util::strcpy(dst, "Float8");
+        case GridType::Fp16: return util::strcpy(dst, "Float16");
+        case GridType::FpN: return util::strcpy(dst, "FloatN");
+        case GridType::Vec4f: return util::strcpy(dst, "Vec4f");
+        case GridType::Vec4d: return util::strcpy(dst, "Vec4d");
+        case GridType::Index: return util::strcpy(dst, "Index");
+        case GridType::OnIndex: return util::strcpy(dst, "OnIndex");
+        case GridType::IndexMask: return util::strcpy(dst, "IndexMask");
+        case GridType::OnIndexMask: return util::strcpy(dst, "OnIndexMask");
+        case GridType::PointIndex: return util::strcpy(dst, "PointIndex");
+        case GridType::Vec3u8: return util::strcpy(dst, "Vec3u8");
+        case GridType::Vec3u16: return util::strcpy(dst, "Vec3u16");
+        case GridType::UInt8: return util::strcpy(dst, "uint8");
+        default: return util::strcpy(dst, "End");
+    }
 }
-#endif

 // --------------------------> GridClass <------------------------------------

@@ -346,17 +297,29 @@ enum class GridClass : uint32_t { Unknown = 0,
                                   VoxelVolume = 7, // volume of geometric cubes, e.g. colors cubes in Minecraft
                                   IndexGrid = 8, // grid whose values are offsets, e.g. into an external array
                                   TensorGrid = 9, // Index grid for indexing learnable tensor features
-                                  End = 10 };
+                                  End = 10,// total number of types in this enum (excluding StrLen since it's not a type)
+                                  StrLen = End + 7};// this entry is used to determine the minimum size of c-string
+

-#ifndef __CUDACC_RTC__
 /// @brief Returns a c-string used to describe a GridClass
-inline const char* toStr(GridClass gridClass)
-{
-    static const char* LUT[] = {"?", "SDF", "FOG", "MAC", "PNTIDX", "PNTDAT", "TOPO", "VOX", "INDEX", "TENSOR", "END"};
-    static_assert(sizeof(LUT) / sizeof(char*) - 1 == int(GridClass::End), "Unexpected size of LUT");
-    return LUT[static_cast<int>(gridClass)];
+/// @param dst destination string of size 7 or larger
+/// @param gridClass GridClass enum to be converted to a string
+__hostdev__ inline char* toStr(char *dst, GridClass gridClass)
+{
+    switch (gridClass){
+        case GridClass::Unknown: return util::strcpy(dst, "?");
+        case GridClass::LevelSet: return util::strcpy(dst, "SDF");
+        case GridClass::FogVolume: return util::strcpy(dst, "FOG");
+        case GridClass::Staggered: return util::strcpy(dst, "MAC");
+        case GridClass::PointIndex: return util::strcpy(dst, "PNTIDX");
+        case GridClass::PointData: return util::strcpy(dst, "PNTDAT");
+        case GridClass::Topology: return util::strcpy(dst, "TOPO");
+        case GridClass::VoxelVolume: return util::strcpy(dst, "VOX");
+        case GridClass::IndexGrid: return util::strcpy(dst, "INDEX");
+        case GridClass::TensorGrid: return util::strcpy(dst, "TENSOR");
+        default: return util::strcpy(dst, "END");
+    }
 }
-#endif

 // --------------------------> GridFlags <------------------------------------

@@ -369,23 +332,83 @@ enum class GridFlags : uint32_t {
     HasStdDeviation = 1 << 4, // nodes contain standard deviations of active values
     IsBreadthFirst = 1 << 5, // nodes are typically arranged breadth-first in memory
     End = 1 << 6, // use End - 1 as a mask for the 6 lower bit flags
+    StrLen = End + 23,// this entry is used to determine the minimum size of c-string
 };

-#ifndef __CUDACC_RTC__
 /// @brief Returns a c-string used to describe a GridFlags
-inline const char* toStr(GridFlags gridFlags)
-{
-    static const char* LUT[] = {"has long grid name",
-                                "has bbox",
-                                "has min/max",
-                                "has average",
-                                "has standard deviation",
-                                "is breadth-first",
-                                "end"};
-    static_assert(1 << (sizeof(LUT) / sizeof(char*) - 1) == int(GridFlags::End), "Unexpected size of LUT");
-    return LUT[static_cast<int>(gridFlags)];
+/// @param dst destination string of size 23 or larger
+/// @param gridFlags GridFlags enum to be converted to a string
+__hostdev__ inline const char* toStr(char *dst, GridFlags gridFlags)
+{
+    switch (gridFlags){
+        case GridFlags::HasLongGridName: return util::strcpy(dst, "has long grid name");
+        case GridFlags::HasBBox: return util::strcpy(dst, "has bbox");
+        case GridFlags::HasMinMax: return util::strcpy(dst, "has min/max");
+        case GridFlags::HasAverage: return util::strcpy(dst, "has average");
+        case GridFlags::HasStdDeviation: return util::strcpy(dst, "has standard deviation");
+        case GridFlags::IsBreadthFirst: return util::strcpy(dst, "is breadth-first");
+        default: return util::strcpy(dst, "end");
+    }
+}
+
+// --------------------------> MagicType <------------------------------------
+
+/// @brief Enums used to identify magic numbers recognized by NanoVDB
+enum class MagicType : uint32_t { Unknown = 0,// first 64 bits are neither of the cases below
+                                  OpenVDB = 1,// first 32 bits = 0x56444220UL
+                                  NanoVDB = 2,// first 64 bits = NANOVDB_MAGIC_NUMB
+                                  NanoGrid = 3,// first 64 bits = 
NANOVDB_MAGIC_GRID + NanoFile = 4,// first 64 bits = NANOVDB_MAGIC_FILE + NanoNode = 5,// first 64 bits = NANOVDB_MAGIC_NODE + NanoFrag = 6,// first 64 bits = NANOVDB_MAGIC_FRAG + End = 7, + StrLen = End + 25};// this entry is used to determine the minimum size of c-string + +/// @brief maps 64 bits of magic number to enum +__hostdev__ inline MagicType toMagic(uint64_t magic) +{ + switch (magic){ + case NANOVDB_MAGIC_NUMB: return MagicType::NanoVDB; + case NANOVDB_MAGIC_GRID: return MagicType::NanoGrid; + case NANOVDB_MAGIC_FILE: return MagicType::NanoFile; + case NANOVDB_MAGIC_NODE: return MagicType::NanoNode; + case NANOVDB_MAGIC_FRAG: return MagicType::NanoFrag; + default: return (magic & ~uint32_t(0)) == 0x56444220UL ? MagicType::OpenVDB : MagicType::Unknown; + } +} + +/// @brief print 64-bit magic number to string +/// @param dst destination string of size 25 or larger +/// @param magic 64 bit magic number to be printed +/// @return return destination string @c dst +__hostdev__ inline char* toStr(char *dst, MagicType magic) +{ + switch (magic){ + case MagicType::Unknown: return util::strcpy(dst, "unknown"); + case MagicType::NanoVDB: return util::strcpy(dst, "nanovdb"); + case MagicType::NanoGrid: return util::strcpy(dst, "nanovdb::Grid"); + case MagicType::NanoFile: return util::strcpy(dst, "nanovdb::File"); + case MagicType::NanoNode: return util::strcpy(dst, "nanovdb::NodeManager"); + case MagicType::NanoFrag: return util::strcpy(dst, "fragmented nanovdb::Grid"); + case MagicType::OpenVDB: return util::strcpy(dst, "openvdb"); + default: return util::strcpy(dst, "end"); + } } -#endif + +// --------------------------> PointType enums <------------------------------------ + +// Define the type used when the points are encoded as blind data in the output grid +enum class PointType : uint32_t { Disable = 0,// no point information e.g. when BuildT != Point + PointID = 1,// linear index of type uint32_t to points + World64 = 2,// Vec3d in world space + World32 = 3,// Vec3f in world space + Grid64 = 4,// Vec3d in grid space + Grid32 = 5,// Vec3f in grid space + Voxel32 = 6,// Vec3f in voxel space + Voxel16 = 7,// Vec3u16 in voxel space + Voxel8 = 8,// Vec3u8 in voxel space + Default = 9,// output matches input, i.e. Vec3d or Vec3f in world space + End =10 }; // --------------------------> GridBlindData enums <------------------------------------ @@ -410,37 +433,6 @@ enum class GridBlindDataSemantic : uint32_t { Unknown = 0, VoxelCoords = 9, // 3D coordinates in voxel space, e.g. (0.2, 0.0, 0.7) End = 10 }; -// --------------------------> is_same <------------------------------------ - -/// @brief C++11 implementation of std::is_same -/// @note When more than two arguments are provided value = T0==T1 || T0==T2 || ... 
-template -struct is_same -{ - static constexpr bool value = is_same::value || is_same::value; -}; - -template -struct is_same -{ - static constexpr bool value = false; -}; - -template -struct is_same -{ - static constexpr bool value = true; -}; - -// --------------------------> is_floating_point <------------------------------------ - -/// @brief C++11 implementation of std::is_floating_point -template -struct is_floating_point -{ - static constexpr bool value = is_same::value; -}; - // --------------------------> BuildTraits <------------------------------------ /// @brief Define static boolean tests for template build types @@ -448,179 +440,20 @@ template struct BuildTraits { // check if T is an index type - static constexpr bool is_index = is_same::value; - static constexpr bool is_onindex = is_same::value; - static constexpr bool is_offindex = is_same::value; - static constexpr bool is_indexmask = is_same::value; + static constexpr bool is_index = util::is_same::value; + static constexpr bool is_onindex = util::is_same::value; + static constexpr bool is_offindex = util::is_same::value; + static constexpr bool is_indexmask = util::is_same::value; // check if T is a compressed float type with fixed bit precision - static constexpr bool is_FpX = is_same::value; + static constexpr bool is_FpX = util::is_same::value; // check if T is a compressed float type with fixed or variable bit precision - static constexpr bool is_Fp = is_same::value; + static constexpr bool is_Fp = util::is_same::value; // check if T is a POD float type, i.e float or double - static constexpr bool is_float = is_floating_point::value; + static constexpr bool is_float = util::is_floating_point::value; // check if T is a template specialization of LeafData, i.e. has T mValues[512] - static constexpr bool is_special = is_index || is_Fp || is_same::value; + static constexpr bool is_special = is_index || is_Fp || util::is_same::value; }; // BuildTraits -// --------------------------> enable_if <------------------------------------ - -/// @brief C++11 implementation of std::enable_if -template -struct enable_if -{ -}; - -template -struct enable_if -{ - using type = T; -}; - -// --------------------------> disable_if <------------------------------------ - -template -struct disable_if -{ - typedef T type; -}; - -template -struct disable_if -{ -}; - -// --------------------------> is_const <------------------------------------ - -template -struct is_const -{ - static constexpr bool value = false; -}; - -template -struct is_const -{ - static constexpr bool value = true; -}; - -// --------------------------> is_pointer <------------------------------------ - -/// @brief Trait used to identify template parameter that are pointers -/// @tparam T Template parameter to be tested -template -struct is_pointer -{ - static constexpr bool value = false; -}; - -/// @brief Template specialization of non-const pointers -/// @tparam T Template parameter to be tested -template -struct is_pointer -{ - static constexpr bool value = true; -}; - -/// @brief Template specialization of const pointers -/// @tparam T Template parameter to be tested -template -struct is_pointer -{ - static constexpr bool value = true; -}; - -// --------------------------> remove_const <------------------------------------ - -/// @brief Trait use to const from type. 
Default implementation is just a pass-through -/// @tparam T Type -/// @details remove_pointer::type = float -template -struct remove_const -{ - using type = T; -}; - -/// @brief Template specialization of trait class use to remove const qualifier type from a type -/// @tparam T Type of the const type -/// @details remove_pointer::type = float -template -struct remove_const -{ - using type = T; -}; - -// --------------------------> remove_reference <------------------------------------ - -/// @brief Trait use to remove reference, i.e. "&", qualifier from a type. Default implementation is just a pass-through -/// @tparam T Type -/// @details remove_pointer::type = float -template -struct remove_reference {using type = T;}; - -/// @brief Template specialization of trait class use to remove reference, i.e. "&", qualifier from a type -/// @tparam T Type of the reference -/// @details remove_pointer::type = float -template -struct remove_reference {using type = T;}; - -// --------------------------> remove_pointer <------------------------------------ - -/// @brief Trait use to remove pointer, i.e. "*", qualifier from a type. Default implementation is just a pass-through -/// @tparam T Type -/// @details remove_pointer::type = float -template -struct remove_pointer {using type = T;}; - -/// @brief Template specialization of trait class use to to remove pointer, i.e. "*", qualifier from a type -/// @tparam T Type of the pointer -/// @details remove_pointer::type = float -template -struct remove_pointer {using type = T;}; - -// --------------------------> match_const <------------------------------------ - -/// @brief Trait used to transfer the const-ness of a reference type to another type -/// @tparam T Type whose const-ness needs to match the reference type -/// @tparam ReferenceT Reference type that is not const -/// @details match_const::type = int -/// match_const::type = int -template -struct match_const -{ - using type = typename remove_const::type; -}; - -/// @brief Template specialization used to transfer the const-ness of a reference type to another type -/// @tparam T Type that will adopt the const-ness of the reference type -/// @tparam ReferenceT Reference type that is const -/// @details match_const::type = const int -/// match_const::type = const int -template -struct match_const -{ - using type = const typename remove_const::type; -}; - -// --------------------------> is_specialization <------------------------------------ - -/// @brief Metafunction used to determine if the first template -/// parameter is a specialization of the class template -/// given in the second template parameter. -/// -/// @details is_specialization, Vec3>::value == true; -/// is_specialization::value == true; -/// is_specialization, std::vector>::value == true; -template class TemplateType> -struct is_specialization -{ - static const bool value = false; -}; -template class TemplateType> -struct is_specialization, TemplateType> -{ - static const bool value = true; -}; - // --------------------------> BuildToValueMap <------------------------------------ /// @brief Maps one type (e.g. 
the build types above) to other (actual) types @@ -710,83 +543,19 @@ struct BuildToValueMap // --------------------------> utility functions related to alignment <------------------------------------ -/// @brief return true if the specified pointer is aligned -__hostdev__ inline static bool isAligned(const void* p) -{ - return uint64_t(p) % NANOVDB_DATA_ALIGNMENT == 0; -} - -/// @brief return true if the specified pointer is aligned and not NULL -__hostdev__ inline static bool isValid(const void* p) -{ - return p != nullptr && uint64_t(p) % NANOVDB_DATA_ALIGNMENT == 0; -} +/// @brief return true if the specified pointer is 32 byte aligned +__hostdev__ inline static bool isAligned(const void* p){return uint64_t(p) % NANOVDB_DATA_ALIGNMENT == 0;} -/// @brief return the smallest number of bytes that when added to the specified pointer results in an aligned pointer +/// @brief return the smallest number of bytes that when added to the specified pointer results in a 32 byte aligned pointer. __hostdev__ inline static uint64_t alignmentPadding(const void* p) { NANOVDB_ASSERT(p); return (NANOVDB_DATA_ALIGNMENT - (uint64_t(p) % NANOVDB_DATA_ALIGNMENT)) % NANOVDB_DATA_ALIGNMENT; } -/// @brief offset the specified pointer so it is aligned. -template -__hostdev__ inline static T* alignPtr(T* p) -{ - NANOVDB_ASSERT(p); - return reinterpret_cast( (uint8_t*)p + alignmentPadding(p) ); -} - -/// @brief offset the specified const pointer so it is aligned. +/// @brief offset the specified pointer so it is 32 byte aligned. Works with both const and non-const pointers. template -__hostdev__ inline static const T* alignPtr(const T* p) -{ - NANOVDB_ASSERT(p); - return reinterpret_cast( (const uint8_t*)p + alignmentPadding(p) ); -} - -// --------------------------> PtrDiff <------------------------------------ - -/// @brief Compute the distance, in bytes, between two pointers -/// @tparam T1 Type of the first pointer -/// @tparam T2 Type of the second pointer -/// @param p fist pointer, assumed to NOT be NULL -/// @param q second pointer, assumed to NOT be NULL -/// @return signed distance between pointer addresses in units of bytes -template -__hostdev__ inline static int64_t PtrDiff(const T1* p, const T2* q) -{ - NANOVDB_ASSERT(p && q); - return reinterpret_cast(p) - reinterpret_cast(q); -} - -// --------------------------> PtrAdd <------------------------------------ - -/// @brief Adds a byte offset of a non-const pointer to produce another non-const pointer -/// @tparam DstT Type of the return pointer -/// @tparam SrcT Type of the input pointer -/// @param p non-const input pointer, assumed to NOT be NULL -/// @param offset signed byte offset -/// @return a non-const pointer defined as the offset of an input pointer -template -__hostdev__ inline static DstT* PtrAdd(SrcT* p, int64_t offset) -{ - NANOVDB_ASSERT(p); - return reinterpret_cast(reinterpret_cast(p) + offset); -} - -/// @brief Adds a byte offset of a const pointer to produce another const pointer -/// @tparam DstT Type of the return pointer -/// @tparam SrcT Type of the input pointer -/// @param p const input pointer, assumed to NOT be NULL -/// @param offset signed byte offset -/// @return a const pointer defined as the offset of a const input pointer -template -__hostdev__ inline static const DstT* PtrAdd(const SrcT* p, int64_t offset) -{ - NANOVDB_ASSERT(p); - return reinterpret_cast(reinterpret_cast(p) + offset); -} +__hostdev__ inline static T* alignPtr(T* p){return util::PtrAdd(p, alignmentPadding(p));} // --------------------------> 
isFloatingPoint(GridType) <------------------------------------ @@ -822,7 +591,8 @@ __hostdev__ inline bool isInteger(GridType gridType) return gridType == GridType::Int16 || gridType == GridType::Int32 || gridType == GridType::Int64 || - gridType == GridType::UInt32; + gridType == GridType::UInt32|| + gridType == GridType::UInt8; } // --------------------------> isIndex(GridType) <------------------------------------ @@ -837,23 +607,6 @@ __hostdev__ inline bool isIndex(GridType gridType) gridType == GridType::OnIndexMask;// as OnIndex, but with an additional mask } -// --------------------------> memcpy64 <------------------------------------ - -/// @brief copy 64 bit words from @c src to @c dst -/// @param dst 64 bit aligned pointer to destination -/// @param src 64 bit aligned pointer to source -/// @param word_count number of 64 bit words to be copied -/// @return destination pointer @c dst -/// @warning @c src and @c dst cannot overlap and should both be 64 bit aligned -__hostdev__ inline static void* memcpy64(void *dst, const void *src, size_t word_count) -{ - NANOVDB_ASSERT(uint64_t(dst) % 8 == 0 && uint64_t(src) % 8 == 0); - auto *d = reinterpret_cast(dst), *e = d + word_count; - auto *s = reinterpret_cast(src); - while (d != e) *d++ = *s++; - return dst; -} - // --------------------------> isValue(GridType, GridClass) <------------------------------------ /// @brief return true if the combination of GridType and GridClass is valid. @@ -872,7 +625,8 @@ __hostdev__ inline bool isValid(GridType gridType, GridClass gridClass) } else if (gridClass == GridClass::VoxelVolume) { return gridType == GridType::RGBA8 || gridType == GridType::Float || gridType == GridType::Double || gridType == GridType::Vec3f || - gridType == GridType::Vec3d || gridType == GridType::UInt32; + gridType == GridType::Vec3d || gridType == GridType::UInt32 || + gridType == GridType::UInt8; } return gridClass < GridClass::End && gridType < GridType::End; // any valid combination } @@ -925,6 +679,7 @@ class Version { uint32_t mData; // 11 + 11 + 10 bit packing of major + minor + patch public: + static constexpr uint32_t End = 0, StrLen = 8;// for strlen() /// @brief Default constructor __hostdev__ Version() : mData(uint32_t(NANOVDB_MAJOR_VERSION_NUMBER) << 21 | @@ -954,1710 +709,191 @@ class Version __hostdev__ bool isCompatible() const { return this->getMajor() == uint32_t(NANOVDB_MAJOR_VERSION_NUMBER); } /// @brief Returns the difference between major version of this instance and NANOVDB_MAJOR_VERSION_NUMBER /// @return return 0 if the major version equals NANOVDB_MAJOR_VERSION_NUMBER, else a negative age if this - /// instance has a smaller major verion (is older), and a positive age if it is newer, i.e. larger. - __hostdev__ int age() const {return int(this->getMajor()) - int(NANOVDB_MAJOR_VERSION_NUMBER);} - -#ifndef __CUDACC_RTC__ - /// @brief returns a c-string of the semantic version, i.e. 
major.minor.patch - const char* c_str() const - { - char* buffer = (char*)malloc(4 + 1 + 4 + 1 + 4 + 1); // xxxx.xxxx.xxxx\0 - snprintf(buffer, 4 + 1 + 4 + 1 + 4 + 1, "%u.%u.%u", this->getMajor(), this->getMinor(), this->getPatch()); // Prevents overflows by enforcing a fixed size of buffer - return buffer; - } -#endif -}; // Version - -// ----------------------------> Various math functions <------------------------------------- - -//@{ -/// @brief Pi constant taken from Boost to match old behaviour -template -inline __hostdev__ constexpr T pi() -{ - return 3.141592653589793238462643383279502884e+00; -} -template<> -inline __hostdev__ constexpr float pi() -{ - return 3.141592653589793238462643383279502884e+00F; -} -template<> -inline __hostdev__ constexpr double pi() -{ - return 3.141592653589793238462643383279502884e+00; -} -template<> -inline __hostdev__ constexpr long double pi() -{ - return 3.141592653589793238462643383279502884e+00L; -} -//@} - -//@{ -/// Tolerance for floating-point comparison -template -struct Tolerance; -template<> -struct Tolerance -{ - __hostdev__ static float value() { return 1e-8f; } -}; -template<> -struct Tolerance -{ - __hostdev__ static double value() { return 1e-15; } -}; -//@} - -//@{ -/// Delta for small floating-point offsets -template -struct Delta; -template<> -struct Delta -{ - __hostdev__ static float value() { return 1e-5f; } -}; -template<> -struct Delta -{ - __hostdev__ static double value() { return 1e-9; } -}; -//@} - -//@{ -/// Maximum floating-point values -template -struct Maximum; -#if defined(__CUDA_ARCH__) || defined(__HIP__) -template<> -struct Maximum -{ - __hostdev__ static int value() { return 2147483647; } -}; -template<> -struct Maximum -{ - __hostdev__ static uint32_t value() { return 4294967295u; } -}; -template<> -struct Maximum -{ - __hostdev__ static float value() { return 1e+38f; } -}; -template<> -struct Maximum -{ - __hostdev__ static double value() { return 1e+308; } -}; -#else -template -struct Maximum -{ - static T value() { return std::numeric_limits::max(); } -}; -#endif -//@} - -template -__hostdev__ inline bool isApproxZero(const Type& x) -{ - return !(x > Tolerance::value()) && !(x < -Tolerance::value()); -} - -template -__hostdev__ inline Type Min(Type a, Type b) -{ - return (a < b) ? a : b; -} -__hostdev__ inline int32_t Min(int32_t a, int32_t b) -{ - return int32_t(fminf(float(a), float(b))); -} -__hostdev__ inline uint32_t Min(uint32_t a, uint32_t b) -{ - return uint32_t(fminf(float(a), float(b))); -} -__hostdev__ inline float Min(float a, float b) -{ - return fminf(a, b); -} -__hostdev__ inline double Min(double a, double b) -{ - return fmin(a, b); -} -template -__hostdev__ inline Type Max(Type a, Type b) -{ - return (a > b) ? 
a : b; -} - -__hostdev__ inline int32_t Max(int32_t a, int32_t b) -{ - return int32_t(fmaxf(float(a), float(b))); -} -__hostdev__ inline uint32_t Max(uint32_t a, uint32_t b) -{ - return uint32_t(fmaxf(float(a), float(b))); -} -__hostdev__ inline float Max(float a, float b) -{ - return fmaxf(a, b); -} -__hostdev__ inline double Max(double a, double b) -{ - return fmax(a, b); -} -__hostdev__ inline float Clamp(float x, float a, float b) -{ - return Max(Min(x, b), a); -} -__hostdev__ inline double Clamp(double x, double a, double b) -{ - return Max(Min(x, b), a); -} - -__hostdev__ inline float Fract(float x) -{ - return x - floorf(x); -} -__hostdev__ inline double Fract(double x) -{ - return x - floor(x); -} - -__hostdev__ inline int32_t Floor(float x) -{ - return int32_t(floorf(x)); -} -__hostdev__ inline int32_t Floor(double x) -{ - return int32_t(floor(x)); -} - -__hostdev__ inline int32_t Ceil(float x) -{ - return int32_t(ceilf(x)); -} -__hostdev__ inline int32_t Ceil(double x) -{ - return int32_t(ceil(x)); -} - -template -__hostdev__ inline T Pow2(T x) -{ - return x * x; -} - -template -__hostdev__ inline T Pow3(T x) -{ - return x * x * x; -} - -template -__hostdev__ inline T Pow4(T x) -{ - return Pow2(x * x); -} -template -__hostdev__ inline T Abs(T x) -{ - return x < 0 ? -x : x; -} - -template<> -__hostdev__ inline float Abs(float x) -{ - return fabsf(x); -} - -template<> -__hostdev__ inline double Abs(double x) -{ - return fabs(x); -} - -template<> -__hostdev__ inline int Abs(int x) -{ - return abs(x); -} - -template class Vec3T> -__hostdev__ inline CoordT Round(const Vec3T& xyz); - -template class Vec3T> -__hostdev__ inline CoordT Round(const Vec3T& xyz) -{ - return CoordT(int32_t(rintf(xyz[0])), int32_t(rintf(xyz[1])), int32_t(rintf(xyz[2]))); - //return CoordT(int32_t(roundf(xyz[0])), int32_t(roundf(xyz[1])), int32_t(roundf(xyz[2])) ); - //return CoordT(int32_t(floorf(xyz[0] + 0.5f)), int32_t(floorf(xyz[1] + 0.5f)), int32_t(floorf(xyz[2] + 0.5f))); -} - -template class Vec3T> -__hostdev__ inline CoordT Round(const Vec3T& xyz) -{ - return CoordT(int32_t(floor(xyz[0] + 0.5)), int32_t(floor(xyz[1] + 0.5)), int32_t(floor(xyz[2] + 0.5))); -} - -template class Vec3T> -__hostdev__ inline CoordT RoundDown(const Vec3T& xyz) -{ - return CoordT(Floor(xyz[0]), Floor(xyz[1]), Floor(xyz[2])); -} - -//@{ -/// Return the square root of a floating-point value. -__hostdev__ inline float Sqrt(float x) -{ - return sqrtf(x); -} -__hostdev__ inline double Sqrt(double x) -{ - return sqrt(x); -} -//@} - -/// Return the sign of the given value as an integer (either -1, 0 or 1). -template -__hostdev__ inline T Sign(const T& x) -{ - return ((T(0) < x) ? T(1) : T(0)) - ((x < T(0)) ? 
T(1) : T(0)); -} - -template -__hostdev__ inline int MinIndex(const Vec3T& v) -{ -#if 0 - static const int hashTable[8] = {2, 1, 9, 1, 2, 9, 0, 0}; //9 are dummy values - const int hashKey = ((v[0] < v[1]) << 2) + ((v[0] < v[2]) << 1) + (v[1] < v[2]); // ?*4+?*2+?*1 - return hashTable[hashKey]; -#else - if (v[0] < v[1] && v[0] < v[2]) - return 0; - if (v[1] < v[2]) - return 1; - else - return 2; -#endif -} - -template -__hostdev__ inline int MaxIndex(const Vec3T& v) -{ -#if 0 - static const int hashTable[8] = {2, 1, 9, 1, 2, 9, 0, 0}; //9 are dummy values - const int hashKey = ((v[0] > v[1]) << 2) + ((v[0] > v[2]) << 1) + (v[1] > v[2]); // ?*4+?*2+?*1 - return hashTable[hashKey]; -#else - if (v[0] > v[1] && v[0] > v[2]) - return 0; - if (v[1] > v[2]) - return 1; - else - return 2; -#endif -} - -/// @brief round up byteSize to the nearest wordSize, e.g. to align to machine word: AlignUp -__hostdev__ inline uint64_t AlignUp(uint64_t byteCount) -{ - const uint64_t r = byteCount % wordSize; - return r ? byteCount - r + wordSize : byteCount; -} - -// ------------------------------> Coord <-------------------------------------- - -// forward declaration so we can define Coord::asVec3s and Coord::asVec3d -template -class Vec3; - -/// @brief Signed (i, j, k) 32-bit integer coordinate class, similar to openvdb::math::Coord -class Coord -{ - int32_t mVec[3]; // private member data - three signed index coordinates -public: - using ValueType = int32_t; - using IndexType = uint32_t; - - /// @brief Initialize all coordinates to zero. - __hostdev__ Coord() - : mVec{0, 0, 0} - { - } - - /// @brief Initializes all coordinates to the given signed integer. - __hostdev__ explicit Coord(ValueType n) - : mVec{n, n, n} - { - } - - /// @brief Initializes coordinate to the given signed integers. - __hostdev__ Coord(ValueType i, ValueType j, ValueType k) - : mVec{i, j, k} - { - } - - __hostdev__ Coord(ValueType* ptr) - : mVec{ptr[0], ptr[1], ptr[2]} - { - } - - __hostdev__ int32_t x() const { return mVec[0]; } - __hostdev__ int32_t y() const { return mVec[1]; } - __hostdev__ int32_t z() const { return mVec[2]; } - - __hostdev__ int32_t& x() { return mVec[0]; } - __hostdev__ int32_t& y() { return mVec[1]; } - __hostdev__ int32_t& z() { return mVec[2]; } - - __hostdev__ static Coord max() { return Coord(int32_t((1u << 31) - 1)); } - - __hostdev__ static Coord min() { return Coord(-int32_t((1u << 31) - 1) - 1); } - - __hostdev__ static size_t memUsage() { return sizeof(Coord); } - - /// @brief Return a const reference to the given Coord component. - /// @warning The argument is assumed to be 0, 1, or 2. - __hostdev__ const ValueType& operator[](IndexType i) const { return mVec[i]; } - - /// @brief Return a non-const reference to the given Coord component. - /// @warning The argument is assumed to be 0, 1, or 2. - __hostdev__ ValueType& operator[](IndexType i) { return mVec[i]; } - - /// @brief Assignment operator that works with openvdb::Coord - template - __hostdev__ Coord& operator=(const CoordT& other) - { - static_assert(sizeof(Coord) == sizeof(CoordT), "Mis-matched sizeof"); - mVec[0] = other[0]; - mVec[1] = other[1]; - mVec[2] = other[2]; - return *this; - } - - /// @brief Return a new instance with coordinates masked by the given unsigned integer. - __hostdev__ Coord operator&(IndexType n) const { return Coord(mVec[0] & n, mVec[1] & n, mVec[2] & n); } - - // @brief Return a new instance with coordinates left-shifted by the given unsigned integer. 
- __hostdev__ Coord operator<<(IndexType n) const { return Coord(mVec[0] << n, mVec[1] << n, mVec[2] << n); } - - // @brief Return a new instance with coordinates right-shifted by the given unsigned integer. - __hostdev__ Coord operator>>(IndexType n) const { return Coord(mVec[0] >> n, mVec[1] >> n, mVec[2] >> n); } - - /// @brief Return true if this Coord is lexicographically less than the given Coord. - __hostdev__ bool operator<(const Coord& rhs) const - { - return mVec[0] < rhs[0] ? true - : mVec[0] > rhs[0] ? false - : mVec[1] < rhs[1] ? true - : mVec[1] > rhs[1] ? false - : mVec[2] < rhs[2] ? true : false; - } - - /// @brief Return true if this Coord is lexicographically less or equal to the given Coord. - __hostdev__ bool operator<=(const Coord& rhs) const - { - return mVec[0] < rhs[0] ? true - : mVec[0] > rhs[0] ? false - : mVec[1] < rhs[1] ? true - : mVec[1] > rhs[1] ? false - : mVec[2] <=rhs[2] ? true : false; - } - - // @brief Return true if the Coord components are identical. - __hostdev__ bool operator==(const Coord& rhs) const { return mVec[0] == rhs[0] && mVec[1] == rhs[1] && mVec[2] == rhs[2]; } - __hostdev__ bool operator!=(const Coord& rhs) const { return mVec[0] != rhs[0] || mVec[1] != rhs[1] || mVec[2] != rhs[2]; } - __hostdev__ Coord& operator&=(int n) - { - mVec[0] &= n; - mVec[1] &= n; - mVec[2] &= n; - return *this; - } - __hostdev__ Coord& operator<<=(uint32_t n) - { - mVec[0] <<= n; - mVec[1] <<= n; - mVec[2] <<= n; - return *this; - } - __hostdev__ Coord& operator>>=(uint32_t n) - { - mVec[0] >>= n; - mVec[1] >>= n; - mVec[2] >>= n; - return *this; - } - __hostdev__ Coord& operator+=(int n) - { - mVec[0] += n; - mVec[1] += n; - mVec[2] += n; - return *this; - } - __hostdev__ Coord operator+(const Coord& rhs) const { return Coord(mVec[0] + rhs[0], mVec[1] + rhs[1], mVec[2] + rhs[2]); } - __hostdev__ Coord operator-(const Coord& rhs) const { return Coord(mVec[0] - rhs[0], mVec[1] - rhs[1], mVec[2] - rhs[2]); } - __hostdev__ Coord operator-() const { return Coord(-mVec[0], -mVec[1], -mVec[2]); } - __hostdev__ Coord& operator+=(const Coord& rhs) - { - mVec[0] += rhs[0]; - mVec[1] += rhs[1]; - mVec[2] += rhs[2]; - return *this; - } - __hostdev__ Coord& operator-=(const Coord& rhs) - { - mVec[0] -= rhs[0]; - mVec[1] -= rhs[1]; - mVec[2] -= rhs[2]; - return *this; - } - - /// @brief Perform a component-wise minimum with the other Coord. - __hostdev__ Coord& minComponent(const Coord& other) - { - if (other[0] < mVec[0]) - mVec[0] = other[0]; - if (other[1] < mVec[1]) - mVec[1] = other[1]; - if (other[2] < mVec[2]) - mVec[2] = other[2]; - return *this; - } - - /// @brief Perform a component-wise maximum with the other Coord. - __hostdev__ Coord& maxComponent(const Coord& other) - { - if (other[0] > mVec[0]) - mVec[0] = other[0]; - if (other[1] > mVec[1]) - mVec[1] = other[1]; - if (other[2] > mVec[2]) - mVec[2] = other[2]; - return *this; - } -#if defined(__CUDACC__) // the following functions only run on the GPU! 
- __device__ inline Coord& minComponentAtomic(const Coord& other) - { - atomicMin(&mVec[0], other[0]); - atomicMin(&mVec[1], other[1]); - atomicMin(&mVec[2], other[2]); - return *this; - } - __device__ inline Coord& maxComponentAtomic(const Coord& other) - { - atomicMax(&mVec[0], other[0]); - atomicMax(&mVec[1], other[1]); - atomicMax(&mVec[2], other[2]); - return *this; - } -#endif - - __hostdev__ Coord offsetBy(ValueType dx, ValueType dy, ValueType dz) const - { - return Coord(mVec[0] + dx, mVec[1] + dy, mVec[2] + dz); - } - - __hostdev__ Coord offsetBy(ValueType n) const { return this->offsetBy(n, n, n); } - - /// Return true if any of the components of @a a are smaller than the - /// corresponding components of @a b. - __hostdev__ static inline bool lessThan(const Coord& a, const Coord& b) - { - return (a[0] < b[0] || a[1] < b[1] || a[2] < b[2]); - } - - /// @brief Return the largest integer coordinates that are not greater - /// than @a xyz (node centered conversion). - template - __hostdev__ static Coord Floor(const Vec3T& xyz) { return Coord(nanovdb::Floor(xyz[0]), nanovdb::Floor(xyz[1]), nanovdb::Floor(xyz[2])); } - - /// @brief Return a hash key derived from the existing coordinates. - /// @details The hash function is originally taken from the SIGGRAPH paper: - /// "VDB: High-resolution sparse volumes with dynamic topology" - /// and the prime numbers are modified based on the ACM Transactions on Graphics paper: - /// "Real-time 3D reconstruction at scale using voxel hashing" (the second number had a typo!) - template - __hostdev__ uint32_t hash() const { return ((1 << Log2N) - 1) & (mVec[0] * 73856093 ^ mVec[1] * 19349669 ^ mVec[2] * 83492791); } - - /// @brief Return the octant of this Coord - //__hostdev__ size_t octant() const { return (uint32_t(mVec[0])>>31) | ((uint32_t(mVec[1])>>31)<<1) | ((uint32_t(mVec[2])>>31)<<2); } - __hostdev__ uint8_t octant() const { return (uint8_t(bool(mVec[0] & (1u << 31)))) | - (uint8_t(bool(mVec[1] & (1u << 31))) << 1) | - (uint8_t(bool(mVec[2] & (1u << 31))) << 2); } - - /// @brief Return a single precision floating-point vector of this coordinate - __hostdev__ inline Vec3 asVec3s() const; - - /// @brief Return a double precision floating-point vector of this coordinate - __hostdev__ inline Vec3 asVec3d() const; - - // returns a copy of itself, so it mimics the behaviour of Vec3::round() - __hostdev__ inline Coord round() const { return *this; } -}; // Coord class - -// ----------------------------> Vec3 <-------------------------------------- - -/// @brief A simple vector class with three components, similar to openvdb::math::Vec3 -template -class Vec3 -{ - T mVec[3]; - -public: - static const int SIZE = 3; - static const int size = 3; // in openvdb::math::Tuple - using ValueType = T; - Vec3() = default; - __hostdev__ explicit Vec3(T x) - : mVec{x, x, x} - { - } - __hostdev__ Vec3(T x, T y, T z) - : mVec{x, y, z} - { - } - template class Vec3T, class T2> - __hostdev__ Vec3(const Vec3T& v) - : mVec{T(v[0]), T(v[1]), T(v[2])} - { - static_assert(Vec3T::size == size, "expected Vec3T::size==3!"); - } - template - __hostdev__ explicit Vec3(const Vec3& v) - : mVec{T(v[0]), T(v[1]), T(v[2])} - { - } - __hostdev__ explicit Vec3(const Coord& ijk) - : mVec{T(ijk[0]), T(ijk[1]), T(ijk[2])} - { - } - __hostdev__ bool operator==(const Vec3& rhs) const { return mVec[0] == rhs[0] && mVec[1] == rhs[1] && mVec[2] == rhs[2]; } - __hostdev__ bool operator!=(const Vec3& rhs) const { return mVec[0] != rhs[0] || mVec[1] != rhs[1] || mVec[2] != rhs[2]; } - 
template class Vec3T, class T2> - __hostdev__ Vec3& operator=(const Vec3T& rhs) - { - static_assert(Vec3T::size == size, "expected Vec3T::size==3!"); - mVec[0] = rhs[0]; - mVec[1] = rhs[1]; - mVec[2] = rhs[2]; - return *this; - } - __hostdev__ const T& operator[](int i) const { return mVec[i]; } - __hostdev__ T& operator[](int i) { return mVec[i]; } - template - __hostdev__ T dot(const Vec3T& v) const { return mVec[0] * v[0] + mVec[1] * v[1] + mVec[2] * v[2]; } - template - __hostdev__ Vec3 cross(const Vec3T& v) const - { - return Vec3(mVec[1] * v[2] - mVec[2] * v[1], - mVec[2] * v[0] - mVec[0] * v[2], - mVec[0] * v[1] - mVec[1] * v[0]); - } - __hostdev__ T lengthSqr() const - { - return mVec[0] * mVec[0] + mVec[1] * mVec[1] + mVec[2] * mVec[2]; // 5 flops - } - __hostdev__ T length() const { return Sqrt(this->lengthSqr()); } - __hostdev__ Vec3 operator-() const { return Vec3(-mVec[0], -mVec[1], -mVec[2]); } - __hostdev__ Vec3 operator*(const Vec3& v) const { return Vec3(mVec[0] * v[0], mVec[1] * v[1], mVec[2] * v[2]); } - __hostdev__ Vec3 operator/(const Vec3& v) const { return Vec3(mVec[0] / v[0], mVec[1] / v[1], mVec[2] / v[2]); } - __hostdev__ Vec3 operator+(const Vec3& v) const { return Vec3(mVec[0] + v[0], mVec[1] + v[1], mVec[2] + v[2]); } - __hostdev__ Vec3 operator-(const Vec3& v) const { return Vec3(mVec[0] - v[0], mVec[1] - v[1], mVec[2] - v[2]); } - __hostdev__ Vec3 operator+(const Coord& ijk) const { return Vec3(mVec[0] + ijk[0], mVec[1] + ijk[1], mVec[2] + ijk[2]); } - __hostdev__ Vec3 operator-(const Coord& ijk) const { return Vec3(mVec[0] - ijk[0], mVec[1] - ijk[1], mVec[2] - ijk[2]); } - __hostdev__ Vec3 operator*(const T& s) const { return Vec3(s * mVec[0], s * mVec[1], s * mVec[2]); } - __hostdev__ Vec3 operator/(const T& s) const { return (T(1) / s) * (*this); } - __hostdev__ Vec3& operator+=(const Vec3& v) - { - mVec[0] += v[0]; - mVec[1] += v[1]; - mVec[2] += v[2]; - return *this; - } - __hostdev__ Vec3& operator+=(const Coord& ijk) - { - mVec[0] += T(ijk[0]); - mVec[1] += T(ijk[1]); - mVec[2] += T(ijk[2]); - return *this; - } - __hostdev__ Vec3& operator-=(const Vec3& v) - { - mVec[0] -= v[0]; - mVec[1] -= v[1]; - mVec[2] -= v[2]; - return *this; - } - __hostdev__ Vec3& operator-=(const Coord& ijk) - { - mVec[0] -= T(ijk[0]); - mVec[1] -= T(ijk[1]); - mVec[2] -= T(ijk[2]); - return *this; - } - __hostdev__ Vec3& operator*=(const T& s) - { - mVec[0] *= s; - mVec[1] *= s; - mVec[2] *= s; - return *this; - } - __hostdev__ Vec3& operator/=(const T& s) { return (*this) *= T(1) / s; } - __hostdev__ Vec3& normalize() { return (*this) /= this->length(); } - /// @brief Perform a component-wise minimum with the other Coord. - __hostdev__ Vec3& minComponent(const Vec3& other) - { - if (other[0] < mVec[0]) - mVec[0] = other[0]; - if (other[1] < mVec[1]) - mVec[1] = other[1]; - if (other[2] < mVec[2]) - mVec[2] = other[2]; - return *this; - } - - /// @brief Perform a component-wise maximum with the other Coord. - __hostdev__ Vec3& maxComponent(const Vec3& other) - { - if (other[0] > mVec[0]) - mVec[0] = other[0]; - if (other[1] > mVec[1]) - mVec[1] = other[1]; - if (other[2] > mVec[2]) - mVec[2] = other[2]; - return *this; - } - /// @brief Return the smallest vector component - __hostdev__ ValueType min() const - { - return mVec[0] < mVec[1] ? (mVec[0] < mVec[2] ? mVec[0] : mVec[2]) : (mVec[1] < mVec[2] ? mVec[1] : mVec[2]); - } - /// @brief Return the largest vector component - __hostdev__ ValueType max() const - { - return mVec[0] > mVec[1] ? (mVec[0] > mVec[2] ? 
mVec[0] : mVec[2]) : (mVec[1] > mVec[2] ? mVec[1] : mVec[2]);
-    }
-    /// @brief Round each component of this Vec down to its integer value
-    /// @return Return an integer Coord
-    __hostdev__ Coord floor() const { return Coord(Floor(mVec[0]), Floor(mVec[1]), Floor(mVec[2])); }
-    /// @brief Round each component of this Vec up to its integer value
-    /// @return Return an integer Coord
-    __hostdev__ Coord ceil() const { return Coord(Ceil(mVec[0]), Ceil(mVec[1]), Ceil(mVec[2])); }
-    /// @brief Round each component of this Vec to its closest integer value
-    /// @return Return an integer Coord
-    __hostdev__ Coord round() const
-    {
-        if constexpr(is_same<T, float>::value) {
-            return Coord(Floor(mVec[0] + 0.5f), Floor(mVec[1] + 0.5f), Floor(mVec[2] + 0.5f));
-        } else if constexpr(is_same<T, int>::value) {
-            return Coord(mVec[0], mVec[1], mVec[2]);
-        } else {
-            return Coord(Floor(mVec[0] + 0.5), Floor(mVec[1] + 0.5), Floor(mVec[2] + 0.5));
-        }
-    }
-
-    /// @brief return a non-const raw pointer to the array of three vector components
-    __hostdev__ T* asPointer() { return mVec; }
-    /// @brief return a const raw pointer to the array of three vector components
-    __hostdev__ const T* asPointer() const { return mVec; }
-}; // Vec3<T>
-
-template<typename T1, typename T2>
-__hostdev__ inline Vec3<T2> operator*(T1 scalar, const Vec3<T2>& vec)
-{
-    return Vec3<T2>(scalar * vec[0], scalar * vec[1], scalar * vec[2]);
-}
-template<typename T1, typename T2>
-__hostdev__ inline Vec3<T2> operator/(T1 scalar, const Vec3<T2>& vec)
-{
-    return Vec3<T2>(scalar / vec[0], scalar / vec[1], scalar / vec[2]);
-}
-
-//using Vec3R = Vec3<double>;// deprecated
-using Vec3d = Vec3<double>;
-using Vec3f = Vec3<float>;
-using Vec3i = Vec3<int32_t>;
-using Vec3u = Vec3<uint32_t>;
-using Vec3u8 = Vec3<uint8_t>;
-using Vec3u16 = Vec3<uint16_t>;
-
-/// @brief Return a single precision floating-point vector of this coordinate
-__hostdev__ inline Vec3f Coord::asVec3s() const
-{
-    return Vec3f(float(mVec[0]), float(mVec[1]), float(mVec[2]));
-}
-
-/// @brief Return a double precision floating-point vector of this coordinate
-__hostdev__ inline Vec3d Coord::asVec3d() const
-{
-    return Vec3d(double(mVec[0]), double(mVec[1]), double(mVec[2]));
-}
-
-// ----------------------------> Vec4 <--------------------------------------
-
-/// @brief A simple vector class with four components, similar to openvdb::math::Vec4
-template<typename T>
-class Vec4
-{
-    T mVec[4];
-
-public:
-    static const int SIZE = 4;
-    static const int size = 4;
-    using ValueType = T;
-    Vec4() = default;
-    __hostdev__ explicit Vec4(T x)
-        : mVec{x, x, x, x}
-    {
-    }
-    __hostdev__ Vec4(T x, T y, T z, T w)
-        : mVec{x, y, z, w}
-    {
-    }
-    template<typename T2>
-    __hostdev__ explicit Vec4(const Vec4<T2>& v)
-        : mVec{T(v[0]), T(v[1]), T(v[2]), T(v[3])}
-    {
-    }
-    template<template<class> class Vec4T, class T2>
-    __hostdev__ Vec4(const Vec4T<T2>& v)
-        : mVec{T(v[0]), T(v[1]), T(v[2]), T(v[3])}
-    {
-        static_assert(Vec4T<T2>::size == size, "expected Vec4T::size==4!");
-    }
-    __hostdev__ bool operator==(const Vec4& rhs) const { return mVec[0] == rhs[0] && mVec[1] == rhs[1] && mVec[2] == rhs[2] && mVec[3] == rhs[3]; }
-    __hostdev__ bool operator!=(const Vec4& rhs) const { return mVec[0] != rhs[0] || mVec[1] != rhs[1] || mVec[2] != rhs[2] || mVec[3] != rhs[3]; }
-    template<template<class> class Vec4T, class T2>
-    __hostdev__ Vec4& operator=(const Vec4T<T2>& rhs)
-    {
-        static_assert(Vec4T<T2>::size == size, "expected Vec4T::size==4!");
-        mVec[0] = rhs[0];
-        mVec[1] = rhs[1];
-        mVec[2] = rhs[2];
-        mVec[3] = rhs[3];
-        return *this;
-    }
-
-    __hostdev__ const T& operator[](int i) const { return mVec[i]; }
-    __hostdev__ T& operator[](int i) { return mVec[i]; }
-    template<typename Vec4T>
-    __hostdev__ T dot(const Vec4T& v) const {
return mVec[0] * v[0] + mVec[1] * v[1] + mVec[2] * v[2] + mVec[3] * v[3]; } - __hostdev__ T lengthSqr() const - { - return mVec[0] * mVec[0] + mVec[1] * mVec[1] + mVec[2] * mVec[2] + mVec[3] * mVec[3]; // 7 flops - } - __hostdev__ T length() const { return Sqrt(this->lengthSqr()); } - __hostdev__ Vec4 operator-() const { return Vec4(-mVec[0], -mVec[1], -mVec[2], -mVec[3]); } - __hostdev__ Vec4 operator*(const Vec4& v) const { return Vec4(mVec[0] * v[0], mVec[1] * v[1], mVec[2] * v[2], mVec[3] * v[3]); } - __hostdev__ Vec4 operator/(const Vec4& v) const { return Vec4(mVec[0] / v[0], mVec[1] / v[1], mVec[2] / v[2], mVec[3] / v[3]); } - __hostdev__ Vec4 operator+(const Vec4& v) const { return Vec4(mVec[0] + v[0], mVec[1] + v[1], mVec[2] + v[2], mVec[3] + v[3]); } - __hostdev__ Vec4 operator-(const Vec4& v) const { return Vec4(mVec[0] - v[0], mVec[1] - v[1], mVec[2] - v[2], mVec[3] - v[3]); } - __hostdev__ Vec4 operator*(const T& s) const { return Vec4(s * mVec[0], s * mVec[1], s * mVec[2], s * mVec[3]); } - __hostdev__ Vec4 operator/(const T& s) const { return (T(1) / s) * (*this); } - __hostdev__ Vec4& operator+=(const Vec4& v) - { - mVec[0] += v[0]; - mVec[1] += v[1]; - mVec[2] += v[2]; - mVec[3] += v[3]; - return *this; - } - __hostdev__ Vec4& operator-=(const Vec4& v) - { - mVec[0] -= v[0]; - mVec[1] -= v[1]; - mVec[2] -= v[2]; - mVec[3] -= v[3]; - return *this; - } - __hostdev__ Vec4& operator*=(const T& s) - { - mVec[0] *= s; - mVec[1] *= s; - mVec[2] *= s; - mVec[3] *= s; - return *this; - } - __hostdev__ Vec4& operator/=(const T& s) { return (*this) *= T(1) / s; } - __hostdev__ Vec4& normalize() { return (*this) /= this->length(); } - /// @brief Perform a component-wise minimum with the other Coord. - __hostdev__ Vec4& minComponent(const Vec4& other) - { - if (other[0] < mVec[0]) - mVec[0] = other[0]; - if (other[1] < mVec[1]) - mVec[1] = other[1]; - if (other[2] < mVec[2]) - mVec[2] = other[2]; - if (other[3] < mVec[3]) - mVec[3] = other[3]; - return *this; - } - - /// @brief Perform a component-wise maximum with the other Coord. - __hostdev__ Vec4& maxComponent(const Vec4& other) - { - if (other[0] > mVec[0]) - mVec[0] = other[0]; - if (other[1] > mVec[1]) - mVec[1] = other[1]; - if (other[2] > mVec[2]) - mVec[2] = other[2]; - if (other[3] > mVec[3]) - mVec[3] = other[3]; - return *this; - } -}; // Vec4 - -template -__hostdev__ inline Vec4 operator*(T1 scalar, const Vec4& vec) -{ - return Vec4(scalar * vec[0], scalar * vec[1], scalar * vec[2], scalar * vec[3]); -} -template -__hostdev__ inline Vec4 operator/(T1 scalar, const Vec4& vec) -{ - return Vec4(scalar / vec[0], scalar / vec[1], scalar / vec[2], scalar / vec[3]); -} - -using Vec4R = Vec4; -using Vec4d = Vec4; -using Vec4f = Vec4; -using Vec4i = Vec4; - - -// --------------------------> Rgba8 <------------------------------------ - -/// @brief 8-bit red, green, blue, alpha packed into 32 bit unsigned int -class Rgba8 -{ - union - { - uint8_t c[4]; // 4 integer color channels of red, green, blue and alpha components. 
-        uint32_t packed; // 32 bit packed representation
-    } mData;
-
-public:
-    static const int SIZE = 4;
-    using ValueType = uint8_t;
-
-    /// @brief Default copy constructor
-    Rgba8(const Rgba8&) = default;
-
-    /// @brief Default move constructor
-    Rgba8(Rgba8&&) = default;
-
-    /// @brief Default move assignment operator
-    /// @return non-const reference to this instance
-    Rgba8& operator=(Rgba8&&) = default;
-
-    /// @brief Default copy assignment operator
-    /// @return non-const reference to this instance
-    Rgba8& operator=(const Rgba8&) = default;
-
-    /// @brief Default ctor initializes all channels to zero
-    __hostdev__ Rgba8()
-        : mData{{0, 0, 0, 0}}
-    {
-        static_assert(sizeof(uint32_t) == sizeof(Rgba8), "Unexpected sizeof");
-    }
-
-    /// @brief integer r,g,b,a ctor where the alpha channel defaults to opaque
-    /// @note all values should be in the range 0u to 255u
-    __hostdev__ Rgba8(uint8_t r, uint8_t g, uint8_t b, uint8_t a = 255u)
-        : mData{{r, g, b, a}}
-    {
-    }
-
-    /// @brief ctor where all channels are initialized to the same value
-    /// @note value should be in the range 0u to 255u
-    explicit __hostdev__ Rgba8(uint8_t v)
-        : mData{{v, v, v, v}}
-    {
-    }
-
-    /// @brief floating-point r,g,b,a ctor where the alpha channel defaults to opaque
-    /// @note all values should be in the range 0.0f to 1.0f
-    __hostdev__ Rgba8(float r, float g, float b, float a = 1.0f)
-        : mData{{static_cast<uint8_t>(0.5f + r * 255.0f), // round floats to nearest integers
-                 static_cast<uint8_t>(0.5f + g * 255.0f), // double {{}} is needed due to union
-                 static_cast<uint8_t>(0.5f + b * 255.0f),
-                 static_cast<uint8_t>(0.5f + a * 255.0f)}}
-    {
-    }
-
-    /// @brief Vec3f r,g,b ctor (alpha channel is set to 1)
-    /// @note all values should be in the range 0.0f to 1.0f
-    __hostdev__ Rgba8(const Vec3f& rgb)
-        : Rgba8(rgb[0], rgb[1], rgb[2])
-    {
-    }
-
-    /// @brief Vec4f r,g,b,a ctor
-    /// @note all values should be in the range 0.0f to 1.0f
-    __hostdev__ Rgba8(const Vec4f& rgba)
-        : Rgba8(rgba[0], rgba[1], rgba[2], rgba[3])
-    {
-    }
-
-    __hostdev__ bool operator< (const Rgba8& rhs) const { return mData.packed < rhs.mData.packed; }
-    __hostdev__ bool operator==(const Rgba8& rhs) const { return mData.packed == rhs.mData.packed; }
-    __hostdev__ float lengthSqr() const
-    {
-        return 0.0000153787005f * (float(mData.c[0]) * mData.c[0] +
-                                   float(mData.c[1]) * mData.c[1] +
-                                   float(mData.c[2]) * mData.c[2]); //1/255^2
-    }
-    __hostdev__ float length() const { return sqrtf(this->lengthSqr()); }
-    /// @brief return the n'th color channel as a float in the range 0 to 1
-    __hostdev__ float asFloat(int n) const { return 0.003921569f*float(mData.c[n]); }// divide by 255
-    __hostdev__ const uint8_t& operator[](int n) const { return mData.c[n]; }
-    __hostdev__ uint8_t& operator[](int n) { return mData.c[n]; }
-    __hostdev__ const uint32_t& packed() const { return mData.packed; }
-    __hostdev__ uint32_t& packed() { return mData.packed; }
-    __hostdev__ const uint8_t& r() const { return mData.c[0]; }
-    __hostdev__ const uint8_t& g() const { return mData.c[1]; }
-    __hostdev__ const uint8_t& b() const { return mData.c[2]; }
-    __hostdev__ const uint8_t& a() const { return mData.c[3]; }
-    __hostdev__ uint8_t& r() { return mData.c[0]; }
-    __hostdev__ uint8_t& g() { return mData.c[1]; }
-    __hostdev__ uint8_t& b() { return mData.c[2]; }
-    __hostdev__ uint8_t& a() { return mData.c[3]; }
-    __hostdev__ operator Vec3f() const {
-        return Vec3f(this->asFloat(0), this->asFloat(1), this->asFloat(2));
-    }
-    __hostdev__ operator Vec4f() const {
-        return Vec4f(this->asFloat(0), this->asFloat(1),
this->asFloat(2), this->asFloat(3)); - } -}; // Rgba8 - -using PackedRGBA8 = Rgba8; // for backwards compatibility - -// ----------------------------> TensorTraits <-------------------------------------- - -template::value || is_specialization::value || is_same::value) ? 1 : 0> -struct TensorTraits; - -template -struct TensorTraits -{ - static const int Rank = 0; // i.e. scalar - static const bool IsScalar = true; - static const bool IsVector = false; - static const int Size = 1; - using ElementType = T; - static T scalar(const T& s) { return s; } -}; - -template -struct TensorTraits -{ - static const int Rank = 1; // i.e. vector - static const bool IsScalar = false; - static const bool IsVector = true; - static const int Size = T::SIZE; - using ElementType = typename T::ValueType; - static ElementType scalar(const T& v) { return v.length(); } -}; - -// ----------------------------> FloatTraits <-------------------------------------- - -template::ElementType)> -struct FloatTraits -{ - using FloatType = float; -}; - -template -struct FloatTraits -{ - using FloatType = double; -}; - -template<> -struct FloatTraits -{ - using FloatType = bool; -}; - -template<> -struct FloatTraits // size of empty class in C++ is 1 byte and not 0 byte -{ - using FloatType = uint64_t; -}; - -template<> -struct FloatTraits // size of empty class in C++ is 1 byte and not 0 byte -{ - using FloatType = uint64_t; -}; - -template<> -struct FloatTraits // size of empty class in C++ is 1 byte and not 0 byte -{ - using FloatType = uint64_t; -}; - -template<> -struct FloatTraits // size of empty class in C++ is 1 byte and not 0 byte -{ - using FloatType = uint64_t; -}; - -template<> -struct FloatTraits // size of empty class in C++ is 1 byte and not 0 byte -{ - using FloatType = bool; -}; - -template<> -struct FloatTraits // size of empty class in C++ is 1 byte and not 0 byte -{ - using FloatType = double; -}; - -// ----------------------------> mapping BuildType -> GridType <-------------------------------------- - -/// @brief Maps from a templated build type to a GridType enum -template -__hostdev__ inline GridType mapToGridType() -{ - if constexpr(is_same::value) { // resolved at compile-time - return GridType::Float; - } else if constexpr(is_same::value) { - return GridType::Double; - } else if constexpr(is_same::value) { - return GridType::Int16; - } else if constexpr(is_same::value) { - return GridType::Int32; - } else if constexpr(is_same::value) { - return GridType::Int64; - } else if constexpr(is_same::value) { - return GridType::Vec3f; - } else if constexpr(is_same::value) { - return GridType::Vec3d; - } else if constexpr(is_same::value) { - return GridType::UInt32; - } else if constexpr(is_same::value) { - return GridType::Mask; - } else if constexpr(is_same::value) { - return GridType::Half; - } else if constexpr(is_same::value) { - return GridType::Index; - } else if constexpr(is_same::value) { - return GridType::OnIndex; - } else if constexpr(is_same::value) { - return GridType::IndexMask; - } else if constexpr(is_same::value) { - return GridType::OnIndexMask; - } else if constexpr(is_same::value) { - return GridType::Boolean; - } else if constexpr(is_same::value) { - return GridType::RGBA8; - } else if (is_same::value) { - return GridType::Fp4; - } else if constexpr(is_same::value) { - return GridType::Fp8; - } else if constexpr(is_same::value) { - return GridType::Fp16; - } else if constexpr(is_same::value) { - return GridType::FpN; - } else if constexpr(is_same::value) { - return GridType::Vec4f; - } 
else if constexpr(is_same::value) { - return GridType::Vec4d; - } else if (is_same::value) { - return GridType::PointIndex; - } else if constexpr(is_same::value) { - return GridType::Vec3u8; - } else if constexpr(is_same::value) { - return GridType::Vec3u16; - } - return GridType::Unknown; -} - -// ----------------------------> mapping BuildType -> GridClass <-------------------------------------- - -/// @brief Maps from a templated build type to a GridClass enum -template -__hostdev__ inline GridClass mapToGridClass(GridClass defaultClass = GridClass::Unknown) -{ - if (is_same::value) { - return GridClass::Topology; - } else if (BuildTraits::is_index) { - return GridClass::IndexGrid; - } else if (is_same::value) { - return GridClass::VoxelVolume; - } else if (is_same::value) { - return GridClass::PointIndex; - } - return defaultClass; -} - -// ----------------------------> matMult <-------------------------------------- - -/// @brief Multiply a 3x3 matrix and a 3d vector using 32bit floating point arithmetics -/// @note This corresponds to a linear mapping, e.g. scaling, rotation etc. -/// @tparam Vec3T Template type of the input and output 3d vectors -/// @param mat pointer to an array of floats with the 3x3 matrix -/// @param xyz input vector to be multiplied by the matrix -/// @return result of matrix-vector multiplication, i.e. mat x xyz -template -__hostdev__ inline Vec3T matMult(const float* mat, const Vec3T& xyz) -{ - return Vec3T(fmaf(static_cast(xyz[0]), mat[0], fmaf(static_cast(xyz[1]), mat[1], static_cast(xyz[2]) * mat[2])), - fmaf(static_cast(xyz[0]), mat[3], fmaf(static_cast(xyz[1]), mat[4], static_cast(xyz[2]) * mat[5])), - fmaf(static_cast(xyz[0]), mat[6], fmaf(static_cast(xyz[1]), mat[7], static_cast(xyz[2]) * mat[8]))); // 6 fmaf + 3 mult = 9 flops -} - -/// @brief Multiply a 3x3 matrix and a 3d vector using 64bit floating point arithmetics -/// @note This corresponds to a linear mapping, e.g. scaling, rotation etc. -/// @tparam Vec3T Template type of the input and output 3d vectors -/// @param mat pointer to an array of floats with the 3x3 matrix -/// @param xyz input vector to be multiplied by the matrix -/// @return result of matrix-vector multiplication, i.e. mat x xyz -template -__hostdev__ inline Vec3T matMult(const double* mat, const Vec3T& xyz) -{ - return Vec3T(fma(static_cast(xyz[0]), mat[0], fma(static_cast(xyz[1]), mat[1], static_cast(xyz[2]) * mat[2])), - fma(static_cast(xyz[0]), mat[3], fma(static_cast(xyz[1]), mat[4], static_cast(xyz[2]) * mat[5])), - fma(static_cast(xyz[0]), mat[6], fma(static_cast(xyz[1]), mat[7], static_cast(xyz[2]) * mat[8]))); // 6 fmaf + 3 mult = 9 flops -} - -/// @brief Multiply a 3x3 matrix to a 3d vector and add another 3d vector using 32bit floating point arithmetics -/// @note This corresponds to an affine transformation, i.e a linear mapping followed by a translation. e.g. scale/rotation and translation -/// @tparam Vec3T Template type of the input and output 3d vectors -/// @param mat pointer to an array of floats with the 3x3 matrix -/// @param vec 3d vector to be added AFTER the matrix multiplication -/// @param xyz input vector to be multiplied by the matrix and a translated by @c vec -/// @return result of affine transformation, i.e. 
(mat x xyz) + vec -template -__hostdev__ inline Vec3T matMult(const float* mat, const float* vec, const Vec3T& xyz) -{ - return Vec3T(fmaf(static_cast(xyz[0]), mat[0], fmaf(static_cast(xyz[1]), mat[1], fmaf(static_cast(xyz[2]), mat[2], vec[0]))), - fmaf(static_cast(xyz[0]), mat[3], fmaf(static_cast(xyz[1]), mat[4], fmaf(static_cast(xyz[2]), mat[5], vec[1]))), - fmaf(static_cast(xyz[0]), mat[6], fmaf(static_cast(xyz[1]), mat[7], fmaf(static_cast(xyz[2]), mat[8], vec[2])))); // 9 fmaf = 9 flops -} - -/// @brief Multiply a 3x3 matrix to a 3d vector and add another 3d vector using 64bit floating point arithmetics -/// @note This corresponds to an affine transformation, i.e a linear mapping followed by a translation. e.g. scale/rotation and translation -/// @tparam Vec3T Template type of the input and output 3d vectors -/// @param mat pointer to an array of floats with the 3x3 matrix -/// @param vec 3d vector to be added AFTER the matrix multiplication -/// @param xyz input vector to be multiplied by the matrix and a translated by @c vec -/// @return result of affine transformation, i.e. (mat x xyz) + vec -template -__hostdev__ inline Vec3T matMult(const double* mat, const double* vec, const Vec3T& xyz) -{ - return Vec3T(fma(static_cast(xyz[0]), mat[0], fma(static_cast(xyz[1]), mat[1], fma(static_cast(xyz[2]), mat[2], vec[0]))), - fma(static_cast(xyz[0]), mat[3], fma(static_cast(xyz[1]), mat[4], fma(static_cast(xyz[2]), mat[5], vec[1]))), - fma(static_cast(xyz[0]), mat[6], fma(static_cast(xyz[1]), mat[7], fma(static_cast(xyz[2]), mat[8], vec[2])))); // 9 fma = 9 flops -} - -/// @brief Multiply the transposed of a 3x3 matrix and a 3d vector using 32bit floating point arithmetics -/// @note This corresponds to an inverse linear mapping, e.g. inverse scaling, inverse rotation etc. -/// @tparam Vec3T Template type of the input and output 3d vectors -/// @param mat pointer to an array of floats with the 3x3 matrix -/// @param xyz input vector to be multiplied by the transposed matrix -/// @return result of matrix-vector multiplication, i.e. mat^T x xyz -template -__hostdev__ inline Vec3T matMultT(const float* mat, const Vec3T& xyz) -{ - return Vec3T(fmaf(static_cast(xyz[0]), mat[0], fmaf(static_cast(xyz[1]), mat[3], static_cast(xyz[2]) * mat[6])), - fmaf(static_cast(xyz[0]), mat[1], fmaf(static_cast(xyz[1]), mat[4], static_cast(xyz[2]) * mat[7])), - fmaf(static_cast(xyz[0]), mat[2], fmaf(static_cast(xyz[1]), mat[5], static_cast(xyz[2]) * mat[8]))); // 6 fmaf + 3 mult = 9 flops -} - -/// @brief Multiply the transposed of a 3x3 matrix and a 3d vector using 64bit floating point arithmetics -/// @note This corresponds to an inverse linear mapping, e.g. inverse scaling, inverse rotation etc. -/// @tparam Vec3T Template type of the input and output 3d vectors -/// @param mat pointer to an array of floats with the 3x3 matrix -/// @param xyz input vector to be multiplied by the transposed matrix -/// @return result of matrix-vector multiplication, i.e. 
mat^T x xyz -template -__hostdev__ inline Vec3T matMultT(const double* mat, const Vec3T& xyz) -{ - return Vec3T(fma(static_cast(xyz[0]), mat[0], fma(static_cast(xyz[1]), mat[3], static_cast(xyz[2]) * mat[6])), - fma(static_cast(xyz[0]), mat[1], fma(static_cast(xyz[1]), mat[4], static_cast(xyz[2]) * mat[7])), - fma(static_cast(xyz[0]), mat[2], fma(static_cast(xyz[1]), mat[5], static_cast(xyz[2]) * mat[8]))); // 6 fmaf + 3 mult = 9 flops -} - -template -__hostdev__ inline Vec3T matMultT(const float* mat, const float* vec, const Vec3T& xyz) -{ - return Vec3T(fmaf(static_cast(xyz[0]), mat[0], fmaf(static_cast(xyz[1]), mat[3], fmaf(static_cast(xyz[2]), mat[6], vec[0]))), - fmaf(static_cast(xyz[0]), mat[1], fmaf(static_cast(xyz[1]), mat[4], fmaf(static_cast(xyz[2]), mat[7], vec[1]))), - fmaf(static_cast(xyz[0]), mat[2], fmaf(static_cast(xyz[1]), mat[5], fmaf(static_cast(xyz[2]), mat[8], vec[2])))); // 9 fmaf = 9 flops -} - -template -__hostdev__ inline Vec3T matMultT(const double* mat, const double* vec, const Vec3T& xyz) -{ - return Vec3T(fma(static_cast(xyz[0]), mat[0], fma(static_cast(xyz[1]), mat[3], fma(static_cast(xyz[2]), mat[6], vec[0]))), - fma(static_cast(xyz[0]), mat[1], fma(static_cast(xyz[1]), mat[4], fma(static_cast(xyz[2]), mat[7], vec[1]))), - fma(static_cast(xyz[0]), mat[2], fma(static_cast(xyz[1]), mat[5], fma(static_cast(xyz[2]), mat[8], vec[2])))); // 9 fma = 9 flops -} - -// ----------------------------> BBox <------------------------------------- - -// Base-class for static polymorphism (cannot be constructed directly) -template -struct BaseBBox -{ - Vec3T mCoord[2]; - __hostdev__ bool operator==(const BaseBBox& rhs) const { return mCoord[0] == rhs.mCoord[0] && mCoord[1] == rhs.mCoord[1]; }; - __hostdev__ bool operator!=(const BaseBBox& rhs) const { return mCoord[0] != rhs.mCoord[0] || mCoord[1] != rhs.mCoord[1]; }; - __hostdev__ const Vec3T& operator[](int i) const { return mCoord[i]; } - __hostdev__ Vec3T& operator[](int i) { return mCoord[i]; } - __hostdev__ Vec3T& min() { return mCoord[0]; } - __hostdev__ Vec3T& max() { return mCoord[1]; } - __hostdev__ const Vec3T& min() const { return mCoord[0]; } - __hostdev__ const Vec3T& max() const { return mCoord[1]; } - __hostdev__ BaseBBox& translate(const Vec3T& xyz) - { - mCoord[0] += xyz; - mCoord[1] += xyz; - return *this; - } - /// @brief Expand this bounding box to enclose point @c xyz. - __hostdev__ BaseBBox& expand(const Vec3T& xyz) - { - mCoord[0].minComponent(xyz); - mCoord[1].maxComponent(xyz); - return *this; - } - - /// @brief Expand this bounding box to enclose the given bounding box. - __hostdev__ BaseBBox& expand(const BaseBBox& bbox) - { - mCoord[0].minComponent(bbox[0]); - mCoord[1].maxComponent(bbox[1]); - return *this; - } - - /// @brief Intersect this bounding box with the given bounding box. 
- __hostdev__ BaseBBox& intersect(const BaseBBox& bbox) - { - mCoord[0].maxComponent(bbox[0]); - mCoord[1].minComponent(bbox[1]); - return *this; - } - - //__hostdev__ BaseBBox expandBy(typename Vec3T::ValueType padding) const - //{ - // return BaseBBox(mCoord[0].offsetBy(-padding),mCoord[1].offsetBy(padding)); - //} - __hostdev__ bool isInside(const Vec3T& xyz) - { - if (xyz[0] < mCoord[0][0] || xyz[1] < mCoord[0][1] || xyz[2] < mCoord[0][2]) - return false; - if (xyz[0] > mCoord[1][0] || xyz[1] > mCoord[1][1] || xyz[2] > mCoord[1][2]) - return false; - return true; - } - -protected: - __hostdev__ BaseBBox() {} - __hostdev__ BaseBBox(const Vec3T& min, const Vec3T& max) - : mCoord{min, max} - { - } -}; // BaseBBox - -template::value> -struct BBox; - -/// @brief Partial template specialization for floating point coordinate types. -/// -/// @note Min is inclusive and max is exclusive. If min = max the dimension of -/// the bounding box is zero and therefore it is also empty. -template -struct BBox : public BaseBBox -{ - using Vec3Type = Vec3T; - using ValueType = typename Vec3T::ValueType; - static_assert(is_floating_point::value, "Expected a floating point coordinate type"); - using BaseT = BaseBBox; - using BaseT::mCoord; - /// @brief Default construction sets BBox to an empty bbox - __hostdev__ BBox() - : BaseT(Vec3T( Maximum::value()), - Vec3T(-Maximum::value())) - { - } - __hostdev__ BBox(const Vec3T& min, const Vec3T& max) - : BaseT(min, max) - { - } - __hostdev__ BBox(const Coord& min, const Coord& max) - : BaseT(Vec3T(ValueType(min[0]), ValueType(min[1]), ValueType(min[2])), - Vec3T(ValueType(max[0] + 1), ValueType(max[1] + 1), ValueType(max[2] + 1))) - { - } - __hostdev__ static BBox createCube(const Coord& min, typename Coord::ValueType dim) - { - return BBox(min, min.offsetBy(dim)); - } - - __hostdev__ BBox(const BaseBBox& bbox) - : BBox(bbox[0], bbox[1]) - { - } - __hostdev__ bool empty() const { return mCoord[0][0] >= mCoord[1][0] || - mCoord[0][1] >= mCoord[1][1] || - mCoord[0][2] >= mCoord[1][2]; } - __hostdev__ operator bool() const { return mCoord[0][0] < mCoord[1][0] && - mCoord[0][1] < mCoord[1][1] && - mCoord[0][2] < mCoord[1][2]; } - __hostdev__ Vec3T dim() const { return *this ? this->max() - this->min() : Vec3T(0); } - __hostdev__ bool isInside(const Vec3T& p) const - { - return p[0] > mCoord[0][0] && p[1] > mCoord[0][1] && p[2] > mCoord[0][2] && - p[0] < mCoord[1][0] && p[1] < mCoord[1][1] && p[2] < mCoord[1][2]; - } - -}; // BBox - -/// @brief Partial template specialization for integer coordinate types -/// -/// @note Both min and max are INCLUDED in the bbox so dim = max - min + 1. So, -/// if min = max the bounding box contains exactly one point and dim = 1! -template -struct BBox : public BaseBBox -{ - static_assert(is_same::value, "Expected \"int\" coordinate type"); - using BaseT = BaseBBox; - using BaseT::mCoord; - /// @brief Iterator over the domain covered by a BBox - /// @details z is the fastest-moving coordinate. 
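To make the @details note above concrete: the Iterator that follows visits an inclusive integer bounding box in exactly the order of this loop nest. This is a standalone sketch rather than library code, with bounds hard-coded for the demo.

#include <cstdio>

int main()
{
    // Inclusive bounds on both ends, as in the integer BBox specialization.
    const int minC[3] = {0, 0, 0}, maxC[3] = {1, 1, 1};
    for (int x = minC[0]; x <= maxC[0]; ++x)
        for (int y = minC[1]; y <= maxC[1]; ++y)
            for (int z = minC[2]; z <= maxC[2]; ++z)
                std::printf("(%d, %d, %d)\n", x, y, z); // z varies fastest
    return 0;
}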
- class Iterator - { - const BBox& mBBox; - CoordT mPos; - - public: - __hostdev__ Iterator(const BBox& b) - : mBBox(b) - , mPos(b.min()) - { - } - __hostdev__ Iterator(const BBox& b, const Coord& p) - : mBBox(b) - , mPos(p) - { - } - __hostdev__ Iterator& operator++() - { - if (mPos[2] < mBBox[1][2]) { // this is the most common case - ++mPos[2];// increment z - } else if (mPos[1] < mBBox[1][1]) { - mPos[2] = mBBox[0][2];// reset z - ++mPos[1];// increment y - } else if (mPos[0] <= mBBox[1][0]) { - mPos[2] = mBBox[0][2];// reset z - mPos[1] = mBBox[0][1];// reset y - ++mPos[0];// increment x - } - return *this; - } - __hostdev__ Iterator operator++(int) - { - auto tmp = *this; - ++(*this); - return tmp; - } - __hostdev__ bool operator==(const Iterator& rhs) const - { - NANOVDB_ASSERT(mBBox == rhs.mBBox); - return mPos == rhs.mPos; - } - __hostdev__ bool operator!=(const Iterator& rhs) const - { - NANOVDB_ASSERT(mBBox == rhs.mBBox); - return mPos != rhs.mPos; - } - __hostdev__ bool operator<(const Iterator& rhs) const - { - NANOVDB_ASSERT(mBBox == rhs.mBBox); - return mPos < rhs.mPos; - } - __hostdev__ bool operator<=(const Iterator& rhs) const - { - NANOVDB_ASSERT(mBBox == rhs.mBBox); - return mPos <= rhs.mPos; - } - /// @brief Return @c true if the iterator still points to a valid coordinate. - __hostdev__ operator bool() const { return mPos <= mBBox[1]; } - __hostdev__ const CoordT& operator*() const { return mPos; } - }; // Iterator - __hostdev__ Iterator begin() const { return Iterator{*this}; } - __hostdev__ Iterator end() const { return Iterator{*this, CoordT(mCoord[1][0]+1, mCoord[0][1], mCoord[0][2])}; } - __hostdev__ BBox() - : BaseT(CoordT::max(), CoordT::min()) - { - } - __hostdev__ BBox(const CoordT& min, const CoordT& max) - : BaseT(min, max) - { - } + /// instance has a smaller major verion (is older), and a positive age if it is newer, i.e. larger. + __hostdev__ int age() const {return int(this->getMajor()) - int(NANOVDB_MAJOR_VERSION_NUMBER);} +}; // Version - template - __hostdev__ BBox(BBox& other, const SplitT&) - : BaseT(other.mCoord[0], other.mCoord[1]) - { - NANOVDB_ASSERT(this->is_divisible()); - const int n = MaxIndex(this->dim()); - mCoord[1][n] = (mCoord[0][n] + mCoord[1][n]) >> 1; - other.mCoord[0][n] = mCoord[1][n] + 1; - } +/// @brief print the verion number to a c-string +/// @param dst destination string of size 8 or more +/// @param v version to be printed +/// @return returns destination string @c dst +__hostdev__ inline char* toStr(char *dst, const Version &v) +{ + return util::sprint(dst, v.getMajor(), ".",v.getMinor(), ".",v.getPatch()); +} - __hostdev__ static BBox createCube(const CoordT& min, typename CoordT::ValueType dim) - { - return BBox(min, min.offsetBy(dim - 1)); - } +// ----------------------------> TensorTraits <-------------------------------------- - __hostdev__ static BBox createCube(typename CoordT::ValueType min, typename CoordT::ValueType max) - { - return BBox(CoordT(min), CoordT(max)); - } +template::value || util::is_specialization::value || util::is_same::value) ? 1 : 0> +struct TensorTraits; - __hostdev__ bool is_divisible() const { return mCoord[0][0] < mCoord[1][0] && - mCoord[0][1] < mCoord[1][1] && - mCoord[0][2] < mCoord[1][2]; } - /// @brief Return true if this bounding box is empty, e.g. 
uninitialized - __hostdev__ bool empty() const { return mCoord[0][0] > mCoord[1][0] || - mCoord[0][1] > mCoord[1][1] || - mCoord[0][2] > mCoord[1][2]; } - /// @brief Convert this BBox to boolean true if it is not empty - __hostdev__ operator bool() const { return mCoord[0][0] <= mCoord[1][0] && - mCoord[0][1] <= mCoord[1][1] && - mCoord[0][2] <= mCoord[1][2]; } - __hostdev__ CoordT dim() const { return *this ? this->max() - this->min() + Coord(1) : Coord(0); } - __hostdev__ uint64_t volume() const - { - auto d = this->dim(); - return uint64_t(d[0]) * uint64_t(d[1]) * uint64_t(d[2]); - } - __hostdev__ bool isInside(const CoordT& p) const { return !(CoordT::lessThan(p, this->min()) || CoordT::lessThan(this->max(), p)); } - /// @brief Return @c true if the given bounding box is inside this bounding box. - __hostdev__ bool isInside(const BBox& b) const - { - return !(CoordT::lessThan(b.min(), this->min()) || CoordT::lessThan(this->max(), b.max())); - } +template +struct TensorTraits +{ + static const int Rank = 0; // i.e. scalar + static const bool IsScalar = true; + static const bool IsVector = false; + static const int Size = 1; + using ElementType = T; + static T scalar(const T& s) { return s; } +}; - /// @brief Return @c true if the given bounding box overlaps with this bounding box. - __hostdev__ bool hasOverlap(const BBox& b) const - { - return !(CoordT::lessThan(this->max(), b.min()) || CoordT::lessThan(b.max(), this->min())); - } +template +struct TensorTraits +{ + static const int Rank = 1; // i.e. vector + static const bool IsScalar = false; + static const bool IsVector = true; + static const int Size = T::SIZE; + using ElementType = typename T::ValueType; + static ElementType scalar(const T& v) { return v.length(); } +}; - /// @warning This converts a CoordBBox into a floating-point bounding box which implies that max += 1 ! - template - __hostdev__ BBox> asReal() const - { - static_assert(is_floating_point::value, "CoordBBox::asReal: Expected a floating point coordinate"); - return BBox>(Vec3(RealT(mCoord[0][0]), RealT(mCoord[0][1]), RealT(mCoord[0][2])), - Vec3(RealT(mCoord[1][0] + 1), RealT(mCoord[1][1] + 1), RealT(mCoord[1][2] + 1))); - } - /// @brief Return a new instance that is expanded by the specified padding. 
- __hostdev__ BBox expandBy(typename CoordT::ValueType padding) const - { - return BBox(mCoord[0].offsetBy(-padding), mCoord[1].offsetBy(padding)); - } +// ----------------------------> FloatTraits <-------------------------------------- - /// @brief @brief transform this coordinate bounding box by the specified map - /// @param map mapping of index to world coordinates - /// @return world bounding box - template - __hostdev__ BBox transform(const Map& map) const - { - const Vec3d tmp = map.applyMap(Vec3d(mCoord[0][0], mCoord[0][1], mCoord[0][2])); - BBox bbox(tmp, tmp); - bbox.expand(map.applyMap(Vec3d(mCoord[0][0], mCoord[0][1], mCoord[1][2]))); - bbox.expand(map.applyMap(Vec3d(mCoord[0][0], mCoord[1][1], mCoord[0][2]))); - bbox.expand(map.applyMap(Vec3d(mCoord[1][0], mCoord[0][1], mCoord[0][2]))); - bbox.expand(map.applyMap(Vec3d(mCoord[1][0], mCoord[1][1], mCoord[0][2]))); - bbox.expand(map.applyMap(Vec3d(mCoord[1][0], mCoord[0][1], mCoord[1][2]))); - bbox.expand(map.applyMap(Vec3d(mCoord[0][0], mCoord[1][1], mCoord[1][2]))); - bbox.expand(map.applyMap(Vec3d(mCoord[1][0], mCoord[1][1], mCoord[1][2]))); - return bbox; - } +template::ElementType)> +struct FloatTraits +{ + using FloatType = float; +}; -#if defined(__CUDACC__) // the following functions only run on the GPU! - __device__ inline BBox& expandAtomic(const CoordT& ijk) - { - mCoord[0].minComponentAtomic(ijk); - mCoord[1].maxComponentAtomic(ijk); - return *this; - } - __device__ inline BBox& expandAtomic(const BBox& bbox) - { - mCoord[0].minComponentAtomic(bbox[0]); - mCoord[1].maxComponentAtomic(bbox[1]); - return *this; - } - __device__ inline BBox& intersectAtomic(const BBox& bbox) - { - mCoord[0].maxComponentAtomic(bbox[0]); - mCoord[1].minComponentAtomic(bbox[1]); - return *this; - } -#endif -}; // BBox +template +struct FloatTraits +{ + using FloatType = double; +}; -using CoordBBox = BBox; -using BBoxR = BBox; +template<> +struct FloatTraits +{ + using FloatType = bool; +}; -// -------------------> Find lowest and highest bit in a word <---------------------------- +template<> +struct FloatTraits // size of empty class in C++ is 1 byte and not 0 byte +{ + using FloatType = uint64_t; +}; -/// @brief Returns the index of the lowest, i.e. least significant, on bit in the specified 32 bit word -/// -/// @warning Assumes that at least one bit is set in the word, i.e. @a v != uint32_t(0)! 
-NANOVDB_HOSTDEV_DISABLE_WARNING -__hostdev__ static inline uint32_t FindLowestOn(uint32_t v) +template<> +struct FloatTraits // size of empty class in C++ is 1 byte and not 0 byte { - NANOVDB_ASSERT(v); -#if (defined(__CUDA_ARCH__) || defined(__HIP__)) && defined(NANOVDB_USE_INTRINSICS) - return __ffs(v) - 1; // one based indexing -#elif defined(_MSC_VER) && defined(NANOVDB_USE_INTRINSICS) - unsigned long index; - _BitScanForward(&index, v); - return static_cast(index); -#elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS) - return static_cast(__builtin_ctzl(v)); -#else - //NANO_WARNING("Using software implementation for FindLowestOn(uint32_t v)") - static const unsigned char DeBruijn[32] = { - 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9}; -// disable unary minus on unsigned warning -#if defined(_MSC_VER) && !defined(__NVCC__) -#pragma warning(push) -#pragma warning(disable : 4146) -#endif - return DeBruijn[uint32_t((v & -v) * 0x077CB531U) >> 27]; -#if defined(_MSC_VER) && !defined(__NVCC__) -#pragma warning(pop) -#endif + using FloatType = uint64_t; +}; -#endif -} +template<> +struct FloatTraits // size of empty class in C++ is 1 byte and not 0 byte +{ + using FloatType = uint64_t; +}; -/// @brief Returns the index of the highest, i.e. most significant, on bit in the specified 32 bit word -/// -/// @warning Assumes that at least one bit is set in the word, i.e. @a v != uint32_t(0)! -NANOVDB_HOSTDEV_DISABLE_WARNING -__hostdev__ static inline uint32_t FindHighestOn(uint32_t v) +template<> +struct FloatTraits // size of empty class in C++ is 1 byte and not 0 byte { - NANOVDB_ASSERT(v); -#if (defined(__CUDA_ARCH__) || defined(__HIP__)) && defined(NANOVDB_USE_INTRINSICS) - return sizeof(uint32_t) * 8 - 1 - __clz(v); // Return the number of consecutive high-order zero bits in a 32-bit integer. -#elif defined(_MSC_VER) && defined(NANOVDB_USE_INTRINSICS) - unsigned long index; - _BitScanReverse(&index, v); - return static_cast(index); -#elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS) - return sizeof(unsigned long) * 8 - 1 - __builtin_clzl(v); -#else - //NANO_WARNING("Using software implementation for FindHighestOn(uint32_t)") - static const unsigned char DeBruijn[32] = { - 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, - 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31}; - v |= v >> 1; // first round down to one less than a power of 2 - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - return DeBruijn[uint32_t(v * 0x07C4ACDDU) >> 27]; -#endif -} + using FloatType = uint64_t; +}; -/// @brief Returns the index of the lowest, i.e. least significant, on bit in the specified 64 bit word -/// -/// @warning Assumes that at least one bit is set in the word, i.e. @a v != uint32_t(0)! 
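That warning, and its counterpart for the 64-bit overloads below (where the precondition is really @a v != uint64_t(0)), is easy to trip over: the scans are undefined for an all-zero word. Here is a hedged sketch of a guarded wrapper built on the portable C++20 facility rather than the intrinsic/De Bruijn paths used in this header; safeFindLowestOn is a hypothetical name, not part of NanoVDB.

#include <bit>     // std::countr_zero (C++20)
#include <cstdint>
#include <cstdio>

// Returns 64 for an all-zero word instead of relying on a scan whose result
// the NanoVDB helpers leave undefined when no bit is set.
static inline uint32_t safeFindLowestOn(uint64_t v)
{
    return v ? uint32_t(std::countr_zero(v)) : 64u;
}

int main()
{
    std::printf("%u\n", safeFindLowestOn(0b1000u)); // prints 3
    std::printf("%u\n", safeFindLowestOn(0u));      // prints 64 (empty word)
    return 0;
}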
-NANOVDB_HOSTDEV_DISABLE_WARNING -__hostdev__ static inline uint32_t FindLowestOn(uint64_t v) +template<> +struct FloatTraits // size of empty class in C++ is 1 byte and not 0 byte { - NANOVDB_ASSERT(v); -#if (defined(__CUDA_ARCH__) || defined(__HIP__)) && defined(NANOVDB_USE_INTRINSICS) - return __ffsll(static_cast(v)) - 1; // one based indexing -#elif defined(_MSC_VER) && defined(NANOVDB_USE_INTRINSICS) - unsigned long index; - _BitScanForward64(&index, v); - return static_cast(index); -#elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS) - return static_cast(__builtin_ctzll(v)); -#else - //NANO_WARNING("Using software implementation for FindLowestOn(uint64_t)") - static const unsigned char DeBruijn[64] = { - 0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28, - 62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11, - 63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10, - 51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12, - }; -// disable unary minus on unsigned warning -#if defined(_MSC_VER) && !defined(__NVCC__) -#pragma warning(push) -#pragma warning(disable : 4146) -#endif - return DeBruijn[uint64_t((v & -v) * UINT64_C(0x022FDD63CC95386D)) >> 58]; -#if defined(_MSC_VER) && !defined(__NVCC__) -#pragma warning(pop) -#endif + using FloatType = bool; +}; -#endif -} +template<> +struct FloatTraits // size of empty class in C++ is 1 byte and not 0 byte +{ + using FloatType = double; +}; -/// @brief Returns the index of the highest, i.e. most significant, on bit in the specified 64 bit word -/// -/// @warning Assumes that at least one bit is set in the word, i.e. @a v != uint32_t(0)! -NANOVDB_HOSTDEV_DISABLE_WARNING -__hostdev__ static inline uint32_t FindHighestOn(uint64_t v) +// ----------------------------> mapping BuildType -> GridType <-------------------------------------- + +/// @brief Maps from a templated build type to a GridType enum +template +__hostdev__ inline GridType toGridType() { - NANOVDB_ASSERT(v); -#if (defined(__CUDA_ARCH__) || defined(__HIP__)) && defined(NANOVDB_USE_INTRINSICS) - return sizeof(unsigned long) * 8 - 1 - __clzll(static_cast(v)); -#elif defined(_MSC_VER) && defined(NANOVDB_USE_INTRINSICS) - unsigned long index; - _BitScanReverse64(&index, v); - return static_cast(index); -#elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS) - return sizeof(unsigned long) * 8 - 1 - __builtin_clzll(v); -#else - const uint32_t* p = reinterpret_cast(&v); - return p[1] ? 
32u + FindHighestOn(p[1]) : FindHighestOn(p[0]); -#endif -} + if constexpr(util::is_same::value) { // resolved at compile-time + return GridType::Float; + } else if constexpr(util::is_same::value) { + return GridType::Double; + } else if constexpr(util::is_same::value) { + return GridType::Int16; + } else if constexpr(util::is_same::value) { + return GridType::Int32; + } else if constexpr(util::is_same::value) { + return GridType::Int64; + } else if constexpr(util::is_same::value) { + return GridType::Vec3f; + } else if constexpr(util::is_same::value) { + return GridType::Vec3d; + } else if constexpr(util::is_same::value) { + return GridType::UInt32; + } else if constexpr(util::is_same::value) { + return GridType::Mask; + } else if constexpr(util::is_same::value) { + return GridType::Half; + } else if constexpr(util::is_same::value) { + return GridType::Index; + } else if constexpr(util::is_same::value) { + return GridType::OnIndex; + } else if constexpr(util::is_same::value) { + return GridType::IndexMask; + } else if constexpr(util::is_same::value) { + return GridType::OnIndexMask; + } else if constexpr(util::is_same::value) { + return GridType::Boolean; + } else if constexpr(util::is_same::value) { + return GridType::RGBA8; + } else if constexpr(util::is_same::value) { + return GridType::Fp4; + } else if constexpr(util::is_same::value) { + return GridType::Fp8; + } else if constexpr(util::is_same::value) { + return GridType::Fp16; + } else if constexpr(util::is_same::value) { + return GridType::FpN; + } else if constexpr(util::is_same::value) { + return GridType::Vec4f; + } else if constexpr(util::is_same::value) { + return GridType::Vec4d; + } else if constexpr(util::is_same::value) { + return GridType::PointIndex; + } else if constexpr(util::is_same::value) { + return GridType::Vec3u8; + } else if constexpr(util::is_same::value) { + return GridType::Vec3u16; + } else if constexpr(util::is_same::value) { + return GridType::UInt8; + } + return GridType::Unknown; +}// toGridType -// ----------------------------> CountOn <-------------------------------------- +template +[[deprecated("Use toGridType() instead.")]] +__hostdev__ inline GridType mapToGridType(){return toGridType();} + +// ----------------------------> mapping BuildType -> GridClass <-------------------------------------- -/// @return Number of bits that are on in the specified 64-bit word -NANOVDB_HOSTDEV_DISABLE_WARNING -__hostdev__ inline uint32_t CountOn(uint64_t v) +/// @brief Maps from a templated build type to a GridClass enum +template +__hostdev__ inline GridClass toGridClass(GridClass defaultClass = GridClass::Unknown) { -#if (defined(__CUDA_ARCH__) || defined(__HIP__)) && defined(NANOVDB_USE_INTRINSICS) - //#warning Using popcll for CountOn - return __popcll(v); -// __popcnt64 intrinsic support was added in VS 2019 16.8 -#elif defined(_MSC_VER) && defined(_M_X64) && (_MSC_VER >= 1928) && defined(NANOVDB_USE_INTRINSICS) - //#warning Using popcnt64 for CountOn - return uint32_t(__popcnt64(v)); -#elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS) - //#warning Using builtin_popcountll for CountOn - return __builtin_popcountll(v); -#else // use software implementation - //NANO_WARNING("Using software implementation for CountOn") - v = v - ((v >> 1) & uint64_t(0x5555555555555555)); - v = (v & uint64_t(0x3333333333333333)) + ((v >> 2) & uint64_t(0x3333333333333333)); - return (((v + (v >> 4)) & uint64_t(0xF0F0F0F0F0F0F0F)) * uint64_t(0x101010101010101)) >> 56; -#endif + if 
constexpr(util::is_same::value) { + return GridClass::Topology; + } else if constexpr(BuildTraits::is_index) { + return GridClass::IndexGrid; + } else if constexpr(util::is_same::value) { + return GridClass::VoxelVolume; + } else if constexpr(util::is_same::value) { + return GridClass::PointIndex; + } + return defaultClass; +} + +template +[[deprecated("Use toGridClass() instead.")]] +__hostdev__ inline GridClass mapToGridClass(GridClass defaultClass = GridClass::Unknown) +{ + return toGridClass(); } // ----------------------------> BitFlags <-------------------------------------- @@ -2694,31 +930,28 @@ class BitFlags : public BitArray public: using Type = decltype(mFlags); BitFlags() {} + BitFlags(Type mask) : BitArray{mask} {} BitFlags(std::initializer_list list) { - for (auto bit : list) - mFlags |= static_cast(1 << bit); + for (auto bit : list) mFlags |= static_cast(1 << bit); } template BitFlags(std::initializer_list list) { - for (auto mask : list) - mFlags |= static_cast(mask); + for (auto mask : list) mFlags |= static_cast(mask); } __hostdev__ Type data() const { return mFlags; } __hostdev__ Type& data() { return mFlags; } __hostdev__ void initBit(std::initializer_list list) { mFlags = 0u; - for (auto bit : list) - mFlags |= static_cast(1 << bit); + for (auto bit : list) mFlags |= static_cast(1 << bit); } template __hostdev__ void initMask(std::initializer_list list) { mFlags = 0u; - for (auto mask : list) - mFlags |= static_cast(mask); + for (auto mask : list) mFlags |= static_cast(mask); } //__hostdev__ Type& data() { return mFlags; } //__hostdev__ Type data() const { return mFlags; } @@ -2732,13 +965,11 @@ class BitFlags : public BitArray __hostdev__ void setBitOn(std::initializer_list list) { - for (auto bit : list) - mFlags |= static_cast(1 << bit); + for (auto bit : list) mFlags |= static_cast(1 << bit); } __hostdev__ void setBitOff(std::initializer_list list) { - for (auto bit : list) - mFlags &= ~static_cast(1 << bit); + for (auto bit : list) mFlags &= ~static_cast(1 << bit); } template @@ -2749,14 +980,12 @@ class BitFlags : public BitArray template __hostdev__ void setMaskOn(std::initializer_list list) { - for (auto mask : list) - mFlags |= static_cast(mask); + for (auto mask : list) mFlags |= static_cast(mask); } template __hostdev__ void setMaskOff(std::initializer_list list) { - for (auto mask : list) - mFlags &= ~static_cast(mask); + for (auto mask : list) mFlags &= ~static_cast(mask); } __hostdev__ void setBit(uint8_t bit, bool on) { on ? 
this->setBitOn(bit) : this->setBitOff(bit); } @@ -2775,18 +1004,18 @@ class BitFlags : public BitArray template __hostdev__ bool isMaskOn(std::initializer_list list) const { - for (auto mask : list) - if (0 != (mFlags & static_cast(mask))) - return true; + for (auto mask : list) { + if (0 != (mFlags & static_cast(mask))) return true; + } return false; } /// @brief return true if any of the masks in the list are off template __hostdev__ bool isMaskOff(std::initializer_list list) const { - for (auto mask : list) - if (0 == (mFlags & static_cast(mask))) - return true; + for (auto mask : list) { + if (0 == (mFlags & static_cast(mask))) return true; + } return false; } /// @brief required for backwards compatibility @@ -2822,16 +1051,16 @@ class Mask { uint32_t sum = 0; for (const uint64_t *w = mWords, *q = w + WORD_COUNT; w != q; ++w) - sum += CountOn(*w); + sum += util::countOn(*w); return sum; } /// @brief Return the number of lower set bits in mask up to but excluding the i'th bit inline __hostdev__ uint32_t countOn(uint32_t i) const { - uint32_t n = i >> 6, sum = CountOn(mWords[n] & ((uint64_t(1) << (i & 63u)) - 1u)); + uint32_t n = i >> 6, sum = util::countOn(mWords[n] & ((uint64_t(1) << (i & 63u)) - 1u)); for (const uint64_t* w = mWords; n--; ++w) - sum += CountOn(*w); + sum += util::countOn(*w); return sum; } @@ -2932,7 +1161,7 @@ class Mask /// @brief Assignment operator that works with openvdb::util::NodeMask template - __hostdev__ typename enable_if::value, Mask&>::type operator=(const MaskT& other) + __hostdev__ typename util::enable_if::value, Mask&>::type operator=(const MaskT& other) { static_assert(sizeof(Mask) == sizeof(MaskT), "Mismatching sizeof"); static_assert(WORD_COUNT == MaskT::WORD_COUNT, "Mismatching word count"); @@ -2943,11 +1172,8 @@ class Mask return *this; } - __hostdev__ Mask& operator=(const Mask& other) - { - memcpy64(mWords, other.mWords, WORD_COUNT); - return *this; - } + //__hostdev__ Mask& operator=(const Mask& other){return *util::memcpy(this, &other);} + Mask& operator=(const Mask&) = default; __hostdev__ bool operator==(const Mask& other) const { @@ -3019,30 +1245,26 @@ class Mask /// @brief Set all bits on __hostdev__ void setOn() { - for (uint32_t i = 0; i < WORD_COUNT; ++i) - mWords[i] = ~uint64_t(0); + for (uint32_t i = 0; i < WORD_COUNT; ++i)mWords[i] = ~uint64_t(0); } /// @brief Set all bits off __hostdev__ void setOff() { - for (uint32_t i = 0; i < WORD_COUNT; ++i) - mWords[i] = uint64_t(0); + for (uint32_t i = 0; i < WORD_COUNT; ++i) mWords[i] = uint64_t(0); } /// @brief Set all bits off __hostdev__ void set(bool on) { const uint64_t v = on ? 
~uint64_t(0) : uint64_t(0); - for (uint32_t i = 0; i < WORD_COUNT; ++i) - mWords[i] = v; + for (uint32_t i = 0; i < WORD_COUNT; ++i) mWords[i] = v; } /// brief Toggle the state of all bits in the mask __hostdev__ void toggle() { uint32_t n = WORD_COUNT; - for (auto* w = mWords; n--; ++w) - *w = ~*w; + for (auto* w = mWords; n--; ++w) *w = ~*w; } __hostdev__ void toggle(uint32_t n) { mWords[n >> 6] ^= uint64_t(1) << (n & 63); } @@ -3051,8 +1273,7 @@ class Mask { uint64_t* w1 = mWords; const uint64_t* w2 = other.mWords; - for (uint32_t n = WORD_COUNT; n--; ++w1, ++w2) - *w1 &= *w2; + for (uint32_t n = WORD_COUNT; n--; ++w1, ++w2) *w1 &= *w2; return *this; } /// @brief Bitwise union @@ -3060,8 +1281,7 @@ class Mask { uint64_t* w1 = mWords; const uint64_t* w2 = other.mWords; - for (uint32_t n = WORD_COUNT; n--; ++w1, ++w2) - *w1 |= *w2; + for (uint32_t n = WORD_COUNT; n--; ++w1, ++w2) *w1 |= *w2; return *this; } /// @brief Bitwise difference @@ -3069,8 +1289,7 @@ class Mask { uint64_t* w1 = mWords; const uint64_t* w2 = other.mWords; - for (uint32_t n = WORD_COUNT; n--; ++w1, ++w2) - *w1 &= ~*w2; + for (uint32_t n = WORD_COUNT; n--; ++w1, ++w2) *w1 &= ~*w2; return *this; } /// @brief Bitwise XOR @@ -3078,8 +1297,7 @@ class Mask { uint64_t* w1 = mWords; const uint64_t* w2 = other.mWords; - for (uint32_t n = WORD_COUNT; n--; ++w1, ++w2) - *w1 ^= *w2; + for (uint32_t n = WORD_COUNT; n--; ++w1, ++w2) *w1 ^= *w2; return *this; } @@ -3089,9 +1307,8 @@ class Mask { uint32_t n = 0u; const uint64_t* w = mWords; - for (; n < WORD_COUNT && !(ON ? *w : ~*w); ++w, ++n) - ; - return n < WORD_COUNT ? (n << 6) + FindLowestOn(ON ? *w : ~*w) : SIZE; + for (; n < WORD_COUNT && !(ON ? *w : ~*w); ++w, ++n); + return n < WORD_COUNT ? (n << 6) + util::findLowestOn(ON ? *w : ~*w) : SIZE; } NANOVDB_HOSTDEV_DISABLE_WARNING @@ -3099,16 +1316,13 @@ class Mask __hostdev__ uint32_t findNext(uint32_t start) const { uint32_t n = start >> 6; // initiate - if (n >= WORD_COUNT) - return SIZE; // check for out of bounds + if (n >= WORD_COUNT) return SIZE; // check for out of bounds uint32_t m = start & 63u; uint64_t b = ON ? mWords[n] : ~mWords[n]; - if (b & (uint64_t(1u) << m)) - return start; // simple case: start is on/off + if (b & (uint64_t(1u) << m)) return start; // simple case: start is on/off b &= ~uint64_t(0u) << m; // mask out lower bits - while (!b && ++n < WORD_COUNT) - b = ON ? mWords[n] : ~mWords[n]; // find next non-zero word - return b ? (n << 6) + FindLowestOn(b) : SIZE; // catch last word=0 + while (!b && ++n < WORD_COUNT) b = ON ? mWords[n] : ~mWords[n]; // find next non-zero word + return b ? (n << 6) + util::findLowestOn(b) : SIZE; // catch last word=0 } NANOVDB_HOSTDEV_DISABLE_WARNING @@ -3116,16 +1330,13 @@ class Mask __hostdev__ uint32_t findPrev(uint32_t start) const { uint32_t n = start >> 6; // initiate - if (n >= WORD_COUNT) - return SIZE; // check for out of bounds + if (n >= WORD_COUNT) return SIZE; // check for out of bounds uint32_t m = start & 63u; uint64_t b = ON ? mWords[n] : ~mWords[n]; - if (b & (uint64_t(1u) << m)) - return start; // simple case: start is on/off + if (b & (uint64_t(1u) << m)) return start; // simple case: start is on/off b &= (uint64_t(1u) << m) - 1u; // mask out higher bits - while (!b && n) - b = ON ? mWords[--n] : ~mWords[--n]; // find previous non-zero word - return b ? (n << 6) + FindHighestOn(b) : SIZE; // catch first word=0 + while (!b && n) b = ON ? mWords[--n] : ~mWords[--n]; // find previous non-zero word + return b ? 
(n << 6) + util::findHighestOn(b) : SIZE; // catch first word=0 } private: @@ -3148,11 +1359,11 @@ struct Map /// @brief Default constructor for the identity map __hostdev__ Map() - : mMatF{1.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 1.0f} + : mMatF{ 1.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 1.0f} , mInvMatF{1.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 1.0f} , mVecF{0.0f, 0.0f, 0.0f} , mTaperF{1.0f} - , mMatD{1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0} + , mMatD{ 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0} , mInvMatD{1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0} , mVecD{0.0, 0.0, 0.0} , mTaperD{1.0} @@ -3190,7 +1401,7 @@ struct Map /// @param ijk 3D vector to be mapped - typically floating point index coordinates /// @return Forward mapping for affine transformation, i.e. (mat x ijk) + translation template - __hostdev__ Vec3T applyMap(const Vec3T& ijk) const { return matMult(mMatD, mVecD, ijk); } + __hostdev__ Vec3T applyMap(const Vec3T& ijk) const { return math::matMult(mMatD, mVecD, ijk); } /// @brief Apply the forward affine transformation to a vector using 32bit floating point arithmetics. /// @note Typically this operation is used for the scale, rotation and translation of index -> world mapping @@ -3198,7 +1409,7 @@ struct Map /// @param ijk 3D vector to be mapped - typically floating point index coordinates /// @return Forward mapping for affine transformation, i.e. (mat x ijk) + translation template - __hostdev__ Vec3T applyMapF(const Vec3T& ijk) const { return matMult(mMatF, mVecF, ijk); } + __hostdev__ Vec3T applyMapF(const Vec3T& ijk) const { return math::matMult(mMatF, mVecF, ijk); } /// @brief Apply the linear forward 3x3 transformation to an input 3d vector using 64bit floating point arithmetics, /// e.g. scale and rotation WITHOUT translation. @@ -3207,7 +1418,7 @@ struct Map /// @param ijk 3D vector to be mapped - typically floating point index coordinates /// @return linear forward 3x3 mapping of the input vector template - __hostdev__ Vec3T applyJacobian(const Vec3T& ijk) const { return matMult(mMatD, ijk); } + __hostdev__ Vec3T applyJacobian(const Vec3T& ijk) const { return math::matMult(mMatD, ijk); } /// @brief Apply the linear forward 3x3 transformation to an input 3d vector using 32bit floating point arithmetics, /// e.g. scale and rotation WITHOUT translation. @@ -3216,7 +1427,7 @@ struct Map /// @param ijk 3D vector to be mapped - typically floating point index coordinates /// @return linear forward 3x3 mapping of the input vector template - __hostdev__ Vec3T applyJacobianF(const Vec3T& ijk) const { return matMult(mMatF, ijk); } + __hostdev__ Vec3T applyJacobianF(const Vec3T& ijk) const { return math::matMult(mMatF, ijk); } /// @brief Apply the inverse affine mapping to a vector using 64bit floating point arithmetics. /// @note Typically this operation is used for the world -> index mapping @@ -3226,7 +1437,7 @@ struct Map template __hostdev__ Vec3T applyInverseMap(const Vec3T& xyz) const { - return matMult(mInvMatD, Vec3T(xyz[0] - mVecD[0], xyz[1] - mVecD[1], xyz[2] - mVecD[2])); + return math::matMult(mInvMatD, Vec3T(xyz[0] - mVecD[0], xyz[1] - mVecD[1], xyz[2] - mVecD[2])); } /// @brief Apply the inverse affine mapping to a vector using 32bit floating point arithmetics. 
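The applyMap/applyInverseMap pair above reduces to xyz = mat * ijk + vec going index-to-world and ijk = invMat * (xyz - vec) coming back. A self-contained sketch of that round trip follows, assuming a pure scale map with voxel size 0.5 and plain double arrays in row-major order; no NanoVDB types are involved.

#include <cstdio>

// Forward: out = mat * in + vec; with vec = {0,0,0} this doubles as the inverse step.
static void affine(const double* mat, const double* vec, const double* in, double* out)
{
    for (int i = 0; i < 3; ++i)
        out[i] = mat[3 * i] * in[0] + mat[3 * i + 1] * in[1] + mat[3 * i + 2] * in[2] + vec[i];
}

int main()
{
    const double mat[9]   = {0.5, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.5}; // index -> world scale
    const double inv[9]   = {2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0}; // its inverse
    const double trans[3] = {1.0, 2.0, 3.0};
    const double zero[3]  = {0.0, 0.0, 0.0};
    const double ijk[3]   = {1.0, 2.0, 4.0};

    double xyz[3], back[3];
    affine(mat, trans, ijk, xyz); // analogue of applyMap: world = mat * index + translation

    // analogue of applyInverseMap: index = invMat * (world - translation)
    const double shifted[3] = {xyz[0] - trans[0], xyz[1] - trans[1], xyz[2] - trans[2]};
    affine(inv, zero, shifted, back);

    std::printf("ijk=(%g,%g,%g) xyz=(%g,%g,%g) back=(%g,%g,%g)\n",
                ijk[0], ijk[1], ijk[2], xyz[0], xyz[1], xyz[2], back[0], back[1], back[2]);
    return 0; // back equals ijk, confirming the round trip
}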
@@ -3237,7 +1448,7 @@ struct Map template __hostdev__ Vec3T applyInverseMapF(const Vec3T& xyz) const { - return matMult(mInvMatF, Vec3T(xyz[0] - mVecF[0], xyz[1] - mVecF[1], xyz[2] - mVecF[2])); + return math::matMult(mInvMatF, Vec3T(xyz[0] - mVecF[0], xyz[1] - mVecF[1], xyz[2] - mVecF[2])); } /// @brief Apply the linear inverse 3x3 transformation to an input 3d vector using 64bit floating point arithmetics, @@ -3247,7 +1458,7 @@ struct Map /// @param ijk 3D vector to be mapped - typically floating point index coordinates /// @return linear inverse 3x3 mapping of the input vector i.e. xyz x mat^-1 template - __hostdev__ Vec3T applyInverseJacobian(const Vec3T& xyz) const { return matMult(mInvMatD, xyz); } + __hostdev__ Vec3T applyInverseJacobian(const Vec3T& xyz) const { return math::matMult(mInvMatD, xyz); } /// @brief Apply the linear inverse 3x3 transformation to an input 3d vector using 32bit floating point arithmetics, /// e.g. inverse scale and inverse rotation WITHOUT translation. @@ -3256,7 +1467,7 @@ struct Map /// @param ijk 3D vector to be mapped - typically floating point index coordinates /// @return linear inverse 3x3 mapping of the input vector i.e. xyz x mat^-1 template - __hostdev__ Vec3T applyInverseJacobianF(const Vec3T& xyz) const { return matMult(mInvMatF, xyz); } + __hostdev__ Vec3T applyInverseJacobianF(const Vec3T& xyz) const { return math::matMult(mInvMatF, xyz); } /// @brief Apply the transposed inverse 3x3 transformation to an input 3d vector using 64bit floating point arithmetics, /// e.g. inverse scale and inverse rotation WITHOUT translation. @@ -3265,9 +1476,9 @@ struct Map /// @param ijk 3D vector to be mapped - typically floating point index coordinates /// @return linear inverse 3x3 mapping of the input vector i.e. xyz x mat^-1 template - __hostdev__ Vec3T applyIJT(const Vec3T& xyz) const { return matMultT(mInvMatD, xyz); } + __hostdev__ Vec3T applyIJT(const Vec3T& xyz) const { return math::matMultT(mInvMatD, xyz); } template - __hostdev__ Vec3T applyIJTF(const Vec3T& xyz) const { return matMultT(mInvMatF, xyz); } + __hostdev__ Vec3T applyIJTF(const Vec3T& xyz) const { return math::matMultT(mInvMatF, xyz); } /// @brief Return a voxel's size in each coordinate direction, measured at the origin __hostdev__ Vec3d getVoxelSize() const { return this->applyMap(Vec3d(1)) - this->applyMap(Vec3d(0)); } @@ -3326,20 +1537,20 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridBlindMetaData // disallow copy-assignment since methods like blindData and getBlindData use the this pointer! const GridBlindMetaData& operator=(const GridBlindMetaData&) = delete; - __hostdev__ void setBlindData(void* blindData) { mDataOffset = PtrDiff(blindData, this); } + __hostdev__ void setBlindData(void* blindData) { mDataOffset = util::PtrDiff(blindData, this); } // unsafe - __hostdev__ const void* blindData() const {return PtrAdd(this, mDataOffset);} + __hostdev__ const void* blindData() const {return util::PtrAdd(this, mDataOffset);} /// @brief Get a const pointer to the blind data represented by this meta data /// @tparam BlindDataT Expected value type of the blind data. - /// @return Returns NULL if mGridType!=mapToGridType(), else a const point of type BlindDataT. + /// @return Returns NULL if mGridType!=toGridType(), else a const pointer of type BlindDataT. /// @note Use mDataType=Unknown if BlindDataT is a custom data type unknown to NanoVDB. 
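+ /// @par Example
+ /// A minimal sketch of reading one channel of blind data (assumes the grid
+ /// carries a float channel at index 0; names are illustrative):
+ /// @code
+ /// const GridBlindMetaData& meta = grid.blindMetaData(0);
+ /// if (const float* channel = meta.getBlindData<float>()) {
+ ///     // meta.mValueCount values are readable through channel
+ /// }
+ /// @endcode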
template __hostdev__ const BlindDataT* getBlindData() const { - //if (mDataType != mapToGridType()) printf("getBlindData mismatch\n"); - return mDataType == mapToGridType() ? PtrAdd(this, mDataOffset) : nullptr; + //if (mDataType != toGridType()) printf("getBlindData mismatch\n"); + return mDataType == toGridType() ? util::PtrAdd(this, mDataOffset) : nullptr; } /// @brief return true if this meta data has a valid combination of semantic, class and value tags @@ -3373,7 +1584,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridBlindMetaData /// of blind data is mValueCount * mValueSize __hostdev__ uint64_t blindDataSize() const { - return AlignUp(mValueCount * mValueSize); + return math::AlignUp(mValueCount * mValueSize); } }; // GridBlindMetaData @@ -3463,6 +1674,113 @@ struct ProbeValue; template struct GetNodeInfo; +// ----------------------------> CheckMode <---------------------------------- + +/// @brief List of different modes for computing a checksum +enum class CheckMode : uint32_t { Disable = 0, // no computation + Empty = 0, + Half = 1, + Partial = 1, // fast but approximate + Default = 1, // defaults to Partial + Full = 2, // slow but accurate + End = 3, // marks the end of the enum list + StrLen = 9 + End}; + +/// @brief Prints CheckMode enum to a c-string +/// @param dst Destination c-string +/// @param mode CheckMode enum to be converted to string +/// @return destination string @c dst +__hostdev__ inline char* toStr(char *dst, CheckMode mode) +{ + switch (mode){ + case CheckMode::Half: return util::strcpy(dst, "half"); + case CheckMode::Full: return util::strcpy(dst, "full"); + default: return util::strcpy(dst, "disabled"); + } +} + +// ----------------------------> Checksum <---------------------------------- + +/// @brief Class that encapsulates two CRC32 checksums, one for the Grid, Tree and Root node meta data +/// and one for the remaining grid nodes. +class Checksum +{ + /// Three types of checksums: + /// 1) Empty: all 64 bits are on (used to signify a disabled or undefined checksum) + /// 2) Half: Upper 32 bits are on and not all of lower 32 bits are on (lower 32 bits checksum head of grid) + /// 3) Full: Not all of the 64 bits are one (lower 32 bits checksum head of grid and upper 32 bits checksum tail of grid) + union { uint32_t mCRC32[2]; uint64_t mCRC64; };// mCRC32[0] is checksum of Grid, Tree and Root, and mCRC32[1] is checksum of nodes + +public: + + static constexpr uint32_t EMPTY32 = ~uint32_t{0}; + static constexpr uint64_t EMPTY64 = ~uint64_t(0); + + /// @brief default constructor initiates checksum to EMPTY + __hostdev__ Checksum() : mCRC64{EMPTY64} {} + + /// @brief Constructor that allows the two 32bit checksums to be initiated explicitly + /// @param head Initial 32bit CRC checksum of grid, tree and root data + /// @param tail Initial 32bit CRC checksum of all the nodes and blind data + __hostdev__ Checksum(uint32_t head, uint32_t tail) : mCRC32{head, tail} {} + + /// @brief Constructor that initiates the checksum from a single 64bit value and a mode + /// @param checksum 64bit checksum with two CRC32 codes + /// @param mode CheckMode that determines how much of @c checksum is kept + __hostdev__ Checksum(uint64_t checksum, CheckMode mode = CheckMode::Full) : mCRC64{mode == CheckMode::Disable ? 
EMPTY64 : checksum} + { + if (mode == CheckMode::Partial) mCRC32[1] = EMPTY32; + } + + /// @brief return the 64 bit checksum of this instance + [[deprecated("Use Checksum::full instead.")]] + __hostdev__ uint64_t checksum() const { return mCRC64; } + [[deprecated("Use Checksum::head and Checksum::tail instead.")]] + __hostdev__ uint32_t& checksum(int i) {NANOVDB_ASSERT(i==0 || i==1); return mCRC32[i]; } + [[deprecated("Use Checksum::head and Checksum::tail instead.")]] + __hostdev__ uint32_t checksum(int i) const {NANOVDB_ASSERT(i==0 || i==1); return mCRC32[i]; } + + __hostdev__ uint64_t full() const { return mCRC64; } + __hostdev__ uint64_t& full() { return mCRC64; } + __hostdev__ uint32_t head() const { return mCRC32[0]; } + __hostdev__ uint32_t& head() { return mCRC32[0]; } + __hostdev__ uint32_t tail() const { return mCRC32[1]; } + __hostdev__ uint32_t& tail() { return mCRC32[1]; } + + /// @brief return true if the 64 bit checksum is partial, i.e. of head only + [[deprecated("Use Checksum::isHalf instead.")]] + __hostdev__ bool isPartial() const { return mCRC32[0] != EMPTY32 && mCRC32[1] == EMPTY32; } + __hostdev__ bool isHalf() const { return mCRC32[0] != EMPTY32 && mCRC32[1] == EMPTY32; } + + /// @brief return true if the 64 bit checksum is full, i.e. of both head and nodes + __hostdev__ bool isFull() const { return mCRC64 != EMPTY64 && mCRC32[1] != EMPTY32; } + + /// @brief return true if the 64 bit checksum is disabled (unset) + __hostdev__ bool isEmpty() const { return mCRC64 == EMPTY64; } + + __hostdev__ void disable() { mCRC64 = EMPTY64; } + + /// @brief return the mode of the 64 bit checksum + __hostdev__ CheckMode mode() const + { + return mCRC64 == EMPTY64 ? CheckMode::Disable : + mCRC32[1] == EMPTY32 ? CheckMode::Partial : CheckMode::Full; + } + + /// @brief return true if the checksums are identical + /// @param rhs other Checksum + __hostdev__ bool operator==(const Checksum &rhs) const {return mCRC64 == rhs.mCRC64;} + + /// @brief return true if the checksums are not identical + /// @param rhs other Checksum + __hostdev__ bool operator!=(const Checksum &rhs) const {return mCRC64 != rhs.mCRC64;} +};// Checksum + +/// @brief Maps 64 bit checksum to CheckMode enum +/// @param checksum 64 bit checksum with two CRC32 codes +/// @return CheckMode enum +__hostdev__ inline CheckMode toCheckMode(const Checksum &checksum){return checksum.mode();} + // ----------------------------> Grid <-------------------------------------- /* @@ -3493,7 +1811,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridData { // sizeof(GridData) = 672B static const int MaxNameSize = 256; // due to NULL termination the maximum length is one less uint64_t mMagic; // 8B (0) magic to validate it is valid grid data. - uint64_t mChecksum; // 8B (8). Checksum of grid buffer. + Checksum mChecksum; // 8B (8). Checksum of grid buffer. Version mVersion; // 4B (16) major, minor, and patch version numbers BitFlags<32> mFlags; // 4B (20). flags for grid. uint32_t mGridIndex; // 4B (24). Index of this grid in the buffer @@ -3501,21 +1819,18 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridData uint64_t mGridSize; // 8B (32). byte count of this entire grid occupied in the buffer. char mGridName[MaxNameSize]; // 256B (40) Map mMap; // 264B (296). affine transformation between index and world space in both single and double precision - BBox mWorldBBox; // 48B (560). floating-point AABB of active values in WORLD SPACE (2 x 3 doubles) + Vec3dBBox mWorldBBox; // 48B (560). 
floating-point AABB of active values in WORLD SPACE (2 x 3 doubles) Vec3d mVoxelSize; // 24B (608). size of a voxel in world units GridClass mGridClass; // 4B (632). GridType mGridType; // 4B (636). int64_t mBlindMetadataOffset; // 8B (640). offset to beginning of GridBlindMetaData structures that follow this grid. uint32_t mBlindMetadataCount; // 4B (648). count of GridBlindMetaData structures that follow this grid. - uint32_t mData0; // 4B (652) - uint64_t mData1, mData2; // 2x8B (656) padding to 32 B alignment. mData1 is use for the total number of values indexed by an IndexGrid - /// @brief Use this method to initiate most member dat - __hostdev__ GridData& operator=(const GridData& other) - { - static_assert(8 * 84 == sizeof(GridData), "GridData has unexpected size"); - memcpy64(this, &other, 84); - return *this; - } + uint32_t mData0; // 4B (652) unused + uint64_t mData1; // 8B (656) is used for the total number of values indexed by an IndexGrid + uint64_t mData2; // 8B (664) padding to 32 B alignment + /// @brief Use this method to initiate most member data + GridData& operator=(const GridData&) = default; + //__hostdev__ GridData& operator=(const GridData& other){return *util::memcpy(this, &other);} __hostdev__ void init(std::initializer_list list = {GridFlags::IsBreadthFirst}, uint64_t gridSize = 0u, const Map& map = Map(), @@ -3525,9 +1840,9 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridData #ifdef NANOVDB_USE_NEW_MAGIC_NUMBERS mMagic = NANOVDB_MAGIC_GRID; #else - mMagic = NANOVDB_MAGIC_NUMBER; + mMagic = NANOVDB_MAGIC_NUMB; #endif - mChecksum = ~uint64_t(0);// all 64 bits ON means checksum is disabled + mChecksum.disable();// all 64 bits ON means checksum is disabled mVersion = Version(); mFlags.initMask(list); mGridIndex = 0u; @@ -3535,7 +1850,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridData mGridSize = gridSize; mGridName[0] = '\0'; mMap = map; - mWorldBBox = BBox();// invalid bbox + mWorldBBox = Vec3dBBox();// invalid bbox mVoxelSize = map.getVoxelSize(); mGridClass = gridClass; mGridType = gridType; @@ -3543,12 +1858,16 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridData mBlindMetadataCount = 0u; // i.e. 
no blind data mData0 = 0u; // zero padding mData1 = 0u; // only used for index and point grids - mData2 = NANOVDB_MAGIC_GRID; // since version 32.6.0 (might be removed in the future) + mData2 = NANOVDB_MAGIC_GRID; // since version 32.6.0 (will change in the future) } /// @brief return true if the magic number and the version are both valid __hostdev__ bool isValid() const { - if (mMagic == NANOVDB_MAGIC_GRID || mData2 == NANOVDB_MAGIC_GRID) return true; - bool test = mMagic == NANOVDB_MAGIC_NUMBER;// could be GridData or io::FileHeader + // Before v32.6.0: toMagic(mMagic) = MagicType::NanoVDB and mData2 was undefined + // For v32.6.0: toMagic(mMagic) = MagicType::NanoVDB and toMagic(mData2) = MagicType::NanoGrid + // After v32.7.X: toMagic(mMagic) = MagicType::NanoGrid and mData2 will again be undefined + const MagicType magic = toMagic(mMagic); + if (magic == MagicType::NanoGrid || toMagic(mData2) == MagicType::NanoGrid) return true; + bool test = magic == MagicType::NanoVDB;// could be GridData or io::FileHeader if (test) test = mVersion.isCompatible(); if (test) test = mGridCount > 0u && mGridIndex < mGridCount; if (test) test = mGridClass < GridClass::End && mGridType < GridType::End; @@ -3562,12 +1881,9 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridData __hostdev__ void setStdDeviationOn(bool on = true) { mFlags.setMask(GridFlags::HasStdDeviation, on); } __hostdev__ bool setGridName(const char* src) { - char *dst = mGridName, *end = dst + MaxNameSize; - while (*src != '\0' && dst < end - 1) - *dst++ = *src++; - while (dst < end) - *dst++ = '\0'; - return *src == '\0'; // returns true if input grid name is NOT longer than MaxNameSize characters + const bool success = (util::strncpy(mGridName, src, MaxNameSize)[MaxNameSize-1] == '\0'); + if (!success) mGridName[MaxNameSize-1] = '\0'; + return success; // returns true if input grid name is NOT longer than MaxNameSize characters } // Affine transformations based on double precision template @@ -3592,31 +1908,43 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridData template __hostdev__ Vec3T applyIJTF(const Vec3T& xyz) const { return mMap.applyIJTF(xyz); } - // @brief Return a non-const uint8_t pointer to the tree - __hostdev__ uint8_t* treePtr() { return reinterpret_cast(this + 1); }// TreeData is always right after GridData - //__hostdev__ TreeData* treePtr() { return reinterpret_cast(this + 1); }// TreeData is always right after GridData + // @brief Return a non-const void pointer to the tree + __hostdev__ void* treePtr() { return this + 1; }// TreeData is always right after GridData - // @brief Return a const uint8_t pointer to the tree - __hostdev__ const uint8_t* treePtr() const { return reinterpret_cast(this + 1); }// TreeData is always right after GridData - //__hostdev__ const TreeData* treePtr() const { return reinterpret_cast(this + 1); }// TreeData is always right after GridData + // @brief Return a const void pointer to the tree + __hostdev__ const void* treePtr() const { return this + 1; }// TreeData is always right after GridData - /// @brief Return a non-const uint8_t pointer to the first node at @c LEVEL - /// @tparam LEVEL of the node. LEVEL 0 means leaf node and LEVEL 3 means root node - /// @warning If not nodes exist at @c LEVEL NULL is returned + /// @brief Return a non-const void pointer to the first node at @c LEVEL + /// @tparam LEVEL Level of the node. 
LEVEL 0 means leaf node and LEVEL 3 means root node template - __hostdev__ const uint8_t* nodePtr() const + __hostdev__ const void* nodePtr() const { static_assert(LEVEL >= 0 && LEVEL <= 3, "invalid LEVEL template parameter"); - auto *treeData = this->treePtr(); - auto nodeOffset = *reinterpret_cast(treeData + 8*LEVEL);// skip LEVEL uint64_t - return nodeOffset ? PtrAdd(treeData, nodeOffset) : nullptr; + const void *treeData = this + 1;// TreeData is always right after GridData + const uint64_t nodeOffset = *util::PtrAdd(treeData, 8*LEVEL);// skip LEVEL uint64_t + return nodeOffset ? util::PtrAdd(treeData, nodeOffset) : nullptr; } - /// @brief Return a non-const uint8_t pointer to the first node at @c LEVEL + /// @brief Return a non-const void pointer to the first node at @c LEVEL /// @tparam LEVEL of the node. LEVEL 0 means leaf node and LEVEL 3 means root node /// @warning If no nodes exist at @c LEVEL NULL is returned template - __hostdev__ uint8_t* nodePtr(){return const_cast(const_cast(this)->template nodePtr());} + __hostdev__ void* nodePtr() + { + static_assert(LEVEL >= 0 && LEVEL <= 3, "invalid LEVEL template parameter"); + void *treeData = this + 1;// TreeData is always right after GridData + const uint64_t nodeOffset = *util::PtrAdd(treeData, 8*LEVEL);// skip LEVEL uint64_t + return nodeOffset ? util::PtrAdd(treeData, nodeOffset) : nullptr; + } + + /// @brief Return number of nodes at @c LEVEL + /// @tparam LEVEL Level of the node. LEVEL 0 means leaf node and LEVEL 2 means upper node + template + __hostdev__ uint32_t nodeCount() const + { + static_assert(LEVEL >= 0 && LEVEL < 3, "invalid LEVEL template parameter"); + return *util::PtrAdd(this + 1, 4*(8 + LEVEL));// TreeData is always right after GridData + } /// @brief Returns a const reference to the blindMetaData at the specified linear offset. /// @@ -3624,7 +1952,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridData __hostdev__ const GridBlindMetaData* blindMetaData(uint32_t n) const { NANOVDB_ASSERT(n < mBlindMetadataCount); - return PtrAdd(this, mBlindMetadataOffset) + n; + return util::PtrAdd(this, mBlindMetadataOffset) + n; } __hostdev__ const char* gridName() const @@ -3647,17 +1975,16 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridData __hostdev__ static uint64_t memUsage() { return sizeof(GridData); } /// @brief return AABB of active values in world space - __hostdev__ const BBox& worldBBox() const { return mWorldBBox; } + __hostdev__ const Vec3dBBox& worldBBox() const { return mWorldBBox; } /// @brief return AABB of active values in index space __hostdev__ const CoordBBox& indexBBox() const {return *(const CoordBBox*)(this->nodePtr<3>());} /// @brief return the root table size - __hostdev__ uint32_t rootTableSize() const { - if (const uint8_t *root = this->nodePtr<3>()) { - return *(const uint32_t*)(root + sizeof(CoordBBox)); - } - return 0u; + __hostdev__ uint32_t rootTableSize() const + { + const void *root = this->nodePtr<3>(); + return root ? 
*util::PtrAdd(root, sizeof(CoordBBox)) : 0u; } /// @brief test if the grid is empty, i.e. the root table has size 0 @@ -3725,14 +2052,14 @@ class Grid : public GridData /// /// @note This method is only defined for IndexGrid = NanoGrid template - __hostdev__ typename enable_if::is_index, const uint64_t&>::type + __hostdev__ typename util::enable_if::is_index, const uint64_t&>::type valueCount() const { return DataType::mData1; } /// @brief Return the total number of points indexed by this PointGrid /// /// @note This method is only defined for PointGrid = NanoGrid template - __hostdev__ typename enable_if::value, const uint64_t&>::type + __hostdev__ typename util::enable_if::value, const uint64_t&>::type pointCount() const { return DataType::mData1; } /// @brief Return a const reference to the tree @@ -3797,7 +2124,7 @@ class Grid : public GridData __hostdev__ Vec3T indexToWorldGradF(const Vec3T& grad) const { return DataType::applyIJTF(grad); } /// @brief Computes an AABB of active values in world space - //__hostdev__ const BBox& worldBBox() const { return DataType::mWorldBBox; } + //__hostdev__ const Vec3dBBox& worldBBox() const { return DataType::mWorldBBox; } /// @brief Computes an AABB of active values in index space /// @@ -3847,7 +2174,7 @@ class Grid : public GridData __hostdev__ const char* shortGridName() const { return DataType::mGridName; } /// @brief Return checksum of the grid buffer. - __hostdev__ uint64_t checksum() const { return DataType::mChecksum; } + __hostdev__ const Checksum& checksum() const { return DataType::mChecksum; } /// @brief Return true if this grid is empty, i.e. contains no values or nodes. //__hostdev__ bool isEmpty() const { return this->tree().isEmpty(); } @@ -3865,6 +2192,7 @@ class Grid : public GridData /// /// @warning Pointer might be NULL and the linear offset is assumed to be in the valid range // this method is deprecated !!!! + [[deprecated("Use Grid::getBlindData() instead.")]] __hostdev__ const void* blindData(uint32_t n) const { printf("\nnanovdb::Grid::blindData is unsafe and hence deprecated! Please use nanovdb::Grid::getBlindData instead.\n\n"); @@ -3925,28 +2253,32 @@ __hostdev__ int Grid::findBlindData(const char* name) const struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) TreeData { // sizeof(TreeData) == 64B - int64_t mNodeOffset[4];// 32B, byte offset from this tree to first leaf, lower, upper and root node. A zero offset means no node exists + int64_t mNodeOffset[4];// 32B, byte offset from this tree to first leaf, lower, upper and root node. If mNodeCount[N]=0 => mNodeOffset[N]==mNodeOffset[N+1] uint32_t mNodeCount[3]; // 12B, total number of nodes of type: leaf, lower internal, upper internal uint32_t mTileCount[3]; // 12B, total number of active tile values at the lower internal, upper internal and root node levels uint64_t mVoxelCount; // 8B, total number of active voxels in the root and all its child nodes. // No padding since it's always 32B aligned - __hostdev__ TreeData& operator=(const TreeData& other) - { - static_assert(8 * 8 == sizeof(TreeData), "TreeData has unexpected size"); - memcpy64(this, &other, 8); - return *this; + //__hostdev__ TreeData& operator=(const TreeData& other){return *util::memcpy(this, &other);} + TreeData& operator=(const TreeData&) = default; + __hostdev__ void setRoot(const void* root) { + NANOVDB_ASSERT(root); + mNodeOffset[3] = util::PtrDiff(root, this); } - __hostdev__ void setRoot(const void* root) {mNodeOffset[3] = root ? 
PtrDiff(root, this) : 0;} - __hostdev__ uint8_t* getRoot() { return mNodeOffset[3] ? PtrAdd(this, mNodeOffset[3]) : nullptr; } - __hostdev__ const uint8_t* getRoot() const { return mNodeOffset[3] ? PtrAdd(this, mNodeOffset[3]) : nullptr; } + + /// @brief Get a non-const void pointer to the root node (never NULL) + __hostdev__ void* getRoot() { return util::PtrAdd(this, mNodeOffset[3]); } + + /// @brief Get a const void pointer to the root node (never NULL) + __hostdev__ const void* getRoot() const { return util::PtrAdd(this, mNodeOffset[3]); } template - __hostdev__ void setFirstNode(const NodeT* node) {mNodeOffset[NodeT::LEVEL] = node ? PtrDiff(node, this) : 0;} + __hostdev__ void setFirstNode(const NodeT* node) {mNodeOffset[NodeT::LEVEL] = (node ? util::PtrDiff(node, this) : 0);} - __hostdev__ bool isEmpty() const {return mNodeOffset[3] ? *PtrAdd(this, mNodeOffset[3] + sizeof(BBox)) == 0 : true;} + /// @brief Return true if the root is empty, i.e. has no child nodes or constant tiles + __hostdev__ bool isEmpty() const {return mNodeOffset[3] ? *util::PtrAdd(this, mNodeOffset[3] + sizeof(CoordBBox)) == 0 : true;} /// @brief Return the index bounding box of all the active values in this tree, i.e. in all nodes of the tree - __hostdev__ CoordBBox bbox() const {return mNodeOffset[3] ? *PtrAdd(this, mNodeOffset[3]) : CoordBBox();} + __hostdev__ CoordBBox bbox() const {return mNodeOffset[3] ? *util::PtrAdd(this, mNodeOffset[3]) : CoordBBox();} /// @brief return true if RootData is laid out immediately after TreeData in memory __hostdev__ bool isRootNext() const {return mNodeOffset[3] ? mNodeOffset[3] == sizeof(TreeData) : false; } @@ -4009,19 +2341,9 @@ class Tree : public TreeData /// @brief return memory usage in bytes for the class __hostdev__ static uint64_t memUsage() { return sizeof(DataType); } - __hostdev__ RootT& root() - { - RootT* ptr = reinterpret_cast(DataType::getRoot()); - NANOVDB_ASSERT(ptr); - return *ptr; - } + __hostdev__ RootT& root() {return *reinterpret_cast(DataType::getRoot());} - __hostdev__ const RootT& root() const - { - const RootT* ptr = reinterpret_cast(DataType::getRoot()); - NANOVDB_ASSERT(ptr); - return *ptr; - } + __hostdev__ const RootT& root() const {return *reinterpret_cast(DataType::getRoot());} __hostdev__ AccessorType getAccessor() const { return AccessorType(this->root()); } @@ -4085,8 +2407,8 @@ class Tree : public TreeData template __hostdev__ NodeT* getFirstNode() { - const int64_t offset = DataType::mNodeOffset[NodeT::LEVEL]; - return offset ? PtrAdd(this, offset) : nullptr; + const int64_t nodeOffset = DataType::mNodeOffset[NodeT::LEVEL]; + return nodeOffset ? util::PtrAdd(this, nodeOffset) : nullptr; } /// @brief return a const pointer to the first node of the specified type /// /// @warning Note it may return NULL if no nodes exist template __hostdev__ const NodeT* getFirstNode() const { - const int64_t offset = DataType::mNodeOffset[NodeT::LEVEL]; - return offset ? PtrAdd(this, offset) : nullptr; + const int64_t nodeOffset = DataType::mNodeOffset[NodeT::LEVEL]; + return nodeOffset ? 
util::PtrAdd(this, nodeOffset) : nullptr; } /// @brief return a pointer to the first node at the specified level /// /// @warning Note it may return NULL if no nodes exist template - __hostdev__ typename NodeTrait::type* - getFirstNode() + __hostdev__ typename NodeTrait::type* getFirstNode() { return this->template getFirstNode::type>(); } @@ -4113,8 +2434,7 @@ class Tree : public TreeData /// /// @warning Note it may return NULL if no nodes exist template - __hostdev__ const typename NodeTrait::type* - getFirstNode() const + __hostdev__ const typename NodeTrait::type* getFirstNode() const { return this->template getFirstNode::type>(); } @@ -4189,8 +2509,8 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) RootData __hostdev__ static KeyT CoordToKey(const CoordT& ijk) { return ijk & ~ChildT::MASK; } __hostdev__ static CoordT KeyToCoord(const KeyT& key) { return key; } #endif - BBox mBBox; // 24B. AABB of active values in index space. - uint32_t mTableSize; // 4B. number of tiles and child pointers in the root node + math::BBox mBBox; // 24B. AABB of active values in index space. + uint32_t mTableSize; // 4B. number of tiles and child pointers in the root node ValueT mBackground; // background value, i.e. value of any unset voxel ValueT mMinimum; // typically 4B, minimum of all the active values @@ -4213,7 +2533,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) RootData { key = CoordToKey(k); state = false; - child = PtrDiff(ptr, data); + child = util::PtrDiff(ptr, data); } template __hostdev__ void setValue(const CoordType& k, bool s, const ValueType& v) @@ -4283,12 +2603,12 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) RootData __hostdev__ ChildT* getChild(const Tile* tile) { NANOVDB_ASSERT(tile->child); - return PtrAdd(this, tile->child); + return util::PtrAdd(this, tile->child); } __hostdev__ const ChildT* getChild(const Tile* tile) const { NANOVDB_ASSERT(tile->child); - return PtrAdd(this, tile->child); + return util::PtrAdd(this, tile->child); } __hostdev__ const ValueT& getMin() const { return mMinimum; } @@ -4327,7 +2647,7 @@ class RootNode : public RootData using BuildType = typename DataType::BuildT; // in rare cases BuildType != ValueType, e.g. 
then BuildType = ValueMask and ValueType = bool using CoordType = typename ChildT::CoordType; - using BBoxType = BBox; + using BBoxType = math::BBox; using AccessorType = DefaultReadAccessor; using Tile = typename DataType::Tile; static constexpr bool FIXED_SIZE = DataType::FIXED_SIZE; @@ -4338,8 +2658,8 @@ class RootNode : public RootData class BaseIter { protected: - using DataT = typename match_const::type; - using TileT = typename match_const::type; + using DataT = typename util::match_const::type; + using TileT = typename util::match_const::type; DataT* mData; uint32_t mPos, mSize; __hostdev__ BaseIter(DataT* data = nullptr, uint32_t n = 0) @@ -4369,9 +2689,9 @@ class RootNode : public RootData template class ChildIter : public BaseIter { - static_assert(is_same::type, RootNode>::value, "Invalid RootT"); + static_assert(util::is_same::type, RootNode>::value, "Invalid RootT"); using BaseT = BaseIter; - using NodeT = typename match_const::type; + using NodeT = typename util::match_const::type; public: __hostdev__ ChildIter() @@ -4514,7 +2834,7 @@ class RootNode : public RootData class DenseIter : public BaseIter { using BaseT = BaseIter; - using NodeT = typename match_const::type; + using NodeT = typename util::match_const::type; public: __hostdev__ DenseIter() @@ -4599,7 +2919,7 @@ class RootNode : public RootData __hostdev__ const FloatType& average() const { return DataType::mAverage; } /// @brief Return the variance of all the active values encoded in this root node and any of its child nodes - __hostdev__ FloatType variance() const { return Pow2(DataType::mStdDevi); } + __hostdev__ FloatType variance() const { return math::Pow2(DataType::mStdDevi); } /// @brief Return a const reference to the standard deviation of all the active values encoded in this root node and any of its child nodes __hostdev__ const FloatType& stdDeviation() const { return DataType::mStdDevi; } @@ -4692,7 +3012,7 @@ class RootNode : public RootData template // __hostdev__ auto // occasionally fails with NVCC - __hostdev__ decltype(OpT::set(std::declval(), std::declval()...)) + __hostdev__ decltype(OpT::set(util::declval(), util::declval()...)) set(const CoordType& ijk, ArgsT&&... args) { if (Tile* tile = DataType::probeTile(ijk)) { @@ -4800,7 +3120,7 @@ class RootNode : public RootData } template - //__hostdev__ decltype(OpT::get(std::declval(), std::declval()...)) + //__hostdev__ decltype(OpT::get(util::declval(), util::declval()...)) __hostdev__ auto getAndCache(const CoordType& ijk, const AccT& acc, ArgsT&&... args) const { @@ -4817,7 +3137,7 @@ class RootNode : public RootData template // __hostdev__ auto // occasionally fails with NVCC - __hostdev__ decltype(OpT::set(std::declval(), std::declval()...)) + __hostdev__ decltype(OpT::set(util::declval(), util::declval()...)) setAndCache(const CoordType& ijk, const AccT& acc, ArgsT&&... args) { if (Tile* tile = DataType::probeTile(ijk)) { @@ -4861,7 +3181,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) InternalData ~Tile() = delete; }; - BBox mBBox; // 24B. node bounding box. | + math::BBox mBBox; // 24B. node bounding box. | uint64_t mFlags; // 8B. node flags. 
| 32B aligned MaskT mValueMask; // LOG2DIM(5): 4096B, LOG2DIM(4): 512B | 32B aligned MaskT mChildMask; // LOG2DIM(5): 4096B, LOG2DIM(4): 512B | 32B aligned @@ -4886,7 +3206,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) InternalData __hostdev__ void setChild(uint32_t n, const void* ptr) { NANOVDB_ASSERT(mChildMask.isOn(n)); - mTable[n].child = PtrDiff(ptr, this); + mTable[n].child = util::PtrDiff(ptr, this); } template @@ -4900,12 +3220,12 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) InternalData __hostdev__ ChildT* getChild(uint32_t n) { NANOVDB_ASSERT(mChildMask.isOn(n)); - return PtrAdd(this, mTable[n].child); + return util::PtrAdd(this, mTable[n].child); } __hostdev__ const ChildT* getChild(uint32_t n) const { NANOVDB_ASSERT(mChildMask.isOn(n)); - return PtrAdd(this, mTable[n].child); + return util::PtrAdd(this, mTable[n].child); } __hostdev__ ValueT getValue(uint32_t n) const @@ -4949,7 +3269,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) InternalData ~InternalData() = delete; }; // InternalData -/// @brief Internal nodes of a VDB treedim(), +/// @brief Internal nodes of a VDB tree template class InternalNode : public InternalData { @@ -4979,9 +3299,9 @@ class InternalNode : public InternalData template class ChildIter : public MaskIterT { - static_assert(is_same::type, InternalNode>::value, "Invalid ParentT"); + static_assert(util::is_same::type, InternalNode>::value, "Invalid ParentT"); using BaseT = MaskIterT; - using NodeT = typename match_const::type; + using NodeT = typename util::match_const::type; ParentT* mParent; public: @@ -5181,7 +3501,7 @@ class InternalNode : public InternalData __hostdev__ const FloatType& stdDeviation() const { return DataType::mStdDevi; } /// @brief Return a const reference to the bounding box in index space of active values in this internal node and any of its child nodes - __hostdev__ const BBox& bbox() const { return DataType::mBBox; } + __hostdev__ const math::BBox& bbox() const { return DataType::mBBox; } /// @brief If the first entry in this node's table is a tile, return the tile's value. /// Otherwise, return the result of calling getFirstValue() on the child. @@ -5288,7 +3608,7 @@ class InternalNode : public InternalData template //__hostdev__ auto // occasionally fails with NVCC - __hostdev__ decltype(OpT::set(std::declval(), std::declval(), std::declval()...)) + __hostdev__ decltype(OpT::set(util::declval(), util::declval(), util::declval()...)) set(const CoordType& ijk, ArgsT&&... args) { const uint32_t n = CoordToOffset(ijk); @@ -5384,7 +3704,7 @@ class InternalNode : public InternalData template __hostdev__ auto - //__hostdev__ decltype(OpT::get(std::declval(), std::declval(), std::declval()...)) + //__hostdev__ decltype(OpT::get(util::declval(), util::declval(), util::declval()...)) getAndCache(const CoordType& ijk, const AccT& acc, ArgsT&&... args) const { const uint32_t n = CoordToOffset(ijk); @@ -5397,7 +3717,7 @@ class InternalNode : public InternalData template //__hostdev__ auto // occasionally fails with NVCC - __hostdev__ decltype(OpT::set(std::declval(), std::declval(), std::declval()...)) + __hostdev__ decltype(OpT::set(util::declval(), util::declval(), util::declval()...)) setAndCache(const CoordType& ijk, const AccT& acc, ArgsT&&... 
args) { const uint32_t n = CoordToOffset(ijk); @@ -5849,6 +4169,13 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafIndexBase __hostdev__ void setOn(uint32_t offset) { mValueMask.setOn(offset); } template __hostdev__ void setOrigin(const T& ijk) { mBBoxMin = ijk; } + +protected: + /// @brief This class should be used as an abstract class and only constructed or deleted via child classes + LeafIndexBase() = default; + LeafIndexBase(const LeafIndexBase&) = default; + LeafIndexBase& operator=(const LeafIndexBase&) = default; + ~LeafIndexBase() = default; }; // LeafIndexBase // --------------------------> LeafData <------------------------------------ @@ -5870,12 +4197,6 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafDatahasStats() ? BaseT::mOffset + 514u : 0u; } __hostdev__ uint64_t getDev() const { return this->hasStats() ? BaseT::mOffset + 515u : 0u; } __hostdev__ uint64_t getValue(uint32_t i) const { return BaseT::mOffset + i; } // dense leaf node with active and inactive voxels - - /// @brief This class cannot be constructed or deleted - LeafData() = delete; - LeafData(const LeafData&) = delete; - LeafData& operator=(const LeafData&) = delete; - ~LeafData() = delete; }; // LeafData // --------------------------> LeafData <------------------------------------ @@ -5888,7 +4209,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData> 54u & 511u); // last 9 bits of mPrefixSum do not account for the last word in mValueMask + return util::countOn(BaseT::mValueMask.words()[7]) + (BaseT::mPrefixSum >> 54u & 511u); // last 9 bits of mPrefixSum do not account for the last word in mValueMask } __hostdev__ uint64_t lastOffset() const { return BaseT::mOffset + this->valueCount() - 1u; } __hostdev__ uint64_t getMin() const { return this->hasStats() ? 
this->lastOffset() + 1u : 0u; } @@ -5901,16 +4222,10 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData> 6; const uint64_t w = BaseT::mValueMask.words()[n], mask = uint64_t(1) << (i & 63u); if (!(w & mask)) return uint64_t(0); // if i'th value is inactive return offset to background value - uint64_t sum = BaseT::mOffset + CountOn(w & (mask - 1u)); + uint64_t sum = BaseT::mOffset + util::countOn(w & (mask - 1u)); if (n--) sum += BaseT::mPrefixSum >> (9u * n) & 511u; return sum; } - - /// @brief This class cannot be constructed or deleted - LeafData() = delete; - LeafData(const LeafData&) = delete; - LeafData& operator=(const LeafData&) = delete; - ~LeafData() = delete; }; // LeafData // --------------------------> LeafData <------------------------------------ @@ -6147,7 +4462,7 @@ class LeafNode : public LeafData __hostdev__ ValueIterator beginValue() const { return ValueIterator(this); } __hostdev__ ValueIterator cbeginValueAll() const { return ValueIterator(this); } - static_assert(is_same::Type>::value, "Mismatching BuildType"); + static_assert(util::is_same::Type>::value, "Mismatching BuildType"); static constexpr uint32_t LOG2DIM = Log2Dim; static constexpr uint32_t TOTAL = LOG2DIM; // needed by parent nodes static constexpr uint32_t DIM = 1u << TOTAL; // number of voxels along each axis of this node @@ -6206,15 +4521,15 @@ class LeafNode : public LeafData __hostdev__ static uint32_t dim() { return 1u << LOG2DIM; } /// @brief Return the bounding box in index space of active values in this leaf node - __hostdev__ BBox bbox() const + __hostdev__ math::BBox bbox() const { - BBox bbox(DataType::mBBoxMin, DataType::mBBoxMin); + math::BBox bbox(DataType::mBBoxMin, DataType::mBBoxMin); if (this->hasBBox()) { bbox.max()[0] += DataType::mBBoxDif[0]; bbox.max()[1] += DataType::mBBoxDif[1]; bbox.max()[2] += DataType::mBBoxDif[2]; } else { // very rare case - bbox = BBox(); // invalid + bbox = math::BBox(); // invalid } return bbox; } @@ -6364,7 +4679,7 @@ class LeafNode : public LeafData template __hostdev__ auto - //__hostdev__ decltype(OpT::get(std::declval(), std::declval(), std::declval()...)) + //__hostdev__ decltype(OpT::get(util::declval(), util::declval(), util::declval()...)) getAndCache(const CoordType& ijk, const AccT&, ArgsT&&... args) const { return OpT::get(*this, CoordToOffset(ijk), args...); @@ -6372,7 +4687,7 @@ class LeafNode : public LeafData template //__hostdev__ auto // occasionally fails with NVCC - __hostdev__ decltype(OpT::set(std::declval(), std::declval(), std::declval()...)) + __hostdev__ decltype(OpT::set(util::declval(), util::declval(), util::declval()...)) setAndCache(const CoordType& ijk, const AccT&, ArgsT&&... 
args) { return OpT::set(*this, CoordToOffset(ijk), args...); @@ -6407,12 +4722,12 @@ __hostdev__ inline bool LeafNode::updateBBox() } NANOVDB_ASSERT(word64); update(Xmin, Xmax, 0); - update(FindLowestOn(word64) >> 3, FindHighestOn(word64) >> 3, 1); + update(util::findLowestOn(word64) >> 3, util::findHighestOn(word64) >> 3, 1); const uint32_t *p = reinterpret_cast(&word64), word32 = p[0] | p[1]; const uint16_t *q = reinterpret_cast(&word32), word16 = q[0] | q[1]; - const uint8_t * b = reinterpret_cast(&word16), byte = b[0] | b[1]; + const uint8_t *b = reinterpret_cast(&word16), byte = b[0] | b[1]; NANOVDB_ASSERT(byte); - update(FindLowestOn(static_cast(byte)), FindHighestOn(static_cast(byte)), 2); + update(util::findLowestOn(static_cast(byte)), util::findHighestOn(static_cast(byte)), 2); DataType::mFlags |= uint8_t(2); // set 2nd bit on, which indicates that this nodes has a bbox return true; } // LeafNode::updateBBox @@ -6507,6 +4822,89 @@ using OnIndexGrid = Grid; using IndexMaskGrid = Grid; using OnIndexMaskGrid = Grid; +// --------------------------> callNanoGrid <------------------------------------ + +/** +* @brief Below is an example of the struct used for generic programming with callNanoGrid +* @details For an example see "struct Crc32TailOld" in nanovdb/tools/GridChecksum.h or +* "struct IsNanoGridValid" in nanovdb/tools/GridValidator.h +* @code +* struct OpT { + // define these two static functions with non-const GridData +* template +* static auto known( GridData *gridData, args...); +* static auto unknown( GridData *gridData, args...); +* // or alternatively these two static functions with const GridData +* template +* static auto known(const GridData *gridData, args...); +* static auto unknown(const GridData *gridData, args...); +* }; +* @endcode +* +* @brief Here is an example of how to use callNanoGrid in client code +* @code +* return callNanoGrid(gridData, args...); +* @endcode +*/ + +/// @brief Use this function, which depends a pointer to GridData, to call +/// other functions that depend on a NanoGrid of a known ValueType. +/// @details This function allows for generic programming by converting GridData +/// to a NanoGrid of the type encoded in GridData::mGridType. +template +auto callNanoGrid(GridDataT *gridData, ArgsT&&... 
args) +{ + static_assert(util::is_same::value, "Expected gridData to be of type GridData* or const GridData*"); + switch (gridData->mGridType){ + case GridType::Float: + return OpT::template known(gridData, args...); + case GridType::Double: + return OpT::template known(gridData, args...); + case GridType::Int16: + return OpT::template known(gridData, args...); + case GridType::Int32: + return OpT::template known(gridData, args...); + case GridType::Int64: + return OpT::template known(gridData, args...); + case GridType::Vec3f: + return OpT::template known(gridData, args...); + case GridType::Vec3d: + return OpT::template known(gridData, args...); + case GridType::UInt32: + return OpT::template known(gridData, args...); + case GridType::Mask: + return OpT::template known(gridData, args...); + case GridType::Index: + return OpT::template known(gridData, args...); + case GridType::OnIndex: + return OpT::template known(gridData, args...); + case GridType::IndexMask: + return OpT::template known(gridData, args...); + case GridType::OnIndexMask: + return OpT::template known(gridData, args...); + case GridType::Boolean: + return OpT::template known(gridData, args...); + case GridType::RGBA8: + return OpT::template known(gridData, args...); + case GridType::Fp4: + return OpT::template known(gridData, args...); + case GridType::Fp8: + return OpT::template known(gridData, args...); + case GridType::Fp16: + return OpT::template known(gridData, args...); + case GridType::FpN: + return OpT::template known(gridData, args...); + case GridType::Vec4f: + return OpT::template known(gridData, args...); + case GridType::Vec4d: + return OpT::template known(gridData, args...); + case GridType::UInt8: + return OpT::template known(gridData, args...); + default: + return OpT::unknown(gridData, args...); + } +}// callNanoGrid + // --------------------------> ReadAccessor <------------------------------------ /// @brief A read-only value accessor with three levels of node caching. This allows for @@ -7218,7 +5616,7 @@ class ReadAccessor __hostdev__ const NodeT* getNode() const { using T = typename NodeTrait::type; - static_assert(is_same::value, "ReadAccessor::getNode: Invalid node type"); + static_assert(util::is_same::value, "ReadAccessor::getNode: Invalid node type"); return reinterpret_cast(mNode[NodeT::LEVEL]); } @@ -7511,9 +5909,9 @@ class GridMetaData } GridMetaData(const GridData* gridData) { - static_assert(8 * 96 == sizeof(GridMetaData), "GridMetaData has unexpected size"); if (GridMetaData::safeCast(gridData)) { - memcpy64(this, gridData, 96); + *this = *reinterpret_cast(gridData); + //util::memcpy(this, (const GridMetaData*)gridData); } else {// otherwise copy each member individually mGridData = *gridData; mTreeData = *reinterpret_cast(gridData->treePtr()); @@ -7521,6 +5919,7 @@ class GridMetaData mRootTableSize = gridData->rootTableSize(); } } + GridMetaData& operator=(const GridMetaData&) = default; /// @brief return true if the RootData follows right after the TreeData. 
/// If so, this implies that it's safe to cast the grid from which /// this instance was constructed to a GridMetaData @@ -7558,14 +5957,14 @@ class GridMetaData __hostdev__ uint32_t gridCount() const { return mGridData.mGridCount; } __hostdev__ const char* shortGridName() const { return mGridData.mGridName; } __hostdev__ const Map& map() const { return mGridData.mMap; } - __hostdev__ const BBox& worldBBox() const { return mGridData.mWorldBBox; } - __hostdev__ const BBox& indexBBox() const { return mIndexBBox; } + __hostdev__ const Vec3dBBox& worldBBox() const { return mGridData.mWorldBBox; } + __hostdev__ const CoordBBox& indexBBox() const { return mIndexBBox; } __hostdev__ Vec3d voxelSize() const { return mGridData.mVoxelSize; } __hostdev__ int blindDataCount() const { return mGridData.mBlindMetadataCount; } __hostdev__ uint64_t activeVoxelCount() const { return mTreeData.mVoxelCount; } __hostdev__ const uint32_t& activeTileCount(uint32_t level) const { return mTreeData.mTileCount[level - 1]; } __hostdev__ uint32_t nodeCount(uint32_t level) const { return mTreeData.mNodeCount[level]; } - __hostdev__ uint64_t checksum() const { return mGridData.mChecksum; } + __hostdev__ const Checksum& checksum() const { return mGridData.mChecksum; } __hostdev__ uint32_t rootTableSize() const { return mRootTableSize; } __hostdev__ bool isEmpty() const { return mRootTableSize == 0; } __hostdev__ Version version() const { return mGridData.mVersion; } @@ -7587,9 +5986,9 @@ class PointAccessor : public DefaultReadAccessor , mGrid(grid) , mData(grid.template getBlindData(0)) { - NANOVDB_ASSERT(grid.gridType() == mapToGridType()); - NANOVDB_ASSERT((grid.gridClass() == GridClass::PointIndex && is_same::value) || - (grid.gridClass() == GridClass::PointData && is_same::value)); + NANOVDB_ASSERT(grid.gridType() == toGridType()); + NANOVDB_ASSERT((grid.gridClass() == GridClass::PointIndex && util::is_same::value) || + (grid.gridClass() == GridClass::PointData && util::is_same::value)); } /// @brief return true if this access was initialized correctly @@ -7652,11 +6051,11 @@ class PointAccessor : public DefaultReadAccessor { NANOVDB_ASSERT(mData); NANOVDB_ASSERT(grid.gridType() == GridType::PointIndex); - NANOVDB_ASSERT((grid.gridClass() == GridClass::PointIndex && is_same::value) || - (grid.gridClass() == GridClass::PointData && is_same::value) || - (grid.gridClass() == GridClass::PointData && is_same::value) || - (grid.gridClass() == GridClass::PointData && is_same::value) || - (grid.gridClass() == GridClass::PointData && is_same::value)); + NANOVDB_ASSERT((grid.gridClass() == GridClass::PointIndex && util::is_same::value) || + (grid.gridClass() == GridClass::PointData && util::is_same::value) || + (grid.gridClass() == GridClass::PointData && util::is_same::value) || + (grid.gridClass() == GridClass::PointData && util::is_same::value) || + (grid.gridClass() == GridClass::PointData && util::is_same::value)); } /// @brief return true if this access was initialized correctly @@ -7770,16 +6169,16 @@ class ChannelAccessor : public DefaultReadAccessor } /// @brief Return the linear offset into a channel that maps to the specified coordinate - __hostdev__ uint64_t getIndex(const Coord& ijk) const { return BaseT::getValue(ijk); } - __hostdev__ uint64_t idx(int i, int j, int k) const { return BaseT::getValue(Coord(i, j, k)); } + __hostdev__ uint64_t getIndex(const math::Coord& ijk) const { return BaseT::getValue(ijk); } + __hostdev__ uint64_t idx(int i, int j, int k) const { return BaseT::getValue(math::Coord(i, j, k)); } 
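+ /// @par Example
+ /// A minimal sketch of channel access (assumes an index grid named idxGrid with
+ /// a float value channel at index 0; names are illustrative):
+ /// @code
+ /// ChannelAccessor<float> acc(idxGrid, 0u); // caches channel 0
+ /// float v = acc(math::Coord(1, 2, 3)); // channel value mapped to (1,2,3)
+ /// @endcode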
/// @brief Return the value from a cached channel that maps to the specified coordinate - __hostdev__ ChannelT& getValue(const Coord& ijk) const { return mChannel[BaseT::getValue(ijk)]; } - __hostdev__ ChannelT& operator()(const Coord& ijk) const { return this->getValue(ijk); } - __hostdev__ ChannelT& operator()(int i, int j, int k) const { return this->getValue(Coord(i, j, k)); } + __hostdev__ ChannelT& getValue(const math::Coord& ijk) const { return mChannel[BaseT::getValue(ijk)]; } + __hostdev__ ChannelT& operator()(const math::Coord& ijk) const { return this->getValue(ijk); } + __hostdev__ ChannelT& operator()(int i, int j, int k) const { return this->getValue(math::Coord(i, j, k)); } /// @brief return the state and updates the value of the specified voxel - __hostdev__ bool probeValue(const Coord& ijk, typename remove_const::type& v) const + __hostdev__ bool probeValue(const math::Coord& ijk, typename util::remove_const::type& v) const { uint64_t idx; const bool isActive = BaseT::probeValue(ijk, idx); @@ -7790,14 +6189,14 @@ class ChannelAccessor : public DefaultReadAccessor /// /// @note The template parameter can be either const or non-const template - __hostdev__ T& getValue(const Coord& ijk, T* channelPtr) const { return channelPtr[BaseT::getValue(ijk)]; } + __hostdev__ T& getValue(const math::Coord& ijk, T* channelPtr) const { return channelPtr[BaseT::getValue(ijk)]; } }; // ChannelAccessor #if 0 // This MiniGridHandle class is only included as a stand-alone example. Note that aligned_alloc is a C++17 feature! // Normally we recommend using GridHandle defined in util/GridHandle.h but this minimal implementation could be an -// alternative when using the IO medthods defined below. +// alternative when using the IO methods defined below. struct MiniGridHandle { struct BufferType { uint8_t *data; @@ -7825,7 +6224,26 @@ namespace io { enum class Codec : uint16_t { NONE = 0, ZIP = 1, BLOSC = 2, - END = 3 }; + End = 3, + StrLen = 6 + End }; + +__hostdev__ inline const char* toStr(char *dst, Codec codec) +{ + switch (codec){ + case Codec::NONE: return util::strcpy(dst, "NONE"); + case Codec::ZIP: return util::strcpy(dst, "ZIP"); + case Codec::BLOSC : return util::strcpy(dst, "BLOSC"); + default: return util::strcpy(dst, "END"); + } +} + +__hostdev__ inline Codec toCodec(const char *str) +{ + if (util::streq(str, "none")) return Codec::NONE; + if (util::streq(str, "zip")) return Codec::ZIP; + if (util::streq(str, "blosc")) return Codec::BLOSC; + return Codec::End; +} /// @brief Data encoded at the head of each segment of a file or stream. /// @@ -7836,7 +6254,7 @@ struct FileHeader {// 16 bytes Version version;// 4 bytes version numbers uint16_t gridCount;// 2 bytes Codec codec;// 2 bytes - bool isValid() const {return magic == NANOVDB_MAGIC_NUMBER || magic == NANOVDB_MAGIC_FILE;} + bool isValid() const {return magic == NANOVDB_MAGIC_NUMB || magic == NANOVDB_MAGIC_FILE;} }; // FileHeader ( 16 bytes = 2 words ) // @brief Data encoded for each of the grids associated with a segment. @@ -7861,7 +6279,7 @@ struct FileMetaData uint64_t gridSize, fileSize, nameKey, voxelCount; // 4 * 8 = 32B. GridType gridType; // 4B. GridClass gridClass; // 4B. - BBox worldBBox; // 2 * 3 * 8 = 48B. + Vec3dBBox worldBBox; // 2 * 3 * 8 = 48B. CoordBBox indexBBox; // 2 * 3 * 4 = 24B. Vec3d voxelSize; // 24B. uint32_t nameSize; // 4B. 
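+ /// @par Example
+ /// The buffer-based toStr overload above expects caller-provided storage, and the
+ /// StrLen enumerator encodes a worst-case buffer size; a minimal sketch (names are illustrative):
+ /// @code
+ /// char buf[int(io::Codec::StrLen)]; // fits any codec name
+ /// printf("codec = %s\n", io::toStr(buf, io::Codec::BLOSC)); // prints "BLOSC"
+ /// io::Codec codec = io::toCodec("zip"); // parses back to Codec::ZIP
+ /// @endcode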
@@ -7875,13 +6293,6 @@ struct FileMetaData // the following code block uses std and therefore needs to be ignored by CUDA and HIP #if !defined(__CUDA_ARCH__) && !defined(__HIP__) -inline const char* toStr(Codec codec) -{ - static const char * LUT[] = { "NONE", "ZIP", "BLOSC" , "END" }; - static_assert(sizeof(LUT) / sizeof(char*) - 1 == int(Codec::END), "Unexpected size of LUT"); - return LUT[static_cast(codec)]; -} - // Note that starting with version 32.6.0 it is possible to write and read raw grid buffers to // files, e.g. os.write((const char*)&buffer.data(), buffer.size()) or more conveniently as // handle.write(fileName). In addition to this simple approach we offer the methods below to @@ -7906,18 +6317,17 @@ inline const char* toStr(Codec codec) template // StreamT class must support: "void write(const char*, size_t)" void writeUncompressedGrid(StreamT& os, const GridData* gridData, bool raw = false) { - NANOVDB_ASSERT(gridData->mMagic == NANOVDB_MAGIC_NUMBER || gridData->mMagic == NANOVDB_MAGIC_GRID); + NANOVDB_ASSERT(gridData->mMagic == NANOVDB_MAGIC_NUMB || gridData->mMagic == NANOVDB_MAGIC_GRID); NANOVDB_ASSERT(gridData->mVersion.isCompatible()); if (!raw) {// segment with a single grid: FileHeader, FileMetaData, gridName, Grid #ifdef NANOVDB_USE_NEW_MAGIC_NUMBERS FileHeader head{NANOVDB_MAGIC_FILE, gridData->mVersion, 1u, Codec::NONE}; #else - FileHeader head{NANOVDB_MAGIC_NUMBER, gridData->mVersion, 1u, Codec::NONE}; + FileHeader head{NANOVDB_MAGIC_NUMB, gridData->mVersion, 1u, Codec::NONE}; #endif const char* gridName = gridData->gridName(); - uint32_t nameSize = 1; // '\0' - for (const char* p = gridName; *p != '\0'; ++p) ++nameSize; - const TreeData* treeData = (const TreeData*)gridData->treePtr(); + const uint32_t nameSize = util::strlen(gridName) + 1;// include '\0' + const TreeData* treeData = (const TreeData*)(gridData->treePtr()); FileMetaData meta{gridData->mGridSize, gridData->mGridSize, 0u, treeData->mVoxelCount, gridData->mGridType, gridData->mGridClass, gridData->mWorldBBox, treeData->bbox(), gridData->mVoxelSize, nameSize, @@ -7986,10 +6396,12 @@ VecT readUncompressedGrids(StreamT& is, const typename GridHandleT: fprintf(stderr, "nanovdb::readUncompressedGrids: invalid magic number = \"%s\"\n", (const char*)&(head.magic)); exit(EXIT_FAILURE); } else if (!head.version.isCompatible()) { - fprintf(stderr, "nanovdb::readUncompressedGrids: invalid major version = \"%s\"\n", head.version.c_str()); + char str[20]; + fprintf(stderr, "nanovdb::readUncompressedGrids: invalid major version = \"%s\"\n", toStr(str, head.version)); exit(EXIT_FAILURE); } else if (head.codec != Codec::NONE) { - fprintf(stderr, "nanovdb::readUncompressedGrids: invalid codec = \"%s\"\n", toStr(head.codec)); + char str[8]; + fprintf(stderr, "nanovdb::readUncompressedGrids: invalid codec = \"%s\"\n", toStr(str, head.codec)); exit(EXIT_FAILURE); } FileMetaData meta; @@ -8041,7 +6453,7 @@ VecT readUncompressedGrids(const char* fileName, const typename Gri // ----------------------------> Implementations of random access methods <-------------------------------------- -/// @brief Implements Tree::getValue(Coord), i.e. return the value associated with a specific coordinate @c ijk. +/// @brief Implements Tree::getValue(math::Coord), i.e. return the value associated with a specific coordinate @c ijk. /// @tparam BuildT Build type of the grid being called /// @details The value at a coordinate maps to the background, a tile value or a leaf value. 
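+ /// @par Example
+ /// The functor is normally invoked indirectly through the tree; a minimal sketch
+ /// (assumes a NanoGrid<float> named grid; names are illustrative):
+ /// @code
+ /// float v = grid.tree().getValue(math::Coord(1, 2, 3)); // background, tile or voxel value
+ /// @endcode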
template @@ -8078,7 +6490,7 @@ struct SetVoxel __hostdev__ static auto set(NanoLeaf& leaf, uint32_t n, const ValueT& v) { leaf.mValues[n] = v; } }; // SetVoxel -/// @brief Implements Tree::isActive(Coord) +/// @brief Implements Tree::isActive(math::Coord) /// @tparam BuildT Build type of the grid being called template struct GetState @@ -8090,7 +6502,7 @@ struct GetState __hostdev__ static auto get(const NanoLeaf& leaf, uint32_t n) { return leaf.mValueMask.isOn(n); } }; // GetState -/// @brief Implements Tree::getDim(Coord) +/// @brief Implements Tree::getDim(math::Coord) /// @tparam BuildT Build type of the grid being called template struct GetDim @@ -8102,7 +6514,7 @@ struct GetDim __hostdev__ static uint32_t get(const NanoLeaf&, uint32_t) { return 1u; } }; // GetDim -/// @brief Return the pointer to the leaf node that contains Coord. Implements Tree::probeLeaf(Coord) +/// @brief Return the pointer to the leaf node that contains math::Coord. Implements Tree::probeLeaf(math::Coord) /// @tparam BuildT Build type of the grid being called template struct GetLeaf @@ -8114,7 +6526,7 @@ struct GetLeaf __hostdev__ static const NanoLeaf* get(const NanoLeaf& leaf, uint32_t) { return &leaf; } }; // GetLeaf -/// @brief Return point to the lower internal node where Coord maps to one of its values, i.e. terminates +/// @brief Return a pointer to the lower internal node where math::Coord maps to one of its values, i.e. where traversal terminates /// @tparam BuildT Build type of the grid being called template struct GetLower @@ -8126,7 +6538,7 @@ struct GetLower __hostdev__ static const NanoLower* get(const NanoLeaf&, uint32_t) { return nullptr; } }; // GetLower -/// @brief Return point to the upper internal node where Coord maps to one of its values, i.e. terminates +/// @brief Return a pointer to the upper internal node where math::Coord maps to one of its values, i.e. where traversal terminates /// @tparam BuildT Build type of the grid being called template struct GetUpper @@ -8138,7 +6550,7 @@ struct GetUpper __hostdev__ static const NanoUpper* get(const NanoLeaf&, uint32_t) { return nullptr; } }; // GetUpper -/// @brief Implements Tree::probeLeaf(Coord) +/// @brief Implements Tree::probeLeaf(math::Coord) /// @tparam BuildT Build type of the grid being called template struct ProbeValue @@ -8171,7 +6583,7 @@ struct ProbeValue } }; // ProbeValue -/// @brief Implements Tree::getNodeInfo(Coord) +/// @brief Implements Tree::getNodeInfo(math::Coord) /// @tparam BuildT Build type of the grid being called template struct GetNodeInfo @@ -8207,6 +6619,6 @@ struct GetNodeInfo } }; // GetNodeInfo -} // namespace nanovdb +} // namespace nanovdb #endif // end of NANOVDB_NANOVDB_H_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/NodeManager.h b/nanovdb/nanovdb/NodeManager.h new file mode 100644 index 0000000000..0e95ecf872 --- /dev/null +++ b/nanovdb/nanovdb/NodeManager.h @@ -0,0 +1,327 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/*! + \file nanovdb/NodeManager.h + + \author Ken Museth + + \date February 12, 2021 + + \brief This class allows for sequential access to nodes + in a NanoVDB tree on both the host and device. + + \details The ordering of the sequential access to nodes is always breadth-first! 
+*/
+
+#include <nanovdb/NanoVDB.h> // for NanoGrid etc
+#include <nanovdb/HostBuffer.h> // for HostBuffer
+
+#ifndef NANOVDB_NODEMANAGER_H_HAS_BEEN_INCLUDED
+#define NANOVDB_NODEMANAGER_H_HAS_BEEN_INCLUDED
+
+namespace nanovdb {
+
+/// @brief NodeManager allows for sequential access to nodes
+template<typename BuildT>
+class NodeManager;
+
+/// @brief NodeManagerHandle manages the memory of a NodeManager
+template<typename BufferT>
+class NodeManagerHandle;
+
+/// @brief Construct a NodeManager and return its handle
+///
+/// @param grid   grid whose nodes will be accessed sequentially
+/// @param buffer buffer from which to allocate the output handle
+///
+/// @note This is the only way to create a NodeManager since it's using
+///       managed memory pointed to by a NodeManagerHandle.
+template<typename BuildT, typename BufferT = HostBuffer>
+NodeManagerHandle<BufferT> createNodeManager(const NanoGrid<BuildT> &grid,
+                                             const BufferT& buffer = BufferT());
+
+struct NodeManagerData
+{// 48B = 6*8B
+    uint64_t mMagic;// 8B
+    union {int64_t mPadding; uint8_t mLinear;};// 8B of which 1B is used for a binary flag
+    void *mGrid;// 8B pointer to either host or device grid
+    union {int64_t *mPtr[3], mOff[3];};// 24B, use mOff if mLinear!=0
+};
+
+/// @brief This class serves to manage a raw memory buffer of a NanoVDB NodeManager or LeafManager.
+template<typename BufferT>
+class NodeManagerHandle
+{
+    GridType mGridType{GridType::Unknown};
+    BufferT  mBuffer;
+
+    template<typename BuildT>
+    const NodeManager<BuildT>* getMgr() const {
+        return mGridType == toGridType<BuildT>() ? (const NodeManager<BuildT>*)mBuffer.data() : nullptr;
+    }
+
+    template<typename BuildT, typename U = BufferT>
+    typename util::enable_if<BufferTraits<U>::hasDeviceDual, const NodeManager<BuildT>*>::type
+    getDeviceMgr() const {
+        return mGridType == toGridType<BuildT>() ? (const NodeManager<BuildT>*)mBuffer.deviceData() : nullptr;
+    }
+
+    template<typename T>
+    static T* no_const(const T* ptr) { return const_cast<T*>(ptr); }
+
+public:
+    /// @brief Move constructor from a buffer
+    NodeManagerHandle(GridType gridType, BufferT&& buffer) : mGridType(gridType) { mBuffer = std::move(buffer); }
+    /// @brief Empty ctor
+    NodeManagerHandle() = default;
+    /// @brief Disallow copy-construction
+    NodeManagerHandle(const NodeManagerHandle&) = delete;
+    /// @brief Disallow copy assignment operation
+    NodeManagerHandle& operator=(const NodeManagerHandle&) = delete;
+    /// @brief Move copy assignment operation
+    NodeManagerHandle& operator=(NodeManagerHandle&& other) noexcept {
+        mGridType = other.mGridType;
+        mBuffer = std::move(other.mBuffer);
+        other.mGridType = GridType::Unknown;
+        return *this;
+    }
+    /// @brief Move copy-constructor
+    NodeManagerHandle(NodeManagerHandle&& other) noexcept {
+        mGridType = other.mGridType;
+        mBuffer = std::move(other.mBuffer);
+        other.mGridType = GridType::Unknown;
+    }
+    /// @brief Default destructor
+    ~NodeManagerHandle() { this->reset(); }
+    /// @brief clear the buffer
+    void reset() { mBuffer.clear(); }
+
+    /// @brief Return a reference to the buffer
+    BufferT& buffer() { return mBuffer; }
+
+    /// @brief Return a const reference to the buffer
+    const BufferT& buffer() const { return mBuffer; }
+
+    /// @brief Returns a non-const pointer to the data.
+    ///
+    /// @warning Note that the return pointer can be NULL if the NodeManagerHandle was not initialized
+    void* data() { return mBuffer.data(); }
+
+    /// @brief Returns a const pointer to the data.
+    ///
+    /// @warning Note that the return pointer can be NULL if the NodeManagerHandle was not initialized
+    const void* data() const { return mBuffer.data(); }
+
+    /// @brief Returns the size in bytes of the raw memory buffer managed by this NodeManagerHandle's allocator.
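+    /// @note This equals NodeManager::memUsage(grid) at construction time, i.e. sizeof(NodeManagerData)
+    ///       plus, for grids that are not fixed-size and breadth-first, one int64_t offset per tree node.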
+    uint64_t size() const { return mBuffer.size(); }
+
+    /// @brief Returns a const pointer to the NodeManager encoded in this NodeManagerHandle.
+    ///
+    /// @warning Note that the return pointer can be NULL if the template parameter does not match the specified grid!
+    template<typename BuildT>
+    const NodeManager<BuildT>* mgr() const { return this->template getMgr<BuildT>(); }
+
+    /// @brief Returns a pointer to the NodeManager encoded in this NodeManagerHandle.
+    ///
+    /// @warning Note that the return pointer can be NULL if the template parameter does not match the specified grid!
+    template<typename BuildT>
+    NodeManager<BuildT>* mgr() { return no_const(this->template getMgr<BuildT>()); }
+
+    /// @brief Return a const pointer to the NodeManager encoded in this NodeManagerHandle on the device, e.g. GPU
+    ///
+    /// @warning Note that the return pointer can be NULL if the template parameter does not match the specified grid!
+    template<typename BuildT, typename U = BufferT>
+    typename util::enable_if<BufferTraits<U>::hasDeviceDual, const NodeManager<BuildT>*>::type
+    deviceMgr() const { return this->template getDeviceMgr<BuildT>(); }
+
+    /// @brief Return a pointer to the NodeManager encoded in this NodeManagerHandle on the device, e.g. GPU
+    ///
+    /// @warning Note that the return pointer can be NULL if the template parameter does not match the specified grid!
+    template<typename BuildT, typename U = BufferT>
+    typename util::enable_if<BufferTraits<U>::hasDeviceDual, NodeManager<BuildT>*>::type
+    deviceMgr() { return no_const(this->template getDeviceMgr<BuildT>()); }
+
+    /// @brief Upload the NodeManager to the device, e.g. from CPU to GPU
+    ///
+    /// @note This method is only available if the buffer supports devices
+    template<typename U = BufferT>
+    typename util::enable_if<BufferTraits<U>::hasDeviceDual, void>::type
+    deviceUpload(void* deviceGrid, void* stream = nullptr, bool sync = true)
+    {
+        assert(deviceGrid);
+        auto *data = reinterpret_cast<NodeManagerData*>(mBuffer.data());
+        void *tmp = data->mGrid;
+        data->mGrid = deviceGrid;
+        mBuffer.deviceUpload(stream, sync);
+        data->mGrid = tmp;
+    }
+
+    /// @brief Download the NodeManager from the device, e.g. from GPU to CPU
+    ///
+    /// @note This method is only available if the buffer supports devices
+    template<typename U = BufferT>
+    typename util::enable_if<BufferTraits<U>::hasDeviceDual, void>::type
+    deviceDownload(void* stream = nullptr, bool sync = true)
+    {
+        auto *data = reinterpret_cast<NodeManagerData*>(mBuffer.data());
+        void *tmp = data->mGrid;
+        mBuffer.deviceDownload(stream, sync);
+        data->mGrid = tmp;
+    }
+};// NodeManagerHandle
+
+/// @brief This class allows for sequential access to nodes in a NanoVDB tree
+///
+/// @details Nodes are always arranged breadth first during sequential access of nodes
+///          at a particular level.
+template<typename BuildT>
+class NodeManager : private NodeManagerData
+{
+    using DataT = NodeManagerData;
+    using GridT = NanoGrid<BuildT>;
+    using TreeT = typename GridTree<GridT>::type;
+    template<int LEVEL>
+    using NodeT = typename NodeTrait<TreeT, LEVEL>::type;
+    using RootT = NodeT<3>;// root node
+    using Node2 = NodeT<2>;// upper internal node
+    using Node1 = NodeT<1>;// lower internal node
+    using Node0 = NodeT<0>;// leaf node
+
+public:
+    static constexpr bool FIXED_SIZE = Node0::FIXED_SIZE && Node1::FIXED_SIZE && Node2::FIXED_SIZE;
+
+    NodeManager(const NodeManager&) = delete;
+    NodeManager(NodeManager&&) = delete;
+    NodeManager& operator=(const NodeManager&) = delete;
+    NodeManager& operator=(NodeManager&&) = delete;
+    ~NodeManager() = delete;
+
+    /// @brief return true if the nodes have both fixed size and are arranged breadth-first in memory.
+    ///        This allows for direct and memory-efficient linear access to nodes.
+ __hostdev__ static bool isLinear(const GridT &grid) {return FIXED_SIZE && grid.isBreadthFirst();} + + /// @brief return true if the nodes have both fixed size and are arranged breadth-first in memory. + /// This allows for direct and memory-efficient linear access to nodes. + __hostdev__ bool isLinear() const {return DataT::mLinear!=0u;} + + /// @brief Return the memory footprint in bytes of the NodeManager derived from the specified grid + __hostdev__ static uint64_t memUsage(const GridT &grid) { + uint64_t size = sizeof(NodeManagerData); + if (!NodeManager::isLinear(grid)) { + const uint32_t *p = grid.tree().mNodeCount; + size += sizeof(int64_t)*(p[0]+p[1]+p[2]); + } + return size; + } + + /// @brief Return the memory footprint in bytes of this instance + __hostdev__ uint64_t memUsage() const {return NodeManager::memUsage(this->grid());} + + /// @brief Return a reference to the grid + __hostdev__ GridT& grid() { return *reinterpret_cast(DataT::mGrid); } + __hostdev__ const GridT& grid() const { return *reinterpret_cast(DataT::mGrid); } + + /// @brief Return a reference to the tree + __hostdev__ TreeT& tree() { return this->grid().tree(); } + __hostdev__ const TreeT& tree() const { return this->grid().tree(); } + + /// @brief Return a reference to the root + __hostdev__ RootT& root() { return this->tree().root(); } + __hostdev__ const RootT& root() const { return this->tree().root(); } + + /// @brief Return the number of tree nodes at the specified level + /// @details 0 is leaf, 1 is lower internal, and 2 is upper internal level + __hostdev__ uint64_t nodeCount(int level) const { return this->tree().nodeCount(level); } + + __hostdev__ uint64_t leafCount() const { return this->tree().nodeCount(0); } + __hostdev__ uint64_t lowerCount() const { return this->tree().nodeCount(1); } + __hostdev__ uint64_t upperCount() const { return this->tree().nodeCount(2); } + + /// @brief Return the i'th leaf node with respect to breadth-first ordering + template + __hostdev__ const NodeT& node(uint32_t i) const { + NANOVDB_ASSERT(i < this->nodeCount(LEVEL)); + const NodeT* ptr = nullptr; + if (DataT::mLinear) { + ptr = util::PtrAdd>(DataT::mGrid, DataT::mOff[LEVEL]) + i; + } else { + ptr = util::PtrAdd>(DataT::mGrid, DataT::mPtr[LEVEL][i]); + } + NANOVDB_ASSERT(ptr && isAligned(ptr)); + return *ptr; + } + + /// @brief Return the i'th node with respect to breadth-first ordering + template + __hostdev__ NodeT& node(uint32_t i) { + NANOVDB_ASSERT(i < this->nodeCount(LEVEL)); + NodeT* ptr = nullptr; + if (DataT::mLinear) { + ptr = util::PtrAdd>(DataT::mGrid, DataT::mOff[LEVEL]) + i; + } else { + ptr = util::PtrAdd>(DataT::mGrid, DataT::mPtr[LEVEL][i]); + } + NANOVDB_ASSERT(ptr && isAligned(ptr)); + return *ptr; + } + + /// @brief Return the i'th leaf node with respect to breadth-first ordering + __hostdev__ const Node0& leaf(uint32_t i) const { return this->node<0>(i); } + __hostdev__ Node0& leaf(uint32_t i) { return this->node<0>(i); } + + /// @brief Return the i'th lower internal node with respect to breadth-first ordering + __hostdev__ const Node1& lower(uint32_t i) const { return this->node<1>(i); } + __hostdev__ Node1& lower(uint32_t i) { return this->node<1>(i); } + + /// @brief Return the i'th upper internal node with respect to breadth-first ordering + __hostdev__ const Node2& upper(uint32_t i) const { return this->node<2>(i); } + __hostdev__ Node2& upper(uint32_t i) { return this->node<2>(i); } + +}; // NodeManager class + +template +NodeManagerHandle createNodeManager(const NanoGrid &grid, + const 
BufferT& buffer) +{ + NodeManagerHandle handle(toGridType(), BufferT::create(NodeManager::memUsage(grid), &buffer)); + auto *data = reinterpret_cast(handle.data()); + NANOVDB_ASSERT(data && isAligned(data)); + NANOVDB_ASSERT(toGridType() == grid.gridType()); +#ifdef NANOVDB_USE_NEW_MAGIC_NUMBERS + *data = NodeManagerData{NANOVDB_MAGIC_NODE, {0u}, (void*)&grid, {{0u,0u,0u}}}; +#else + *data = NodeManagerData{NANOVDB_MAGIC_NUMB, {0u}, (void*)&grid, {{0u,0u,0u}}}; +#endif + + if (NodeManager::isLinear(grid)) { + data->mLinear = uint8_t(1u); + data->mOff[0] = util::PtrDiff(grid.tree().template getFirstNode<0>(), &grid); + data->mOff[1] = util::PtrDiff(grid.tree().template getFirstNode<1>(), &grid); + data->mOff[2] = util::PtrDiff(grid.tree().template getFirstNode<2>(), &grid); + } else { + int64_t *ptr0 = data->mPtr[0] = reinterpret_cast(data + 1); + int64_t *ptr1 = data->mPtr[1] = data->mPtr[0] + grid.tree().nodeCount(0); + int64_t *ptr2 = data->mPtr[2] = data->mPtr[1] + grid.tree().nodeCount(1); + // Performs depth first traversal but breadth first insertion + for (auto it2 = grid.tree().root().cbeginChild(); it2; ++it2) { + *ptr2++ = util::PtrDiff(&*it2, &grid); + for (auto it1 = it2->cbeginChild(); it1; ++it1) { + *ptr1++ = util::PtrDiff(&*it1, &grid); + for (auto it0 = it1->cbeginChild(); it0; ++it0) { + *ptr0++ = util::PtrDiff(&*it0, &grid); + }// loop over child nodes of the lower internal node + }// loop over child nodes of the upper internal node + }// loop over child nodes of the root node + } + + return handle;// // is converted to r-value so return value is move constructed! +} + +} // namespace nanovdb + +#if defined(__CUDACC__) +#include +#endif// defined(__CUDACC__) + +#endif // NANOVDB_NODEMANAGER_H_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/PNanoVDB.h b/nanovdb/nanovdb/PNanoVDB.h index 24fb68478c..40888f242c 100644 --- a/nanovdb/nanovdb/PNanoVDB.h +++ b/nanovdb/nanovdb/PNanoVDB.h @@ -3,7 +3,7 @@ // SPDX-License-Identifier: MPL-2.0 /*! - \file PNanoVDB.h + \file nanovdb/PNanoVDB.h \author Andrew Reidmeyer @@ -291,7 +291,11 @@ void pnanovdb_buf_write_uint64(pnanovdb_buf_t buf, uint byte_offset, uvec2 value // struct typedef, static const, inout #if defined(PNANOVDB_C) #define PNANOVDB_STRUCT_TYPEDEF(X) typedef struct X X; +#if defined(__CUDA_ARCH__) +#define PNANOVDB_STATIC_CONST constexpr __constant__ +#else #define PNANOVDB_STATIC_CONST static const +#endif #define PNANOVDB_INOUT(X) X* #define PNANOVDB_IN(X) const X* #define PNANOVDB_DEREF(X) (*X) @@ -929,7 +933,7 @@ PNANOVDB_FORCE_INLINE void pnanovdb_write_vec3(pnanovdb_buf_t buf, pnanovdb_addr #define PNANOVDB_MAGIC_FILE 0x324244566f6e614eUL// "NanoVDB2" in hex - little endian (uint64_t) #define PNANOVDB_MAJOR_VERSION_NUMBER 32// reflects changes to the ABI -#define PNANOVDB_MINOR_VERSION_NUMBER 6// reflects changes to the API but not ABI +#define PNANOVDB_MINOR_VERSION_NUMBER 7// reflects changes to the API but not ABI #define PNANOVDB_PATCH_VERSION_NUMBER 0// reflects bug-fixes with no ABI or API changes #define PNANOVDB_GRID_TYPE_UNKNOWN 0 @@ -958,7 +962,8 @@ PNANOVDB_FORCE_INLINE void pnanovdb_write_vec3(pnanovdb_buf_t buf, pnanovdb_addr #define PNANOVDB_GRID_TYPE_POINTINDEX 23 #define PNANOVDB_GRID_TYPE_VEC3U8 24 #define PNANOVDB_GRID_TYPE_VEC3U16 25 -#define PNANOVDB_GRID_TYPE_END 26 +#define PNANOVDB_GRID_TYPE_UINT8 26 +#define PNANOVDB_GRID_TYPE_END 27 #define PNANOVDB_GRID_CLASS_UNKNOWN 0 #define PNANOVDB_GRID_CLASS_LEVEL_SET 1 // narrow band level set, e.g. 
SDF @@ -989,17 +994,17 @@ PNANOVDB_FORCE_INLINE void pnanovdb_write_vec3(pnanovdb_buf_t buf, pnanovdb_addr // BuildType = Unknown, float, double, int16_t, int32_t, int64_t, Vec3f, Vec3d, Mask, ... // bit count of values in leaf nodes, i.e. 8*sizeof(*nanovdb::LeafNode::mValues) or zero if no values are stored -PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_value_strides_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 16, 32, 64, 96, 192, 0, 16, 32, 1, 32, 4, 8, 16, 0, 128, 256, 0, 0, 0, 0, 16, 24, 48 }; +PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_value_strides_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 16, 32, 64, 96, 192, 0, 16, 32, 1, 32, 4, 8, 16, 0, 128, 256, 0, 0, 0, 0, 16, 24, 48, 8 }; // bit count of the Tile union in InternalNodes, i.e. 8*sizeof(nanovdb::InternalData::Tile) -PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_table_strides_bits[PNANOVDB_GRID_TYPE_END] = { 64, 64, 64, 64, 64, 64, 128, 192, 64, 64, 64, 64, 64, 64, 64, 64, 64, 128, 256, 64, 64, 64, 64, 64, 64, 64 }; +PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_table_strides_bits[PNANOVDB_GRID_TYPE_END] = { 64, 64, 64, 64, 64, 64, 128, 192, 64, 64, 64, 64, 64, 64, 64, 64, 64, 128, 256, 64, 64, 64, 64, 64, 64, 64, 64 }; // bit count of min/max values, i.e. 8*sizeof(nanovdb::LeafData::mMinimum) or zero if no min/max exists -PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_minmax_strides_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 16, 32, 64, 96, 192, 8, 16, 32, 8, 32, 32, 32, 32, 32, 128, 256, 64, 64, 64, 64, 64, 24, 48 }; +PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_minmax_strides_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 16, 32, 64, 96, 192, 8, 16, 32, 8, 32, 32, 32, 32, 32, 128, 256, 64, 64, 64, 64, 64, 24, 48, 8 }; // bit alignment of the value type, controlled by the smallest native type, which is why it is always 0, 8, 16, 32, or 64, e.g. for Vec3f it is 32 -PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_minmax_aligns_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 16, 32, 64, 32, 64, 8, 16, 32, 8, 32, 32, 32, 32, 32, 32, 64, 64, 64, 64, 64, 64, 8, 16 }; +PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_minmax_aligns_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 16, 32, 64, 32, 64, 8, 16, 32, 8, 32, 32, 32, 32, 32, 32, 64, 64, 64, 64, 64, 64, 8, 16, 8 }; // bit alignment of the stats (avg/std-dev) types, e.g. 8*sizeof(nanovdb::LeafData::mAverage) -PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_stat_strides_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 32, 32, 64, 32, 64, 8, 32, 32, 8, 32, 32, 32, 32, 32, 32, 64, 64, 64, 64, 64, 64, 32, 32 }; +PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_stat_strides_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 32, 32, 64, 32, 64, 8, 32, 32, 8, 32, 32, 32, 32, 32, 32, 64, 64, 64, 64, 64, 64, 32, 32, 32 }; // one of the 4 leaf types defined above, e.g. 
PNANOVDB_LEAF_TYPE_INDEX = 3 -PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_leaf_type[PNANOVDB_GRID_TYPE_END] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 2, 2, 2, 2, 0, 0, 3, 3, 4, 4, 5, 0, 0 }; +PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_leaf_type[PNANOVDB_GRID_TYPE_END] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 2, 2, 2, 2, 0, 0, 3, 3, 4, 4, 5, 0, 0, 0 }; struct pnanovdb_map_t { @@ -1229,9 +1234,9 @@ PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_version_get_patch(pnanovdb_uint struct pnanovdb_gridblindmetadata_t { - pnanovdb_int64_t byte_offset; // 8 bytes, 0 - pnanovdb_uint64_t element_count; // 8 bytes, 8 - pnanovdb_uint32_t flags; // 4 bytes, 16 + pnanovdb_int64_t data_offset; // 8 bytes, 0 + pnanovdb_uint64_t value_count; // 8 bytes, 8 + pnanovdb_uint32_t value_size; // 4 bytes, 16 pnanovdb_uint32_t semantic; // 4 bytes, 20 pnanovdb_uint32_t data_class; // 4 bytes, 24 pnanovdb_uint32_t data_type; // 4 bytes, 28 @@ -1243,22 +1248,22 @@ PNANOVDB_STRUCT_TYPEDEF(pnanovdb_gridblindmetadata_handle_t) #define PNANOVDB_GRIDBLINDMETADATA_SIZE 288 -#define PNANOVDB_GRIDBLINDMETADATA_OFF_BYTE_OFFSET 0 -#define PNANOVDB_GRIDBLINDMETADATA_OFF_ELEMENT_COUNT 8 -#define PNANOVDB_GRIDBLINDMETADATA_OFF_FLAGS 16 +#define PNANOVDB_GRIDBLINDMETADATA_OFF_DATA_OFFSET 0 +#define PNANOVDB_GRIDBLINDMETADATA_OFF_VALUE_COUNT 8 +#define PNANOVDB_GRIDBLINDMETADATA_OFF_VALUE_SIZE 16 #define PNANOVDB_GRIDBLINDMETADATA_OFF_SEMANTIC 20 #define PNANOVDB_GRIDBLINDMETADATA_OFF_DATA_CLASS 24 #define PNANOVDB_GRIDBLINDMETADATA_OFF_DATA_TYPE 28 #define PNANOVDB_GRIDBLINDMETADATA_OFF_NAME 32 -PNANOVDB_FORCE_INLINE pnanovdb_int64_t pnanovdb_gridblindmetadata_get_byte_offset(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p) { - return pnanovdb_read_int64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_BYTE_OFFSET)); +PNANOVDB_FORCE_INLINE pnanovdb_int64_t pnanovdb_gridblindmetadata_get_data_offset(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p) { + return pnanovdb_read_int64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_DATA_OFFSET)); } -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_gridblindmetadata_get_element_count(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p) { - return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_ELEMENT_COUNT)); +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_gridblindmetadata_get_value_count(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_VALUE_COUNT)); } -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_gridblindmetadata_get_flags(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p) { - return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_FLAGS)); +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_gridblindmetadata_get_value_size(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_VALUE_SIZE)); } PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_gridblindmetadata_get_semantic(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p) { return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_SEMANTIC)); @@ -1662,6 +1667,7 @@ PNANOVDB_STATIC_CONST pnanovdb_grid_type_constants_t pnanovdb_grid_type_constant {32, 40, 48, 56, 64, 96, 
16, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 88, 96, 96, 96, 1120}, {28, 31, 34, 40, 44, 64, 24, 8, 20, 32, 8224, 8227, 8232, 8236, 8256, 270400, 1056, 1059, 1064, 1068, 1088, 33856, 80, 83, 88, 92, 96, 1632}, {28, 34, 40, 48, 52, 64, 48, 8, 20, 32, 8224, 8230, 8236, 8240, 8256, 270400, 1056, 1062, 1068, 1072, 1088, 33856, 80, 86, 92, 96, 128, 3200}, +{28, 29, 30, 32, 36, 64, 8, 8, 20, 32, 8224, 8225, 8228, 8232, 8256, 270400, 1056, 1057, 1060, 1064, 1088, 33856, 80, 81, 84, 88, 96, 608}, }; // ------------------------------------------------ Basic Lookup ----------------------------------------------------------- @@ -1678,7 +1684,7 @@ PNANOVDB_FORCE_INLINE pnanovdb_gridblindmetadata_handle_t pnanovdb_grid_get_grid PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_grid_get_gridblindmetadata_value_address(pnanovdb_buf_t buf, pnanovdb_grid_handle_t grid, pnanovdb_uint32_t index) { pnanovdb_gridblindmetadata_handle_t meta = pnanovdb_grid_get_gridblindmetadata(buf, grid, index); - pnanovdb_int64_t byte_offset = pnanovdb_gridblindmetadata_get_byte_offset(buf, meta); + pnanovdb_int64_t byte_offset = pnanovdb_gridblindmetadata_get_data_offset(buf, meta); pnanovdb_address_t address = pnanovdb_address_offset64(meta.address, pnanovdb_int64_as_uint64(byte_offset)); return address; } diff --git a/nanovdb/nanovdb/cmd/convert/nanovdb_convert.cc b/nanovdb/nanovdb/cmd/convert/nanovdb_convert.cc index 7a3a5b5170..9133bd7f8c 100644 --- a/nanovdb/nanovdb/cmd/convert/nanovdb_convert.cc +++ b/nanovdb/nanovdb/cmd/convert/nanovdb_convert.cc @@ -15,9 +15,9 @@ #include #include -#include // this is required to read (and write) NanoVDB files on the host -#include -#include +#include // this is required to read (and write) NanoVDB files on the host +#include +#include void usage [[noreturn]] (const std::string& progName, int exitStatus = EXIT_FAILURE) { @@ -47,7 +47,9 @@ void usage [[noreturn]] (const std::string& progName, int exitStatus = EXIT_FAIL void version [[noreturn]] (const char* progName, int exitStatus = EXIT_SUCCESS) { - printf("\n%s was build against NanoVDB version %s\n", progName, nanovdb::Version().c_str()); + char str[8]; + nanovdb::toStr(str, nanovdb::Version()); + printf("\n%s was build against NanoVDB version %s\n", progName, str); exit(exitStatus); } @@ -56,8 +58,8 @@ int main(int argc, char* argv[]) int exitStatus = EXIT_SUCCESS; nanovdb::io::Codec codec = nanovdb::io::Codec::NONE;// compression codec for the file - nanovdb::StatsMode sMode = nanovdb::StatsMode::Default; - nanovdb::ChecksumMode cMode = nanovdb::ChecksumMode::Default; + nanovdb::tools::StatsMode sMode = nanovdb::tools::StatsMode::Default; + nanovdb::CheckMode cMode = nanovdb::CheckMode::Default; nanovdb::GridType qMode = nanovdb::GridType::Unknown;//specify the quantization mode bool verbose = false, overwrite = false, dither = false, absolute = true; float tolerance = -1.0f; @@ -99,11 +101,11 @@ int main(int argc, char* argv[]) std::string str(argv[++i]); toLowerCase(str); if (str == "none") { - cMode = nanovdb::ChecksumMode::Disable; + cMode = nanovdb::CheckMode::Disable; } else if (str == "partial") { - cMode = nanovdb::ChecksumMode::Partial; + cMode = nanovdb::CheckMode::Partial; } else if (str == "full") { - cMode = nanovdb::ChecksumMode::Full; + cMode = nanovdb::CheckMode::Full; } else { std::cerr << "Expected one of the following checksum modes: {none, partial, full}\n" << std::endl; usage(argv[0]); @@ -117,13 +119,13 @@ int main(int argc, char* argv[]) std::string 
str(argv[++i]); toLowerCase(str); if (str == "none") { - sMode = nanovdb::StatsMode::Disable; + sMode = nanovdb::tools::StatsMode::Disable; } else if (str == "bbox") { - sMode = nanovdb::StatsMode::BBox; + sMode = nanovdb::tools::StatsMode::BBox; } else if (str == "extrema") { - sMode = nanovdb::StatsMode::MinMax; + sMode = nanovdb::tools::StatsMode::MinMax; } else if (str == "all") { - sMode = nanovdb::StatsMode::All; + sMode = nanovdb::tools::StatsMode::All; } else { std::cerr << "Expected one of the following stats modes: {none, bbox, extrema, all}\n" << std::endl; usage(argv[0]); @@ -136,7 +138,7 @@ int main(int argc, char* argv[]) } else { qMode = nanovdb::GridType::FpN; absolute = true; - tolerance = atof(argv[++i]); + tolerance = static_cast(atof(argv[++i])); } } else if (arg == "-r" || arg == "--rel-error") { if (i + 1 == argc) { @@ -145,7 +147,7 @@ int main(int argc, char* argv[]) } else { qMode = nanovdb::GridType::FpN; absolute = false; - tolerance = atof(argv[++i]); + tolerance = static_cast(atof(argv[++i])); } } else if (arg == "-g" || arg == "--grid") { if (i + 1 == argc) { @@ -203,7 +205,7 @@ int main(int argc, char* argv[]) { using SrcGridT = openvdb::FloatGrid; if (auto floatGrid = openvdb::GridBase::grid(base)) { - nanovdb::CreateNanoGrid s(*floatGrid); + nanovdb::tools::CreateNanoGrid s(*floatGrid); s.setStats(sMode); s.setChecksum(cMode); s.enableDithering(dither); @@ -217,15 +219,15 @@ int main(int argc, char* argv[]) return s.getHandle(); case nanovdb::GridType::FpN: if (absolute) { - return s.getHandle(nanovdb::AbsDiff(tolerance)); + return s.getHandle(nanovdb::tools::AbsDiff(tolerance)); } else { - return s.getHandle(nanovdb::RelDiff(tolerance)); + return s.getHandle(nanovdb::tools::RelDiff(tolerance)); } default: break; }// end of switch } - return nanovdb::openToNanoVDB(base, sMode, cMode, verbose ? 1 : 0); + return nanovdb::tools::openToNanoVDB(base, sMode, cMode, verbose ? 
1 : 0); }; try { if (toNanoVDB) { // OpenVDB -> NanoVDB @@ -275,7 +277,7 @@ int main(int argc, char* argv[]) for (uint32_t i = 0; i < h.gridCount(); ++i) { if (verbose) std::cout << "Converting NanoVDB grid named \"" << h.gridMetaData(i)->shortGridName() << "\" to OpenVDB" << std::endl; - grids->push_back(nanoToOpenVDB(h, 0, i)); + grids->push_back(nanovdb::tools::nanoToOpenVDB(h, 0, i)); } } } else { @@ -286,7 +288,7 @@ int main(int argc, char* argv[]) } if (verbose) std::cout << "Converting NanoVDB grid named \"" << handle.gridMetaData()->shortGridName() << "\" to OpenVDB" << std::endl; - grids->push_back(nanoToOpenVDB(handle)); + grids->push_back(nanovdb::tools::nanoToOpenVDB(handle)); } } // loop over input files file.write(*grids); diff --git a/nanovdb/nanovdb/cmd/print/nanovdb_print.cc b/nanovdb/nanovdb/cmd/print/nanovdb_print.cc index 5336a07190..dd091e75a7 100644 --- a/nanovdb/nanovdb/cmd/print/nanovdb_print.cc +++ b/nanovdb/nanovdb/cmd/print/nanovdb_print.cc @@ -11,7 +11,7 @@ \brief Command-line tool that prints information about grids in a nanovdb file */ -#include // this is required to read (and write) NanoVDB files on the host +#include // this is required to read (and write) NanoVDB files on the host #include #include @@ -31,7 +31,9 @@ void usage [[noreturn]] (const std::string& progName, int exitStatus = EXIT_FAIL void version [[noreturn]] (const char* progName, int exitStatus = EXIT_SUCCESS) { - printf("\n%s was build against NanoVDB version %s\n", progName, nanovdb::Version().c_str()); + char str[8]; + nanovdb::toStr(str, nanovdb::Version()); + printf("\n%s was build against NanoVDB version %s\n", progName, str); exit(exitStatus); } @@ -42,6 +44,7 @@ int main(int argc, char* argv[]) enum Mode : int { Short = 0, Default = 1, Long = 2 } mode = Default; + char str[32]; bool verbose = false; std::string gridName; std::vector fileNames; @@ -109,7 +112,7 @@ int main(int argc, char* argv[]) ss << "(" << v[0] << "," << v[1] << "," << v[2] << ")"; return ss.str(); }; - auto wbboxToStr = [](const nanovdb::BBox& bbox) { + auto wbboxToStr = [](const nanovdb::math::BBox& bbox) { std::stringstream ss; if (bbox.empty()) { ss << "empty grid"; @@ -174,15 +177,15 @@ int main(int argc, char* argv[]) auto resWidth = std::string("Resolution").length() + padding; for (auto& m : list) { width(nameWidth, m.gridName); - width(typeWidth, nanovdb::toStr(m.gridType)); - width(classWidth, nanovdb::toStr(m.gridClass)); - width(codecWidth, nanovdb::io::toStr(m.codec)); + width(typeWidth, nanovdb::toStr(str, m.gridType)); + width(classWidth, nanovdb::toStr(str, m.gridClass)); + width(codecWidth, nanovdb::io::toStr(str, m.codec)); width(wbboxWidth, wbboxToStr(m.worldBBox)); width(ibboxWidth, ibboxToStr(m.indexBBox)); width(resWidth, resToStr(m.indexBBox)); width(sizeWidth, format(m.gridSize)); width(fileWidth, format(m.fileSize)); - width(versionWidth, std::string(m.version.c_str())); + width(versionWidth, nanovdb::toStr(str, m.version)); width(configWidth, nodesToStr(m.nodeCount)); width(tileWidth, nodesToStr(m.tileCount)); width(voxelsWidth, std::to_string(m.voxelCount)); @@ -220,11 +223,11 @@ int main(int argc, char* argv[]) continue; std::cout << std::left << std::setw(numberWidth) << ++n << std::left << std::setw(nameWidth) << m.gridName - << std::left << std::setw(typeWidth) << nanovdb::toStr(m.gridType); + << std::left << std::setw(typeWidth) << nanovdb::toStr(str, m.gridType); if (mode != Short) { - std::cout << std::left << std::setw(classWidth) << nanovdb::toStr(m.gridClass) - << std::left << 
std::setw(versionWidth) << std::string(m.version.c_str()) - << std::left << std::setw(codecWidth) << nanovdb::io::toStr(m.codec) + std::cout << std::left << std::setw(classWidth) << nanovdb::toStr(str, m.gridClass) + << std::left << std::setw(versionWidth) << nanovdb::toStr(str+10, m.version) + << std::left << std::setw(codecWidth) << nanovdb::io::toStr(str + 20, m.codec) << std::left << std::setw(sizeWidth) << format(m.gridSize) << std::left << std::setw(fileWidth) << format(m.fileSize) << std::left << std::setw(voxelSizeWidth) << Vec3dToStr(m.voxelSize); @@ -321,4 +324,4 @@ int main(int argc, char* argv[]) } return exitStatus; -} +}// main diff --git a/nanovdb/nanovdb/cmd/updateFiles.py b/nanovdb/nanovdb/cmd/updateFiles.py new file mode 100644 index 0000000000..e4041c91f6 --- /dev/null +++ b/nanovdb/nanovdb/cmd/updateFiles.py @@ -0,0 +1,220 @@ +import argparse +import os +from pathlib import Path + + +def open_file(file_path): + """ + Opens a file. If utf-8 decoding fails, try windows-1252. + + Args: + file_path: Path of the file to open. + + Returns: + The content of the file in an arbitrary format. + """ + try: + with open(file_path, "r", encoding="utf-8", errors="replace") as file: + return file.read() + except UnicodeDecodeError: + with open(file_path, "r", encoding="windows-1252", errors="replace") as file: + return file.read() + + +def write_file(file_path, content): + """ + Writes a file. If utf-8 decoding fails, try windows-1252. + + Args: + file_path: Path of the file to open. + + Returns: + None. + """ + try: + with open(file_path, "w", encoding="utf-8", errors="replace") as file: + file.write(content) + except UnicodeDecodeError: + with open(file_path, "w", encoding="windows-1252", errors="replace") as file: + file.write(content) + + +def update_files(dir_path): + """ + Updates the content of files ending in .h, .cuh, .cc, .cu, and .cpp + to call the appropriate API as we update NanoVDB from version 32.6 to + version 32.7. This includes changes in namespaces, function names, and + include directories. + + Args: + Directory path: will include files in downstream directories. + + Returns: + None. Writes the contents of the file. + """ + + # List of file extensions to search for + file_extensions = [".h", ".cuh", ".cc", ".cu", ".cpp"] + + nspace_dic = { + "math": [ + "Ray", + "DDA<", + "HDDA", + "Vec3<", + "Vec4<", + "BBox<", + "ZeroCrossing", + "TreeMarcher", + "PointTreeMarcher", + "BoxStencil<", + "CurvatureStencil<", + "GradStencil<", + "WenoStencil<", + "AlignUp", + "Min", + "Max", + "Abs", + "Clamp", + "Sqrt", + "Sign", + "Maximum<", + "Delta<", + "RoundDown<", + "pi<", + "isApproxZero<", + "Round<", + "createSampler", + "SampleFromVoxels<", + ], + "tools": [ + "createNanoGrid", + "StatsMode", + "createLevelSetSphere", + "createFogVolumeSphere", + "createFogVolumeSphere createFogVolumeSphere", + "createFogVolumeTorus", + "createLevelSetBox", + "CreateNanoGrid", + "updateGridStats", + "evalChecksum", + "validateChecksum", + "checkGrid", + "Extrema", + ], + "util": [ + "is_floating_point", + "findLowestOn", + "findHighestOn", + "Range", + "streq", + "strcpy", + "strcat", + "empty(", + "Split", + "invoke", + "forEach", + "reduce", + "prefixSum", + "is_same", + "is_specialization", + "PtrAdd", + "PtrDiff", + ], + } + + rename_dic = { + # list from func4 in updateFiles.sh + "nanovdb::build::": "nanovdb::tools::build::", + "nanovdb::BBoxR": "nanovdb::Vec3dBBox", + "nanovdb::BBox": "nanovdb::Vec3dBbox", + # scope and rename, i.e. 
list from func2 in updateFiles.sh
+        "nanovdb::cudaCreateNodeManager": "nanovdb::cuda::createNodeManager",
+        "nanovdb::cudaVoxelsToGrid": "nanovdb::cuda::voxelsToGrid",
+        "nanovdb::cudaPointsToGrid": "nanovdb::cuda::pointsToGrid",
+        "nanovdb::DitherLUT": "nanovdb::math::DitherLUT",
+        "nanovdb::PackedRGBA8": "nanovdb::math::Rgba8",
+        "nanovdb::Rgba8": "nanovdb::math::Rgba8",
+        "nanovdb::CpuTimer": "nanovdb::util::Timer",
+        "nanovdb::GpuTimer": "nanovdb::util::cuda::Timer",
+        "nanovdb::CountOn": "nanovdb::util::countOn",
+    }
+
+    movdir_dic = {
+        # list comes from func3 calls on updateFiles.sh
+        "util/GridHandle.h": "GridHandle.h",
+        "util/BuildGrid.h": "tools/GridBuilder.h",
+        "util/GridBuilder.h": "tools/GridBuilder.h",
+        "util/IO.h": "io/IO.h",
+        "util/CSampleFromVoxels.h": "math/CSampleFromVoxels.h",
+        "util/DitherLUT.h": "math/DitherLUT.h",
+        "util/HDDA.h": "math/HDDA.h",
+        "util/Ray.h": "math/Ray.h",
+        "util/SampleFromVoxels.h": "math/SampleFromVoxels.h",
+        "util/Stencils.h": "nanovdb/math/Stencils.h",
+        "util/CreateNanoGrid.h": "tools/CreateNanoGrid.h",
+        "util/Primitives.h": "tools/CreatePrimitives.h",
+        "util/GridChecksum.h": "tools/GridChecksum.h",
+        "util/GridStats.h": "tools/GridStats.h",
+        "util/GridValidator.h": "tools/GridValidator.h",
+        "util/NanoToOpenVDB.h": "tools/NanoToOpenVDB.h",
+        "util/cuda/CudaGridChecksum.cuh": "tools/cuda/CudaGridChecksum.cuh",
+        "util/cuda/CudaGridStats.cuh": "tools/cuda/CudaGridStats.cuh",
+        "util/cuda/CudaGridValidator.cuh": "tools/cuda/CudaGridValidator.cuh",
+        "util/cuda/CudaIndexToGrid.cuh": "tools/cuda/CudaIndexToGrid.cuh",
+        "util/cuda/CudaPointsToGrid.cuh": "tools/cuda/PointsToGrid.cuh",
+        "util/cuda/CudaSignedFloodFill.cuh": "tools/cuda/CudaSignedFloodFill.cuh",
+        "util/cuda/CudaDeviceBuffer.h": "cuda/DeviceBuffer.h",
+        "util/cuda/CudaGridHandle.cuh": "cuda/GridHandle.cuh",
+        "util/cuda/CudaUtils.h": "util/cuda/Util.h",
+        "util/cuda/GpuTimer.h": "util/cuda/Timer.h",
+    }
+
+    # Iterate over files in the directory and its subdirectories
+    for root, dirs, files in os.walk(dir_path):
+        for file in files:
+            if any(file.endswith(ext) for ext in file_extensions):
+                file_path = os.path.join(root, file)
+                print(f"Processing file: {file_path}")
+
+                content = open_file(file_path)
+
+                # Correspond to func1 $file in updateFiles.sh
+                for key, vals in nspace_dic.items():
+                    for val in vals:
+                        old_word = "nanovdb::" + val
+                        new_word = "nanovdb::" + key + "::" + val
+                        content = content.replace(old_word, new_word)
+
+                # Correspond to func4 and func2 in updateFiles.sh
+                for key, val in rename_dic.items():
+                    content = content.replace(key, val)
+
+                # Correspond to func3 in updateFiles.sh
+                for key, val in movdir_dic.items():
+                    old_path = "<nanovdb/" + key + ">"
+                    new_path = "<nanovdb/" + val + ">"
+                    content = content.replace(old_path, new_path)
+
+                write_file(file_path, content)
+
+# Example use:
+# To update all the files using NanoVDB in the current directory (and directories downstream):
+# python ./nanovdb/nanovdb/cmd/updateFiles.py
+# To update all the files using NanoVDB in a directory called foo (and directories downstream):
+# python ./nanovdb/nanovdb/cmd/updateFiles.py -d /path/to/foo
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Update files to use the NanoVDB 32.7 API")
+    parser.add_argument(
+        "-d",
+        "--directory",
+        type=str,
+        default=None,
+        help="Path to directory containing .h, .cc, and .cu files using NanoVDB.",
+    )
+
+    args = parser.parse_args()
+    dir_path = os.getcwd() if args.directory is None else
Path(args.directory).resolve() + + update_files(dir_path) diff --git a/nanovdb/nanovdb/cmd/updateFiles.sh b/nanovdb/nanovdb/cmd/updateFiles.sh new file mode 100755 index 0000000000..87613c3a44 --- /dev/null +++ b/nanovdb/nanovdb/cmd/updateFiles.sh @@ -0,0 +1,99 @@ +#!/bin/bash +#Usage process all files in this directory or optionally specify a target directory + +# Define directory in which to find files +dir="." +if [ "$1" ]; then + dir="$1" +fi + +# Check if dir is not a directory +if [ ! -d "$dir" ]; then + echo -e "\nUsage: '$0 '\n" + exit 1 +fi + +# E.g.: func1 $file "math" "Coord" "Vec3" "Vec4" +func1 () { + for ((i=3; i<=$#; i++)); do + arg="s/nanovdb::${!i}/nanovdb::$2::${!i}/g" + #echo "sed -i $arg $1" + sed -i $arg $1 + done +} + +# E.G.: func2 file namespace old new : nanovdb::old -> nanovdb::namespace::new in file +func2 () { + arg="s/nanovdb::$3/nanovdb::$2::$4/g" + #echo "sed -i $arg $1" + sed -i $arg $1 +} + +# E.G.: func3 file path1/old.h path2/new.h -> in file +func3 () { + arg="s;;;g" + #echo "sed -i $arg $1" + sed -i $arg $1 +} + +# E.g.: func4 file old new new -> old +func4 () { + arg="s;$2;$3;g" + #echo "sed -i $arg $1" + sed -i $arg $1 +} + +# Loop through files in the target directory +for file in $(find "$dir" -name '*.h' -or -name '*.cuh' -or -name '*.cc' -or -name '*.cu' -or -name '*.cpp'); do + if [ -f "$file" ]; then + echo "Processing file: $file" + func1 $file math Ray "DDA<" HDDA "Vec3<" "Vec4<" "BBox<" ZeroCrossing TreeMarcher PointTreeMarcher\ + "BoxStencil<" "CurvatureStencil<" "GradStencil<" "WenoStencil<" AlignUp Min Max Abs Clamp\ + Sqrt Sign "Maximum<" "Delta<" "RoundDown<" "pi<" "isApproxZero<" "Round<" createSampler "SampleFromVoxels<" + func1 $file tools createNanoGrid StatsMode createLevelSetSphere\ + createFogVolumeSphere createFogVolumeSphere createFogVolumeSphere\ + createFogVolumeTorus createLevelSetBox CreateNanoGrid updateGridStats\ + evalChecksum validateChecksum checkGrid Extrema + func1 $file util is_floating_point findLowestOn findHighestOn Range streq strcpy strcat "empty("\ + Split invoke forEach reduce prefixSum is_same is_specialization PtrAdd PtrDiff + func4 $file "nanovdb::build::" "nanovdb::tools::build::" + func4 $file "nanovdb::BBoxR" "nanovdb::Vec3dBBox" + func4 $file "nanovdb::BBox" "nanovdb::Vec3dBbox" + func2 $file cuda cudaCreateNodeManager createNodeManager + func2 $file cuda cudaVoxelsToGrid voxelsToGrid + func2 $file cuda cudaPointsToGrid pointsToGrid + func2 $file math DitherLUT DitherLUT + func2 $file math PackedRGBA8 Rgba8 + func2 $file math Rgba8 Rgba8 + func2 $file util CpuTimer Timer + func2 $file util GpuTimer "cuda::Timer" + func2 $file util CountOn countOn + func3 $file "util/GridHandle.h" "GridHandle.h" + func3 $file "util/BuildGrid.h" "tools/GridBuilder.h" + func3 $file "util/GridBuilder.h" "tools/GridBuilder.h" + func3 $file "util/IO.h" "io/IO.h" + func3 $file "util/CSampleFromVoxels.h" "math/CSampleFromVoxels.h" + func3 $file "util/DitherLUT.h" "math/DitherLUT.h" + func3 $file "util/HDDA.h" "math/HDDA.h" + func3 $file "util/Ray.h" "math/Ray.h" + func3 $file "util/SampleFromVoxels.h" "math/SampleFromVoxels.h" + func3 $file "util/Stencils.h" "nanovdb/math/Stencils.h" + func3 $file "util/CreateNanoGrid.h" "tools/CreateNanoGrid.h" + func3 $file "util/Primitives.h" "tools/CreatePrimitives.h" + func3 $file "util/GridChecksum.h" "tools/GridChecksum.h" + func3 $file "util/GridStats.h" "tools/GridStats.h" + func3 $file "util/GridChecksum.h" "tools/GridChecksum.h" + func3 $file "util/GridValidator.h" 
"tools/GridValidator.h" + func3 $file "util/NanoToOpenVDB.h" "tools/NanoToOpenVDB.h" + func3 $file "util/cuda/CudaGridChecksum.cuh" "tools/cuda/CudaGridChecksum.cuh" + func3 $file "util/cuda/CudaGridStats.cuh" "tools/cuda/CudaGridStats.cuh" + func3 $file "util/cuda/CudaGridValidator.cuh" "tools/cuda/CudaGridValidator.cuh" + func3 $file "util/cuda/CudaIndexToGrid.cuh" "tools/cuda/CudaIndexToGrid.cuh" + func3 $file "util/cuda/CudaPointsToGrid.cuh" "tools/cuda/PointsToGrid.cuh" + func3 $file "util/cuda/CudaSignedFloodFill.cuh" "tools/cuda/CudaSignedFloodFill.cuh" + func3 $file "util/cuda/CudaDeviceBuffer.h" "cuda/DeviceBuffer.h" + func3 $file "util/cuda/CudaGridHandle.cuh" "cuda/GridHandle.cuh" + func3 $file "util/cuda/CudaUtils.h" "util/cuda/Util.h" + func3 $file "util/cuda/GpuTimer.h" "util/cuda/Timer.h" + fi +done diff --git a/nanovdb/nanovdb/cmd/validate/nanovdb_validate.cc b/nanovdb/nanovdb/cmd/validate/nanovdb_validate.cc index faec25aa4d..2d563d92de 100644 --- a/nanovdb/nanovdb/cmd/validate/nanovdb_validate.cc +++ b/nanovdb/nanovdb/cmd/validate/nanovdb_validate.cc @@ -11,8 +11,8 @@ \brief Command-line tool that validates Grids in nanovdb files */ -#include // this is required to read (and write) NanoVDB files on the host -#include +#include // this is required to read (and write) NanoVDB files on the host +#include #include #include @@ -23,6 +23,7 @@ void usage [[noreturn]] (const std::string& progName, int exitStatus = EXIT_FAIL << "Options:\n" << "-g,--grid name\tOnly validate grids matching the specified string name\n" << "-h,--help\tPrints this message\n" + << "-p,--partial\tPerform partial (i.e. fast) validation tests\n" << "-v,--verbose\tPrint verbose information information useful for debugging\n" << "--version\tPrint version information to the terminal\n"; exit(exitStatus); @@ -30,17 +31,18 @@ void usage [[noreturn]] (const std::string& progName, int exitStatus = EXIT_FAIL void version [[noreturn]] (const char* progName, int exitStatus = EXIT_SUCCESS) { - printf("\n%s was build against NanoVDB version %s\n", progName, nanovdb::Version().c_str()); + char str[8]; + nanovdb::toStr(str, nanovdb::Version()); + printf("\n%s was build against NanoVDB version %s\n", progName, str); exit(exitStatus); } int main(int argc, char* argv[]) { - int exitStatus = EXIT_SUCCESS; - - bool verbose = false; - bool detailed = true; - std::string gridName; + int exitStatus = EXIT_SUCCESS; + bool verbose = false; + nanovdb::CheckMode mode = nanovdb::CheckMode::Full; + std::string gridName; std::vector fileNames; for (int i = 1; i < argc; ++i) { std::string arg = argv[i]; @@ -51,6 +53,8 @@ int main(int argc, char* argv[]) version(argv[0]); } else if (arg == "-v" || arg == "--verbose") { verbose = true; + } else if (arg == "-p" || arg == "--partial") { + mode = nanovdb::CheckMode::Partial; } else if (arg == "-g" || arg == "--grid") { if (i + 1 == argc) { std::cerr << "\nExpected a grid name to follow the -g,--grid option\n"; @@ -79,62 +83,17 @@ int main(int argc, char* argv[]) if (!gridName.empty()) { std::vector tmp; for (auto& m : list) { - if (nameKey == m.nameKey && gridName == m.gridName) - tmp.emplace_back(m); + if (nameKey == m.nameKey && gridName == m.gridName) tmp.emplace_back(m); } - list = tmp; - } - if (list.size() == 0) { - continue; + list = std::move(tmp); } + if (list.size() == 0) continue; - if (verbose) { - std::cout << "\nThe file \"" << file << "\" contains the following matching " << list.size() << " grid(s):\n"; - } + if (verbose) std::cout << "\nThe file \"" << file << "\" contains 
the following matching " << list.size() << " grid(s):\n";

         for (auto& m : list) {
             auto handle = nanovdb::io::readGrid(file, m.gridName);
-            auto gridType = handle.gridType();
-            bool test = false;
-            if (gridType == nanovdb::GridType::End) {
-                std::cerr << "GridHandle was empty\n" << std::endl;
-                usage(argv[0]);
-            } else if (auto* grid = handle.grid()) {
-                test = isValid(*grid, detailed, verbose);
-            } else if (auto* grid = handle.grid()) {
-                test = isValid(*grid, detailed, verbose);
-            } else if (auto* grid = handle.grid()) {
-                test = isValid(*grid, detailed, verbose);
-            } else if (auto* grid = handle.grid()) {
-                test = isValid(*grid, detailed, verbose);
-            } else if (auto* grid = handle.grid()) {
-                test = isValid(*grid, detailed, verbose);
-            } else if (auto* grid = handle.grid()) {
-                test = isValid(*grid, detailed, verbose);
-            } else if (auto* grid = handle.grid()) {
-                test = isValid(*grid, detailed, verbose);
-            } else if (auto* grid = handle.grid()) {
-                test = isValid(*grid, detailed, verbose);
-            } else if (auto* grid = handle.grid()) {
-                test = isValid(*grid, detailed, verbose);
-            } else if (auto* grid = handle.grid()) {
-                test = isValid(*grid, detailed, verbose);
-            } else if (auto* grid = handle.grid()) {
-                test = isValid(*grid, detailed, verbose);
-            } else if (auto* grid = handle.grid()) {
-                test = isValid(*grid, detailed, verbose);
-            } else if (auto* grid = handle.grid()) {
-                test = isValid(*grid, detailed, verbose);
-            } else if (auto* grid = handle.grid()) {
-                test = isValid(*grid, detailed, verbose);
-            } else if (auto* grid = handle.grid()) {
-                test = isValid(*grid, detailed, verbose);
-            } else if (auto* grid = handle.grid()) {
-                test = isValid(*grid, detailed, verbose);
-            } else {
-                std::cerr << "Unsupported GridType: \"" << nanovdb::toStr(gridType) << "\"\n" << std::endl;
-                usage(argv[0]);
-            }
+            const bool test = nanovdb::tools::validateGrids(handle, mode, verbose);
             if (verbose) {
                 std::cout << "Grid named \"" << m.gridName << "\": " << (test ? "passed" : "failed") << std::endl;
             } else if (!test) {
diff --git a/nanovdb/nanovdb/cuda/DeviceBuffer.h b/nanovdb/nanovdb/cuda/DeviceBuffer.h
new file mode 100644
index 0000000000..171235afbc
--- /dev/null
+++ b/nanovdb/nanovdb/cuda/DeviceBuffer.h
@@ -0,0 +1,231 @@
+// Copyright Contributors to the OpenVDB Project
+// SPDX-License-Identifier: MPL-2.0
+
+/*!
+    \file nanovdb/cuda/DeviceBuffer.h
+
+    \author Ken Museth
+
+    \date January 8, 2020
+
+    \brief Implements a simple dual (host/device) CUDA buffer.
+
+    \note This file has no device-only kernel functions,
+          which explains why it's a .h and not .cuh file.
+*/
+
+#ifndef NANOVDB_CUDA_DEVICEBUFFER_H_HAS_BEEN_INCLUDED
+#define NANOVDB_CUDA_DEVICEBUFFER_H_HAS_BEEN_INCLUDED
+
+#include <nanovdb/HostBuffer.h> // for BufferTraits
+#include <nanovdb/util/cuda/Util.h> // for cudaMalloc/cudaMallocManaged/cudaFree
+
+namespace nanovdb {// ================================================================
+
+namespace cuda {// ===================================================================
+
+// ----------------------------> DeviceBuffer <--------------------------------------
+
+/// @brief Simple memory buffer using un-managed pinned host memory when compiled with NVCC.
+///        Obviously this class is making explicit use of CUDA so replace it with your own memory
+///        allocator if you are not using CUDA.
+/// @note  While CUDA's pinned host memory allows for asynchronous memory copy between host and device
+///        it is significantly slower than cached (un-pinned) memory on the host.
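+/// @par Example (illustrative sketch; assumes CUDA, a byte count nbytes, and pre-allocated cpuPtr/gpuPtr)
+/// @code
+/// auto pinned  = nanovdb::cuda::DeviceBuffer::create(nbytes);                 // owning, pinned host allocation only
+/// auto devOnly = nanovdb::cuda::DeviceBuffer::create(nbytes, nullptr, false); // owning, device-only allocation
+/// pinned.deviceUpload();                           // lazily allocates the device side, then copies host to device
+/// auto wrapped = nanovdb::cuda::DeviceBuffer::create(nbytes, cpuPtr, gpuPtr); // wraps external memory, non-owning
+/// // wrapped.deviceUpload() would throw: copies of externally managed memory are the caller's responsibility
+/// @endcode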
+class DeviceBuffer
+{
+    uint64_t mSize; // total number of bytes managed by this buffer (assumed to be identical for host and device)
+    void *mCpuData, *mGpuData; // raw pointers to the host and device buffers
+    bool mManaged;
+
+public:
+    /// @brief Static factory method that returns an instance of this buffer
+    /// @param size byte size of buffer to be initialized
+    /// @param dummy this argument is currently ignored but required to match the API of the HostBuffer
+    /// @param host If true buffer is initialized only on the host/CPU, else on the device/GPU
+    /// @param stream optional stream argument (defaults to stream NULL)
+    /// @return An instance of this class using move semantics
+    static DeviceBuffer create(uint64_t size, const DeviceBuffer* dummy = nullptr, bool host = true, void* stream = nullptr);
+
+    /// @brief Static factory method that returns an instance of this buffer that wraps externally managed memory
+    /// @param size byte size of buffer specified by external memory
+    /// @param cpuData pointer to externally managed host memory
+    /// @param gpuData pointer to externally managed device memory
+    /// @return An instance of this class using move semantics
+    static DeviceBuffer create(uint64_t size, void* cpuData, void* gpuData);
+
+    /// @brief Constructor
+    /// @param size byte size of buffer to be initialized
+    /// @param host If true buffer is initialized only on the host/CPU, else on the device/GPU
+    /// @param stream optional stream argument (defaults to stream NULL)
+    DeviceBuffer(uint64_t size = 0, bool host = true, void* stream = nullptr)
+        : mSize(0)
+        , mCpuData(nullptr)
+        , mGpuData(nullptr)
+        , mManaged(false)
+    {
+        if (size > 0) this->init(size, host, stream);
+    }
+
+    DeviceBuffer(uint64_t size, void* cpuData, void* gpuData)
+        : mSize(size)
+        , mCpuData(cpuData)
+        , mGpuData(gpuData)
+        , mManaged(false)
+    {
+    }
+
+    /// @brief Disallow copy-construction
+    DeviceBuffer(const DeviceBuffer&) = delete;
+
+    /// @brief Move copy-constructor
+    DeviceBuffer(DeviceBuffer&& other) noexcept
+        : mSize(other.mSize)
+        , mCpuData(other.mCpuData)
+        , mGpuData(other.mGpuData)
+        , mManaged(other.mManaged)
+    {
+        other.mSize = 0;
+        other.mCpuData = nullptr;
+        other.mGpuData = nullptr;
+        other.mManaged = false;
+    }
+
+    /// @brief Disallow copy assignment operation
+    DeviceBuffer& operator=(const DeviceBuffer&) = delete;
+
+    /// @brief Move copy assignment operation
+    DeviceBuffer& operator=(DeviceBuffer&& other) noexcept
+    {
+        this->clear();
+        mSize = other.mSize;
+        mCpuData = other.mCpuData;
+        mGpuData = other.mGpuData;
+        mManaged = other.mManaged;
+        other.mSize = 0;
+        other.mCpuData = nullptr;
+        other.mGpuData = nullptr;
+        other.mManaged = false;
+        return *this;
+    }
+
+    /// @brief Destructor frees memory on both the host and device
+    ~DeviceBuffer() { this->clear(); };
+
+    /// @brief Initialize buffer
+    /// @param size byte size of buffer to be initialized
+    /// @param host If true buffer is initialized only on the host/CPU, else on the device/GPU
+    /// @note All existing buffers are first cleared
+    /// @warning size is expected to be non-zero. Use clear() to clear the buffer!
+    void init(uint64_t size, bool host = true, void* stream = nullptr);
+
+    /// @brief Returns a raw pointer to the host/CPU buffer managed by this allocator.
+    /// @warning Note that the pointer can be NULL!
+    void* data() const { return mCpuData; }
+
+    /// @brief Returns a raw pointer to the device/GPU buffer managed by this allocator.
+    /// @warning Note that the pointer can be NULL!
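+    /// @note For an owning buffer the device pointer is only non-NULL after construction with host=false
+    ///       or after a deviceUpload() call, which allocates the device side lazily.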
+ void* deviceData() const { return mGpuData; } + + /// @brief Upload this buffer from the host to the device, i.e. CPU -> GPU. + /// @param stream optional CUDA stream (defaults to CUDA stream 0) + /// @param sync if false the memory copy is asynchronous + /// @note If the device/GPU buffer does not exist it is first allocated + /// @warning Assumes that the host/CPU buffer already exists + void deviceUpload(void* stream = nullptr, bool sync = true) const; + + /// @brief Upload this buffer from the device to the host, i.e. GPU -> CPU. + /// @param stream optional CUDA stream (defaults to CUDA stream 0) + /// @param sync if false the memory copy is asynchronous + /// @note If the host/CPU buffer does not exist it is first allocated + /// @warning Assumes that the device/GPU buffer already exists + void deviceDownload(void* stream = nullptr, bool sync = true) const; + + /// @brief Returns the size in bytes of the raw memory buffer managed by this allocator. + uint64_t size() const { return mSize; } + + //@{ + /// @brief Returns true if this allocator is empty, i.e. has no allocated memory + bool empty() const { return mSize == 0; } + bool isEmpty() const { return mSize == 0; } + //@} + + /// @brief De-allocate all memory managed by this allocator and set all pointers to NULL + void clear(void* stream = nullptr); + +}; // DeviceBuffer class + +// --------------------------> Implementations below <------------------------------------ + +inline DeviceBuffer DeviceBuffer::create(uint64_t size, const DeviceBuffer*, bool host, void* stream) +{ + return DeviceBuffer(size, host, stream); +} + +inline DeviceBuffer DeviceBuffer::create(uint64_t size, void* cpuData, void* gpuData) +{ + return DeviceBuffer(size, cpuData, gpuData); +} + +inline void DeviceBuffer::init(uint64_t size, bool host, void* stream) +{ + if (mSize>0) this->clear(stream); + NANOVDB_ASSERT(size > 0); + if (host) { + cudaCheck(cudaMallocHost((void**)&mCpuData, size)); // un-managed pinned memory on the host (can be slow to access!). Always 32B aligned + checkPtr(mCpuData, "cuda::DeviceBuffer::init: failed to allocate host buffer"); + } else { + cudaCheck(util::cuda::mallocAsync((void**)&mGpuData, size, reinterpret_cast(stream))); // un-managed memory on the device, always 32B aligned! + checkPtr(mGpuData, "cuda::DeviceBuffer::init: failed to allocate device buffer"); + } + mSize = size; + mManaged = true; +} // DeviceBuffer::init + +inline void DeviceBuffer::deviceUpload(void* stream, bool sync) const +{ + if (!mManaged) throw std::runtime_error("DeviceBuffer::deviceUpload called on externally managed memory. Replace deviceUpload call with the appropriate external copy operation."); + + checkPtr(mCpuData, "uninitialized cpu data"); + if (mGpuData == nullptr) { + cudaCheck(util::cuda::mallocAsync((void**)&mGpuData, mSize, reinterpret_cast(stream))); // un-managed memory on the device, always 32B aligned! + } + checkPtr(mGpuData, "uninitialized gpu data"); + cudaCheck(cudaMemcpyAsync(mGpuData, mCpuData, mSize, cudaMemcpyHostToDevice, reinterpret_cast(stream))); + if (sync) cudaCheck(cudaStreamSynchronize(reinterpret_cast(stream))); +} // DeviceBuffer::gpuUpload + +inline void DeviceBuffer::deviceDownload(void* stream, bool sync) const +{ + if (!mManaged) throw std::runtime_error("DeviceBuffer::deviceDownload called on externally managed memory. 
Replace deviceDownload call with the appropriate external copy operation."); + + checkPtr(mGpuData, "uninitialized gpu data"); + if (mCpuData == nullptr) { + cudaCheck(cudaMallocHost((void**)&mCpuData, mSize)); // un-managed pinned memory on the host (can be slow to access!). Always 32B aligned + } + checkPtr(mCpuData, "uninitialized cpu data"); + cudaCheck(cudaMemcpyAsync(mCpuData, mGpuData, mSize, cudaMemcpyDeviceToHost, reinterpret_cast(stream))); + if (sync) cudaCheck(cudaStreamSynchronize(reinterpret_cast(stream))); +} // DeviceBuffer::gpuDownload + +inline void DeviceBuffer::clear(void *stream) +{ + if (mManaged && mGpuData) cudaCheck(util::cuda::freeAsync(mGpuData, reinterpret_cast(stream))); + if (mManaged && mCpuData) cudaCheck(cudaFreeHost(mCpuData)); + mCpuData = mGpuData = nullptr; + mSize = 0; + mManaged = false; +} // DeviceBuffer::clear + +}// namespace cuda + +using CudaDeviceBuffer [[deprecated("Use nanovdb::cuda::DeviceBuffer instead")]] = cuda::DeviceBuffer; + +template<> +struct BufferTraits +{ + static constexpr bool hasDeviceDual = true; +}; + +}// namespace nanovdb + +#endif // end of NANOVDB_CUDA_DEVICEBUFFER_H_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/cuda/GridHandle.cuh b/nanovdb/nanovdb/cuda/GridHandle.cuh new file mode 100644 index 0000000000..db3a99d713 --- /dev/null +++ b/nanovdb/nanovdb/cuda/GridHandle.cuh @@ -0,0 +1,145 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/*! + \file nanovdb/cuda/GridHandle.cuh + + \author Ken Museth, Doyub Kim + + \date August 3, 2023 + + \brief Contains cuda kernels for GridHandle + + \warning The header file contains cuda device code so be sure + to only include it in .cu files (or other .cuh files) +*/ + +#ifndef NANOVDB_CUDA_GRIDHANDLE_CUH_HAS_BEEN_INCLUDED +#define NANOVDB_CUDA_GRIDHANDLE_CUH_HAS_BEEN_INCLUDED + +#include // required for instantiation of move c-tor of GridHandle +#include // for cuda::updateChecksum +#include + +namespace nanovdb { + +namespace cuda { + +namespace {// anonymous namespace +__global__ void cpyGridHandleMeta(const GridData *d_data, GridHandleMetaData *d_meta) +{ + nanovdb::cpyGridHandleMeta(d_data, d_meta); +} + +__global__ void updateGridCount(GridData *d_data, uint32_t gridIndex, uint32_t gridCount, bool *d_dirty) +{ + NANOVDB_ASSERT(gridIndex < gridCount); + if (*d_dirty = d_data->mGridIndex != gridIndex || d_data->mGridCount != gridCount) { + d_data->mGridIndex = gridIndex; + d_data->mGridCount = gridCount; + if (d_data->mChecksum.isEmpty()) *d_dirty = false;// no need to update checksum if it didn't already exist + } +} +}// anonymous namespace + +template class VectorT = std::vector> +inline typename util::enable_if::hasDeviceDual, VectorT>>::type +splitGridHandles(const GridHandle &handle, const BufferT* other = nullptr, cudaStream_t stream = 0) +{ + const void *ptr = handle.deviceData(); + if (ptr == nullptr) return VectorT>(); + VectorT> handles(handle.gridCount()); + bool dirty, *d_dirty;// use this to check if the checksum needs to be recomputed + cudaCheck(util::cuda::mallocAsync((void**)&d_dirty, sizeof(bool), stream)); + for (uint32_t n=0; n(buffer.deviceData()); + const GridData *src = reinterpret_cast(ptr); + cudaCheck(cudaMemcpyAsync(dst, src, handle.gridSize(n), cudaMemcpyDeviceToDevice, stream)); + updateGridCount<<<1, 1, 0, stream>>>(dst, 0u, 1u, d_dirty); + cudaCheckError(); + cudaCheck(cudaMemcpyAsync(&dirty, d_dirty, sizeof(bool), cudaMemcpyDeviceToHost, stream)); + if (dirty) tools::cuda::updateChecksum(dst, 
diff --git a/nanovdb/nanovdb/cuda/GridHandle.cuh b/nanovdb/nanovdb/cuda/GridHandle.cuh
new file mode 100644
index 0000000000..db3a99d713
--- /dev/null
+++ b/nanovdb/nanovdb/cuda/GridHandle.cuh
@@ -0,0 +1,145 @@
+// Copyright Contributors to the OpenVDB Project
+// SPDX-License-Identifier: MPL-2.0
+
+/*!
+    \file nanovdb/cuda/GridHandle.cuh
+
+    \author Ken Museth, Doyub Kim
+
+    \date August 3, 2023
+
+    \brief Contains cuda kernels for GridHandle
+
+    \warning The header file contains cuda device code so be sure
+             to only include it in .cu files (or other .cuh files)
+*/
+
+#ifndef NANOVDB_CUDA_GRIDHANDLE_CUH_HAS_BEEN_INCLUDED
+#define NANOVDB_CUDA_GRIDHANDLE_CUH_HAS_BEEN_INCLUDED
+
+#include <nanovdb/cuda/DeviceBuffer.h> // required for instantiation of move c-tor of GridHandle
+#include <nanovdb/tools/cuda/GridChecksum.cuh> // for cuda::updateChecksum
+#include <nanovdb/GridHandle.h>
+
+namespace nanovdb {
+
+namespace cuda {
+
+namespace {// anonymous namespace
+__global__ void cpyGridHandleMeta(const GridData *d_data, GridHandleMetaData *d_meta)
+{
+    nanovdb::cpyGridHandleMeta(d_data, d_meta);
+}
+
+__global__ void updateGridCount(GridData *d_data, uint32_t gridIndex, uint32_t gridCount, bool *d_dirty)
+{
+    NANOVDB_ASSERT(gridIndex < gridCount);
+    if ((*d_dirty = d_data->mGridIndex != gridIndex || d_data->mGridCount != gridCount)) {
+        d_data->mGridIndex = gridIndex;
+        d_data->mGridCount = gridCount;
+        if (d_data->mChecksum.isEmpty()) *d_dirty = false;// no need to update checksum if it didn't already exist
+    }
+}
+}// anonymous namespace
+
+template<typename BufferT, template <class, class...> class VectorT = std::vector>
+inline typename util::enable_if<BufferTraits<BufferT>::hasDeviceDual, VectorT<GridHandle<BufferT>>>::type
+splitGridHandles(const GridHandle<BufferT> &handle, const BufferT* other = nullptr, cudaStream_t stream = 0)
+{
+    const void *ptr = handle.deviceData();
+    if (ptr == nullptr) return VectorT<GridHandle<BufferT>>();
+    VectorT<GridHandle<BufferT>> handles(handle.gridCount());
+    bool dirty, *d_dirty;// use this to check if the checksum needs to be recomputed
+    cudaCheck(util::cuda::mallocAsync((void**)&d_dirty, sizeof(bool), stream));
+    for (uint32_t n=0; n<handle.gridCount(); ++n) {
+        auto buffer = BufferT::create(handle.gridSize(n), other, false, stream);
+        GridData *dst = reinterpret_cast<GridData*>(buffer.deviceData());
+        const GridData *src = reinterpret_cast<const GridData*>(ptr);
+        cudaCheck(cudaMemcpyAsync(dst, src, handle.gridSize(n), cudaMemcpyDeviceToDevice, stream));
+        updateGridCount<<<1, 1, 0, stream>>>(dst, 0u, 1u, d_dirty);
+        cudaCheckError();
+        cudaCheck(cudaMemcpyAsync(&dirty, d_dirty, sizeof(bool), cudaMemcpyDeviceToHost, stream));
+        if (dirty) tools::cuda::updateChecksum(dst, CheckMode::Partial, stream);
+        handles[n] = nanovdb::GridHandle<BufferT>(std::move(buffer));
+        ptr = util::PtrAdd(ptr, handle.gridSize(n));
+    }
+    cudaCheck(util::cuda::freeAsync(d_dirty, stream));
+    return std::move(handles);
+}// cuda::splitGridHandles
+
+template<typename BufferT, template <class, class...> class VectorT>
+inline typename util::enable_if<BufferTraits<BufferT>::hasDeviceDual, GridHandle<BufferT>>::type
+mergeGridHandles(const VectorT<GridHandle<BufferT>> &handles, const BufferT* other = nullptr, cudaStream_t stream = 0)
+{
+    uint64_t size = 0u;
+    uint32_t counter = 0u, gridCount = 0u;
+    for (auto &h : handles) {
+        gridCount += h.gridCount();
+        for (uint32_t n=0; n<h.gridCount(); ++n) size += h.gridSize(n);
+    }
+    auto buffer = BufferT::create(size, other, false, stream);
+    void *dst = buffer.deviceData();
+    bool dirty, *d_dirty;// use this to check if the checksum needs to be recomputed
+    cudaCheck(util::cuda::mallocAsync((void**)&d_dirty, sizeof(bool), stream));
+    for (auto &h : handles) {
+        const void *src = h.deviceData();
+        for (uint32_t n=0; n<h.gridCount(); ++n) {
+            cudaCheck(cudaMemcpyAsync(dst, src, h.gridSize(n), cudaMemcpyDeviceToDevice, stream));
+            GridData *data = reinterpret_cast<GridData*>(dst);
+            updateGridCount<<<1, 1, 0, stream>>>(data, counter++, gridCount, d_dirty);
+            cudaCheckError();
+            cudaCheck(cudaMemcpyAsync(&dirty, d_dirty, sizeof(bool), cudaMemcpyDeviceToHost, stream));
+            if (dirty) tools::cuda::updateChecksum(data, CheckMode::Partial, stream);
+            dst = util::PtrAdd(dst, h.gridSize(n));
+            src = util::PtrAdd(src, h.gridSize(n));
+        }
+    }
+    cudaCheck(util::cuda::freeAsync(d_dirty, stream));
+    return GridHandle<BufferT>(std::move(buffer));
+}// cuda::mergeGridHandles
+
+}// namespace cuda
+
+template<typename BufferT, template <class, class...> class VectorT = std::vector>
+[[deprecated("Use nanovdb::cuda::splitGridHandles instead")]]
+inline typename util::enable_if<BufferTraits<BufferT>::hasDeviceDual, VectorT<GridHandle<BufferT>>>::type
+splitDeviceGrids(const GridHandle<BufferT> &handle, const BufferT* other = nullptr, cudaStream_t stream = 0)
+{ return cuda::splitGridHandles(handle, other, stream); }
+
+template<typename BufferT, template <class, class...> class VectorT>
+[[deprecated("Use nanovdb::cuda::mergeGridHandles instead")]]
+inline typename util::enable_if<BufferTraits<BufferT>::hasDeviceDual, GridHandle<BufferT>>::type
+mergeDeviceGrids(const VectorT<GridHandle<BufferT>> &handles, const BufferT* other = nullptr, cudaStream_t stream = 0)
+{ return cuda::mergeGridHandles(handles, other, stream); }
+
+template<typename BufferT>
+template<typename T, typename util::enable_if<BufferTraits<T>::hasDeviceDual, int>::type>
+GridHandle<BufferT>::GridHandle(T&& buffer)
+{
+    static_assert(util::is_same<T, BufferT>::value, "Expected U==BufferT");
+    mBuffer = std::move(buffer);
+    if (auto *data = reinterpret_cast<const GridData*>(mBuffer.data())) {
+        if (!data->isValid()) throw std::runtime_error("GridHandle was constructed with an invalid host buffer");
+        mMetaData.resize(data->mGridCount);
+        cpyGridHandleMeta(data, mMetaData.data());
+    } else {
+        if (auto *d_data = reinterpret_cast<const GridData*>(mBuffer.deviceData())) {
+            GridData tmp;
+            cudaCheck(cudaMemcpy(&tmp, d_data, sizeof(GridData), cudaMemcpyDeviceToHost));
+            if (!tmp.isValid()) throw std::runtime_error("GridHandle was constructed with an invalid device buffer");
+            GridHandleMetaData *d_metaData;
+            cudaCheck(cudaMalloc((void**)&d_metaData, tmp.mGridCount*sizeof(GridHandleMetaData)));
+            cuda::cpyGridHandleMeta<<<1,1>>>(d_data, d_metaData);
+            mMetaData.resize(tmp.mGridCount);
+            cudaCheck(cudaMemcpy(mMetaData.data(), d_metaData, tmp.mGridCount*sizeof(GridHandleMetaData), cudaMemcpyDeviceToHost));
+            cudaCheck(cudaFree(d_metaData));
+        }
+    }
+}// GridHandle(T&& buffer)
+
+// Dummy function that ensures instantiation of the move-constructor above when BufferT=cuda::DeviceBuffer
+namespace {auto __dummy(){return GridHandle<cuda::DeviceBuffer>(std::move(cuda::DeviceBuffer()));}}
+
+} // namespace nanovdb
+
+#endif // NANOVDB_CUDA_GRIDHANDLE_CUH_HAS_BEEN_INCLUDED
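A sketch of how the split/merge helpers above compose. This is illustrative only: it assumes a .cu translation unit and a multi-grid handle whose data was already uploaded with deviceUpload(); the function name is hypothetical.

// Sketch (assumptions noted above)
#include <nanovdb/cuda/GridHandle.cuh>

void splitAndMerge(nanovdb::GridHandle<nanovdb::cuda::DeviceBuffer>& handle)
{
    using BufferT = nanovdb::cuda::DeviceBuffer;
    // one device-resident handle per grid in the original handle
    std::vector<nanovdb::GridHandle<BufferT>> parts = nanovdb::cuda::splitGridHandles(handle);
    // ... process individual grids on the device ...
    // recombine; mGridIndex/mGridCount and checksums are refreshed by the updateGridCount kernel
    nanovdb::GridHandle<BufferT> merged = nanovdb::cuda::mergeGridHandles(parts);
}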
diff --git a/nanovdb/nanovdb/cuda/NodeManager.cuh b/nanovdb/nanovdb/cuda/NodeManager.cuh
new file mode 100644
index 0000000000..8e9f24d0f8
--- /dev/null
+++ b/nanovdb/nanovdb/cuda/NodeManager.cuh
@@ -0,0 +1,104 @@
+// Copyright Contributors to the OpenVDB Project
+// SPDX-License-Identifier: MPL-2.0
+
+/*!
+    \file nanovdb/cuda/NodeManager.cuh
+
+    \author Ken Museth
+
+    \date October 3, 2023
+
+    \brief Contains cuda kernels for NodeManager
+
+    \warning The header file contains cuda device code so be sure
+             to only include it in .cu files (or other .cuh files)
+*/
+
+#ifndef NANOVDB_CUDA_NODE_MANAGER_CUH_HAS_BEEN_INCLUDED
+#define NANOVDB_CUDA_NODE_MANAGER_CUH_HAS_BEEN_INCLUDED
+
+#include <nanovdb/util/cuda/Util.h> // for cuda::lambdaKernel
+#include <nanovdb/cuda/DeviceBuffer.h>
+#include <nanovdb/NodeManager.h>
+
+namespace nanovdb {
+
+namespace cuda {
+
+/// @brief Construct a NodeManager from a device grid pointer
+///
+/// @param d_grid device grid pointer whose nodes will be accessed sequentially
+/// @param pool   memory pool (buffer) from which to allocate the output handle
+/// @param stream cuda stream
+/// @return Handle that contains a device NodeManager
+template<typename BuildT, typename BufferT = DeviceBuffer>
+inline typename util::enable_if<BufferTraits<BufferT>::hasDeviceDual, NodeManagerHandle<BufferT>>::type
+createNodeManager(const NanoGrid<BuildT> *d_grid,
+                  const BufferT& pool = BufferT(),
+                  cudaStream_t stream = 0)
+{
+    auto buffer = BufferT::create(sizeof(NodeManagerData), &pool, false, stream);
+    auto *d_data = (NodeManagerData*)buffer.deviceData();
+    size_t size = 0u, *d_size;
+    cudaCheck(util::cuda::mallocAsync((void**)&d_size, sizeof(size_t), stream));
+    util::cuda::lambdaKernel<<<1, 1, 0, stream>>>(1, [=] __device__(size_t) {
+#ifdef NANOVDB_USE_NEW_MAGIC_NUMBERS
+        *d_data = NodeManagerData{NANOVDB_MAGIC_NODE, 0u, (void*)d_grid, {0u,0u,0u}};
+#else
+        *d_data = NodeManagerData{NANOVDB_MAGIC_NUMB, 0u, (void*)d_grid, {0u,0u,0u}};
+#endif
+        *d_size = sizeof(NodeManagerData);
+        auto &tree = d_grid->tree();
+        if (NodeManager<BuildT>::FIXED_SIZE && d_grid->isBreadthFirst()) {
+            d_data->mLinear = uint8_t(1u);
+            d_data->mOff[0] = util::PtrDiff(tree.template getFirstNode<0>(), d_grid);
+            d_data->mOff[1] = util::PtrDiff(tree.template getFirstNode<1>(), d_grid);
+            d_data->mOff[2] = util::PtrDiff(tree.template getFirstNode<2>(), d_grid);
+        } else {
+            *d_size += sizeof(uint64_t)*tree.totalNodeCount();
+        }
+    });
+    cudaCheckError();
+    cudaCheck(cudaMemcpyAsync(&size, d_size, sizeof(size_t), cudaMemcpyDeviceToHost, stream));
+    cudaCheck(util::cuda::freeAsync(d_size, stream));
+    if (size > sizeof(NodeManagerData)) {
+        auto tmp = BufferT::create(size, &pool, false, stream);// only allocate buffer on the device
+        cudaCheck(cudaMemcpyAsync(tmp.deviceData(), buffer.deviceData(), sizeof(NodeManagerData), cudaMemcpyDeviceToDevice, stream));
+        buffer = std::move(tmp);
+        d_data = reinterpret_cast<NodeManagerData*>(buffer.deviceData());
+        util::cuda::lambdaKernel<<<1, 1, 0, stream>>>(1, [=] __device__ (size_t) {
+            auto &tree = d_grid->tree();
+            int64_t *ptr0 = d_data->mPtr[0] = reinterpret_cast<int64_t*>(d_data + 1);
+            int64_t *ptr1 = d_data->mPtr[1] = d_data->mPtr[0] + tree.nodeCount(0);
+            int64_t *ptr2 = d_data->mPtr[2] = d_data->mPtr[1] + tree.nodeCount(1);
+            // Performs depth first traversal but breadth first insertion
+            for (auto it2 = tree.root().cbeginChild(); it2; ++it2) {
+                *ptr2++ = util::PtrDiff(&*it2, d_grid);
+                for (auto it1 = it2->cbeginChild(); it1; ++it1) {
+                    *ptr1++ = util::PtrDiff(&*it1, d_grid);
+                    for (auto it0 = it1->cbeginChild(); it0; ++it0) {
+                        *ptr0++ = util::PtrDiff(&*it0, d_grid);
+                    }// loop over child nodes of the lower internal node
+                }// loop over child nodes of the upper internal node
+            }// loop over child nodes of the root node
+        });
+    }
+
+    return NodeManagerHandle<BufferT>(toGridType<BuildT>(), std::move(buffer));
+}// cuda::createNodeManager
+
+}// namespace cuda
+
+template<typename BuildT, typename BufferT = cuda::DeviceBuffer>
+[[deprecated("Use cuda::createNodeManager instead")]]
+inline typename util::enable_if<BufferTraits<BufferT>::hasDeviceDual, NodeManagerHandle<BufferT>>::type
+cudaCreateNodeManager(const NanoGrid<BuildT> *d_grid,
+                      const BufferT& pool = BufferT(),
+                      cudaStream_t stream = 0)
+{
+    return cuda::createNodeManager(d_grid, pool, stream);
+}
+
+} // namespace nanovdb
+
+#endif // NANOVDB_CUDA_NODE_MANAGER_CUH_HAS_BEEN_INCLUDED
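A sketch of the intended call pattern for the factory function above. Illustrative only: it assumes a .cu translation unit, a float grid already uploaded to the device, and the deviceMgr accessor name of NodeManagerHandle.

// Sketch (assumptions noted above)
#include <nanovdb/cuda/NodeManager.cuh>

void makeNodeManager(const nanovdb::NanoGrid<float>* d_grid)
{
    // linearize the grid's nodes for sequential device-side access
    auto mgrHandle = nanovdb::cuda::createNodeManager<float>(d_grid);
    auto* d_mgr = mgrHandle.deviceMgr<float>(); // device NodeManager, usable in kernels
}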
diff --git a/nanovdb/nanovdb/examples/ex_bump_pool_buffer/bump_pool_buffer.cc b/nanovdb/nanovdb/examples/ex_bump_pool_buffer/bump_pool_buffer.cc
index 12edb019d5..bcd54036ec 100644
--- a/nanovdb/nanovdb/examples/ex_bump_pool_buffer/bump_pool_buffer.cc
+++ b/nanovdb/nanovdb/examples/ex_bump_pool_buffer/bump_pool_buffer.cc
@@ -1,9 +1,9 @@
 // Copyright Contributors to the OpenVDB Project
 // SPDX-License-Identifier: MPL-2.0
-#include <nanovdb/util/GridHandle.h>
-#include <nanovdb/util/CreateNanoGrid.h>
-#include <nanovdb/util/Primitives.h>
+#include <nanovdb/GridHandle.h>
+#include <nanovdb/tools/CreateNanoGrid.h>
+#include <nanovdb/tools/CreatePrimitives.h>
 #include <iostream>
//////////////////////////////////////////////
@@ -83,11 +83,11 @@ class PoolBuffer
     // Mandatory.
     // Return non-const pointer to the buffer data.
-    uint8_t* data() { return mState->mPoolSlab.data() + mOffset; }
+    void* data() { return nanovdb::util::PtrAdd(mState->mPoolSlab.data(), mOffset); }
     // Mandatory.
     // Return const pointer to the buffer data.
-    const uint8_t* data() const { return mState->mPoolSlab.data() + mOffset; }
+    const void* data() const { return nanovdb::util::PtrAdd(mState->mPoolSlab.data(), mOffset); }
 };
 // we specify this trait to avoid declaring the "device...(...)" convenience methods.
@@ -110,8 +110,8 @@ int main()
     std::vector<nanovdb::GridHandle<PoolBuffer>> gridHdls;
     // create two grids...
-    gridHdls.push_back(nanovdb::createLevelSetSphere<float>(100.0, nanovdb::Vec3d(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "spheref", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, bufferContext));
-    gridHdls.push_back(nanovdb::createLevelSetSphere<double>(100.0, nanovdb::Vec3d( 20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphered", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, bufferContext));
+    gridHdls.push_back(nanovdb::tools::createLevelSetSphere<float>(100.0, nanovdb::Vec3d(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "spheref", nanovdb::tools::StatsMode::BBox, nanovdb::CheckMode::Partial, bufferContext));
+    gridHdls.push_back(nanovdb::tools::createLevelSetSphere<double>(100.0, nanovdb::Vec3d( 20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphered", nanovdb::tools::StatsMode::BBox, nanovdb::CheckMode::Partial, bufferContext));
     // Get a (raw) pointer to the NanoVDB grid from the GridManager.
auto* dstGrid = gridHdls[0].grid(); diff --git a/nanovdb/nanovdb/examples/ex_collide_level_set/main.cc b/nanovdb/nanovdb/examples/ex_collide_level_set/main.cc index 876c08e16a..5d0ae28475 100644 --- a/nanovdb/nanovdb/examples/ex_collide_level_set/main.cc +++ b/nanovdb/nanovdb/examples/ex_collide_level_set/main.cc @@ -3,12 +3,12 @@ #include #include -#include -#include -#include +#include +#include +#include #if defined(NANOVDB_USE_CUDA) -using BufferT = nanovdb::CudaDeviceBuffer; +using BufferT = nanovdb::cuda::DeviceBuffer; #else using BufferT = nanovdb::HostBuffer; #endif @@ -26,7 +26,7 @@ int main(int ac, char** av) handle = nanovdb::io::readGrid(av[1]); std::cout << "Loaded NanoVDB grid[" << handle.gridMetaData()->shortGridName() << "]...\n"; } else { - handle = nanovdb::createLevelSetSphere(100.0f, nanovdb::Vec3d(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphere"); + handle = nanovdb::tools::createLevelSetSphere(100.0f, nanovdb::Vec3d(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphere"); } if (handle.gridMetaData()->isLevelSet() == false) { diff --git a/nanovdb/nanovdb/examples/ex_collide_level_set/nanovdb.cu b/nanovdb/nanovdb/examples/ex_collide_level_set/nanovdb.cu index 71a976eca4..7eb9f2de06 100644 --- a/nanovdb/nanovdb/examples/ex_collide_level_set/nanovdb.cu +++ b/nanovdb/nanovdb/examples/ex_collide_level_set/nanovdb.cu @@ -5,15 +5,15 @@ #include #include -#include -#include -#include -#include +#include +#include +#include +#include #include "common.h" #if defined(NANOVDB_USE_CUDA) -using BufferT = nanovdb::CudaDeviceBuffer; +using BufferT = nanovdb::cuda::DeviceBuffer; #else using BufferT = nanovdb::HostBuffer; #endif diff --git a/nanovdb/nanovdb/examples/ex_collide_level_set/openvdb.cc b/nanovdb/nanovdb/examples/ex_collide_level_set/openvdb.cc index ec67f754bd..294ded6010 100644 --- a/nanovdb/nanovdb/examples/ex_collide_level_set/openvdb.cc +++ b/nanovdb/nanovdb/examples/ex_collide_level_set/openvdb.cc @@ -10,13 +10,13 @@ #include #include -#include -#include +#include +#include #include "common.h" #if defined(NANOVDB_USE_CUDA) -using BufferT = nanovdb::CudaDeviceBuffer; +using BufferT = nanovdb::cuda::DeviceBuffer; #else using BufferT = nanovdb::HostBuffer; #endif @@ -29,9 +29,9 @@ void runOpenVDB(nanovdb::GridHandle& handle, int numIterations, int num using CoordT = openvdb::Coord; using RealT = float; using Vec3T = openvdb::math::Vec3; - using RayT = openvdb::math::Ray; + using RayT = openvdb::math::Ray; - auto srcGrid = nanovdb::nanoToOpenVDB(handle); + auto srcGrid = nanovdb::tools::nanoToOpenVDB(handle); std::cout << "Exporting to OpenVDB grid[" << srcGrid->getName() << "]...\n"; auto h_grid = (GridT*)srcGrid.get(); diff --git a/nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda.cc b/nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda.cc index b81d71c22b..1bb2a855d9 100644 --- a/nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda.cc +++ b/nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda.cc @@ -1,27 +1,27 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -#include -#include // for nanovdb::createLevelSetSphere -#include // for nanovdb::CudaDeviceBuffer +#include +#include // for nanovdb::tools::createLevelSetSphere +#include // for nanovdb::cuda::DeviceBuffer extern "C" void launch_kernels(const nanovdb::NanoGrid*,// device grid const nanovdb::NanoGrid*,// host grid cudaStream_t stream); -/// @brief This examples depends on NanoVDB and CUDA. +/// @brief This examples depends on NanoVDB and CUDA. 
int main(int, char**) { using SrcGridT = nanovdb::FloatGrid; using DstBuildT = nanovdb::ValueOnIndex; - using BufferT = nanovdb::CudaDeviceBuffer; + using BufferT = nanovdb::cuda::DeviceBuffer; try { // Create an NanoVDB grid of a sphere at the origin with radius 100 and voxel size 1. - auto srcHandle = nanovdb::createLevelSetSphere(); + auto srcHandle = nanovdb::tools::createLevelSetSphere(); auto *srcGrid = srcHandle.grid(); // Converts the FloatGrid to an IndexGrid using CUDA for memory management. - auto idxHandle = nanovdb::createNanoGrid(*srcGrid, 1u, false , false);// 1 channel, no tiles or stats + auto idxHandle = nanovdb::tools::createNanoGrid(*srcGrid, 1u, false , false);// 1 channel, no tiles or stats cudaStream_t stream; // Create a CUDA stream to allow for asynchronous copy of pinned CUDA memory. cudaStreamCreate(&stream); diff --git a/nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda_kernel.cu b/nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda_kernel.cu index 5bb29979cf..ed1ae04100 100644 --- a/nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda_kernel.cu +++ b/nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda_kernel.cu @@ -2,7 +2,7 @@ // SPDX-License-Identifier: MPL-2.0 #include // this defined the core tree data structure of NanoVDB accessable on both the host and device -#include // required since GridHandle has device code +#include // required since GridHandle has device code #include // for printf // This is called by the host only diff --git a/nanovdb/nanovdb/examples/ex_make_custom_nanovdb/make_custom_nanovdb.cc b/nanovdb/nanovdb/examples/ex_make_custom_nanovdb/make_custom_nanovdb.cc index aea2812a4b..6ee036eddb 100644 --- a/nanovdb/nanovdb/examples/ex_make_custom_nanovdb/make_custom_nanovdb.cc +++ b/nanovdb/nanovdb/examples/ex_make_custom_nanovdb/make_custom_nanovdb.cc @@ -1,25 +1,25 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -#include -#include +#include +#include #include -/// @brief Creates a NanoVDB grids with custom values and access them. +/// @brief Creates a NanoVDB grid with custom values and access them. /// /// @note This example only depends on NanoVDB. int main() { try { - nanovdb::build::Grid grid(0.0f); + nanovdb::tools::build::Grid grid(0.0f); auto acc = grid.getAccessor(); acc.setValue(nanovdb::Coord(1, 2, 3), 1.0f); printf("build::Grid: (%i,%i,%i)=%4.2f\t", 1, 2, 3, acc.getValue(nanovdb::Coord(1, 2, 3))); printf("build::Grid: (%i,%i,%i)=%4.2f\n", 1, 2,-3, acc.getValue(nanovdb::Coord(1, 2,-3))); - auto handle = nanovdb::createNanoGrid(grid); + auto handle = nanovdb::tools::createNanoGrid(grid); auto* dstGrid = handle.grid(); // Get a (raw) pointer to the NanoVDB grid form the GridManager. 
if (!dstGrid) throw std::runtime_error("GridHandle does not contain a grid with value type float"); diff --git a/nanovdb/nanovdb/examples/ex_make_custom_nanovdb_cuda/make_custom_nanovdb_cuda.cc b/nanovdb/nanovdb/examples/ex_make_custom_nanovdb_cuda/make_custom_nanovdb_cuda.cc index 7b4da85f0a..1846e010e8 100644 --- a/nanovdb/nanovdb/examples/ex_make_custom_nanovdb_cuda/make_custom_nanovdb_cuda.cc +++ b/nanovdb/nanovdb/examples/ex_make_custom_nanovdb_cuda/make_custom_nanovdb_cuda.cc @@ -3,9 +3,9 @@ #undef NANOVDB_USE_OPENVDB // Prevents include/openvdb/points/AttributeArray.h:1841:25: error: ‘stride’ cannot be used as a function -#include -#include -#include +#include +#include +#include #include @@ -13,13 +13,13 @@ extern "C" void launch_kernels(const nanovdb::NanoGrid*,// GPU grid const nanovdb::NanoGrid*,// CPU grid cudaStream_t stream); -/// @brief Creates a NanoVDB grids with custom values and access them. +/// @brief Creates a NanoVDB grid with custom values and access them. /// /// @note This example only depends on NanoVDB. int main() { try { - using GridT = nanovdb::build::Grid; + using GridT = nanovdb::tools::build::Grid; GridT grid(0.0f);// empty grid with a background value of zero auto acc = grid.getAccessor(); acc.setValue(nanovdb::Coord(1, 2, 3), 1.0f); @@ -27,7 +27,7 @@ int main() printf("build::Grid: (%i,%i,%i)=%4.2f\n", 1, 2, 3, acc.getValue(nanovdb::Coord(1, 2, 3))); // convert build::grid to a nanovdb::GridHandle using a Cuda buffer - auto handle = nanovdb::createNanoGrid(grid); + auto handle = nanovdb::tools::createNanoGrid(grid); auto* cpuGrid = handle.grid(); //get a (raw) pointer to a NanoVDB grid of value type float on the CPU if (!cpuGrid) throw std::runtime_error("GridHandle does not contain a grid with value type float"); diff --git a/nanovdb/nanovdb/examples/ex_make_custom_nanovdb_cuda/make_custom_nanovdb_cuda_kernel.cu b/nanovdb/nanovdb/examples/ex_make_custom_nanovdb_cuda/make_custom_nanovdb_cuda_kernel.cu index ae3556ad7a..0ab22d15ef 100644 --- a/nanovdb/nanovdb/examples/ex_make_custom_nanovdb_cuda/make_custom_nanovdb_cuda_kernel.cu +++ b/nanovdb/nanovdb/examples/ex_make_custom_nanovdb_cuda/make_custom_nanovdb_cuda_kernel.cu @@ -2,7 +2,7 @@ // SPDX-License-Identifier: MPL-2.0 #include // this defined the core tree data structure of NanoVDB accessable on both the host and device -#include // required since GridHandle has device code +#include // required since GridHandle has device code #include // for printf // This is called by the host only diff --git a/nanovdb/nanovdb/examples/ex_make_funny_nanovdb/make_funny_nanovdb.cc b/nanovdb/nanovdb/examples/ex_make_funny_nanovdb/make_funny_nanovdb.cc index e9b7350bb8..7997fc78e6 100644 --- a/nanovdb/nanovdb/examples/ex_make_funny_nanovdb/make_funny_nanovdb.cc +++ b/nanovdb/nanovdb/examples/ex_make_funny_nanovdb/make_funny_nanovdb.cc @@ -1,13 +1,13 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -#include -#include -#include +#include +#include +#include #include -/// @brief Creates a NanoVDB grids with custom values and access them. +/// @brief Creates a NanoVDB grid with custom values and access them. /// /// @note This example only depends on NanoVDB. 
int main() @@ -20,12 +20,12 @@ int main() float v = 40.0f + 50.0f*(cos(ijk[0]*0.1f)*sin(ijk[1]*0.1f) + cos(ijk[1]*0.1f)*sin(ijk[2]*0.1f) + cos(ijk[2]*0.1f)*sin(ijk[0]*0.1f)); - v = Max(v, Vec3f(ijk).length() - size);// CSG intersection with a sphere + v = math::Max(v, Vec3f(ijk).length() - size);// CSG intersection with a sphere return v > background ? background : v < -background ? -background : v;// clamp value }; - build::Grid grid(background, "funny", GridClass::LevelSet); + tools::build::Grid grid(background, "funny", GridClass::LevelSet); grid(func, CoordBBox(Coord(-size), Coord(size))); - io::writeGrid("data/funny.nvdb", createNanoGrid(grid), io::Codec::BLOSC); + io::writeGrid("data/funny.nvdb", tools::createNanoGrid(grid), io::Codec::BLOSC); } catch (const std::exception& e) { std::cerr << "An exception occurred: \"" << e.what() << "\"" << std::endl; diff --git a/nanovdb/nanovdb/examples/ex_make_nanovdb_sphere/make_nanovdb_sphere.cc b/nanovdb/nanovdb/examples/ex_make_nanovdb_sphere/make_nanovdb_sphere.cc index a711eb3f02..a7f8980a64 100644 --- a/nanovdb/nanovdb/examples/ex_make_nanovdb_sphere/make_nanovdb_sphere.cc +++ b/nanovdb/nanovdb/examples/ex_make_nanovdb_sphere/make_nanovdb_sphere.cc @@ -1,17 +1,17 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -#include +#include #include -/// @brief Creates a NanoVDB grids of a level set sphere and accesses a value. +/// @brief Creates a NanoVDB grid of a level set sphere and access a value. /// /// @note This example only depends on NanoVDB. int main() { try { - auto handle = nanovdb::createLevelSetSphere(100.0f); + auto handle = nanovdb::tools::createLevelSetSphere(100.0f); auto* dstGrid = handle.grid(); // Get a (raw) pointer to the NanoVDB grid form the GridManager. if (!dstGrid) diff --git a/nanovdb/nanovdb/examples/ex_make_typed_grids/make_typed_grids.cc b/nanovdb/nanovdb/examples/ex_make_typed_grids/make_typed_grids.cc index f9d4666784..bcb0ffce95 100644 --- a/nanovdb/nanovdb/examples/ex_make_typed_grids/make_typed_grids.cc +++ b/nanovdb/nanovdb/examples/ex_make_typed_grids/make_typed_grids.cc @@ -1,9 +1,9 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -#include -#include -#include +#include +#include +#include // Helper struct to create a default value for the type. // We use a helper struct so we can specialize it for different types. 
@@ -37,7 +37,7 @@ void buildGridForType(std::vector>& gridHandles, T const& try { - nanovdb::build::Grid grid(bgValue, typeNameStr); + nanovdb::tools::build::Grid grid(bgValue, typeNameStr); auto acc = grid.getAccessor(); const int radius = 16; for (int z = -radius; z <= radius; ++z) { @@ -49,7 +49,7 @@ void buildGridForType(std::vector>& gridHandles, T const& } } } - gridHandles.push_back(nanovdb::createNanoGrid(grid)); + gridHandles.push_back(nanovdb::tools::createNanoGrid(grid)); } catch (const std::exception& e) { std::cerr << "An exception occurred: \"" << e.what() << "\"" << std::endl; diff --git a/nanovdb/nanovdb/examples/ex_map_pool_buffer/map_pool_buffer.cc b/nanovdb/nanovdb/examples/ex_map_pool_buffer/map_pool_buffer.cc index 526ed9c8cf..5b00ca30d8 100644 --- a/nanovdb/nanovdb/examples/ex_map_pool_buffer/map_pool_buffer.cc +++ b/nanovdb/nanovdb/examples/ex_map_pool_buffer/map_pool_buffer.cc @@ -1,9 +1,9 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -#include -#include -#include +#include +#include +#include #include ////////////////////////////////////////////// @@ -95,8 +95,8 @@ class MapPoolBuffer } uint64_t size() const { return getBuffer(mId).size(); } - uint8_t* data() { return getBuffer(mId).data(); } - const uint8_t* data() const { return getBuffer(mId).data(); } + void* data() { return getBuffer(mId).data(); } + const void* data() const { return getBuffer(mId).data(); } std::vector getGridKeys() const { @@ -148,8 +148,8 @@ int main() std::vector> gridHdls; // create two grids... - gridHdls.push_back(nanovdb::createLevelSetSphere(100.0, nanovdb::Vec3d(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "spheref", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, bufferContext)); - gridHdls.push_back(nanovdb::createLevelSetSphere(100.0, nanovdb::Vec3d( 20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphered", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, bufferContext)); + gridHdls.push_back(nanovdb::tools::createLevelSetSphere(100.0, nanovdb::Vec3d(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "spheref", nanovdb::tools::StatsMode::BBox, nanovdb::CheckMode::Partial, bufferContext)); + gridHdls.push_back(nanovdb::tools::createLevelSetSphere(100.0, nanovdb::Vec3d( 20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphered", nanovdb::tools::StatsMode::BBox, nanovdb::CheckMode::Partial, bufferContext)); // share grid[0]'s buffer into a parent-scope handle to prevent deletion. anotherHdl = nanovdb::GridHandle(bufferContext.copy(gridHdls[0].buffer().mId)); diff --git a/nanovdb/nanovdb/examples/ex_modify_nanovdb_thrust/modify_nanovdb_thrust.cc b/nanovdb/nanovdb/examples/ex_modify_nanovdb_thrust/modify_nanovdb_thrust.cc index dbda5b3d73..54d369fd53 100644 --- a/nanovdb/nanovdb/examples/ex_modify_nanovdb_thrust/modify_nanovdb_thrust.cc +++ b/nanovdb/nanovdb/examples/ex_modify_nanovdb_thrust/modify_nanovdb_thrust.cc @@ -4,8 +4,8 @@ /// @brief This examples demonstrates how values in a NanpVDB grid can be /// modified on the device. It depends on NanoVDB and CUDA thrust. -#include -#include +#include +#include extern "C" void scaleActiveVoxels(nanovdb::FloatGrid *grid_d, uint64_t leafCount, float scale); @@ -13,7 +13,7 @@ int main() { try { // Create an NanoVDB grid of a sphere at the origin with radius 100 and voxel size 1. 
-        auto handle = nanovdb::createLevelSetSphere<float, nanovdb::CudaDeviceBuffer>(100.0f);
+        auto handle = nanovdb::tools::createLevelSetSphere<float, nanovdb::cuda::DeviceBuffer>(100.0f);
         using GridT = nanovdb::FloatGrid;
         handle.deviceUpload(0, false); // Copy the NanoVDB grid to the GPU asynchronously
diff --git a/nanovdb/nanovdb/examples/ex_modify_nanovdb_thrust/modify_nanovdb_thrust.cu b/nanovdb/nanovdb/examples/ex_modify_nanovdb_thrust/modify_nanovdb_thrust.cu
index 1078b8aa1b..a8590d751b 100644
--- a/nanovdb/nanovdb/examples/ex_modify_nanovdb_thrust/modify_nanovdb_thrust.cu
+++ b/nanovdb/nanovdb/examples/ex_modify_nanovdb_thrust/modify_nanovdb_thrust.cu
@@ -7,8 +7,8 @@
 #include
 #include
-#include
-#include
+#include
+#include
 extern "C" void scaleActiveVoxels(nanovdb::FloatGrid *grid_d, uint64_t leafCount, float scale)
 {
diff --git a/nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda.cc b/nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda.cc
index 68906b90e8..2ffe307866 100644
--- a/nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda.cc
+++ b/nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda.cc
@@ -2,22 +2,22 @@
 // SPDX-License-Identifier: MPL-2.0
 #include <openvdb/tools/LevelSetSphere.h> // replace with your own dependencies for generating the OpenVDB grid
-#include <nanovdb/util/OpenToNanoVDB.h> // converter from OpenVDB to NanoVDB (includes NanoVDB.h and GridManager.h)
-#include <nanovdb/util/CudaDeviceBuffer.h>
-#include <nanovdb/util/NodeManager.h>
+#include <nanovdb/tools/CreateNanoGrid.h> // converter from OpenVDB to NanoVDB (includes NanoVDB.h and GridManager.h)
+#include <nanovdb/cuda/DeviceBuffer.h>
+#include <nanovdb/NodeManager.h>
-extern "C" void launch_kernels(const nanovdb::NodeManager<float>*,
-                               const nanovdb::NodeManager<float>*,
+extern "C" void launch_kernels(const nanovdb::NodeManager<float>*,// device NodeManager
+                               const nanovdb::NodeManager<float>*,// host NodeManager
                                cudaStream_t stream);
-extern "C" void cudaCreateNodeManager(const nanovdb::NanoGrid<float>*,
-                                      nanovdb::NodeManagerHandle<nanovdb::CudaDeviceBuffer>*);
+extern "C" void cudaCreateNodeManager(const nanovdb::NanoGrid<float>*,// device grid
+                                      nanovdb::NodeManagerHandle<nanovdb::cuda::DeviceBuffer>*);// Handle to device NodeManager
 /// @brief This example depends on OpenVDB, NanoVDB and CUDA.
 int main()
 {
     using SrcGridT = openvdb::FloatGrid;
-    using BufferT = nanovdb::CudaDeviceBuffer;
+    using BufferT = nanovdb::cuda::DeviceBuffer;
     try {
         cudaStream_t stream; // Create a CUDA stream to allow for asynchronous copy of pinned CUDA memory.
         cudaStreamCreate(&stream);
@@ -26,7 +26,7 @@ int main()
         auto srcGrid = openvdb::tools::createLevelSetSphere<SrcGridT>(100.0f, openvdb::Vec3f(0.0f), 1.0f);
         // Converts the OpenVDB to NanoVDB and returns a GridHandle that uses CUDA for memory management.
- auto gridHandle = nanovdb::createNanoGrid(*srcGrid); + auto gridHandle = nanovdb::tools::createNanoGrid(*srcGrid); gridHandle.deviceUpload(stream, false); // Copy the NanoVDB grid to the GPU asynchronously auto* grid = gridHandle.grid(); // get a (raw) pointer to a NanoVDB grid of value type float on the CPU auto* deviceGrid = gridHandle.deviceGrid(); // get a (raw) pointer to a NanoVDB grid of value type float on the GPU diff --git a/nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda_kernel.cu b/nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda_kernel.cu index b06c87b4e5..246d53e9e6 100644 --- a/nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda_kernel.cu +++ b/nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda_kernel.cu @@ -2,9 +2,9 @@ // SPDX-License-Identifier: MPL-2.0 #include // this defined the core tree data structure of NanoVDB accessable on both the host and device -#include -#include // required since GridHandle has device code -#include +#include +#include // required since GridHandle has device code +#include #include // for printf // This is called by the host only @@ -29,9 +29,9 @@ extern "C" void launch_kernels(const nanovdb::NodeManager* deviceMgr, cpu_kernel(cpuMgr); // Launch the host "kernel" (synchronously) } -// Simple wrapper that makes sure nanovdb::cudaCreateNodeManager is initiated +// Simple wrapper that makes sure nanovdb::cuda::createNodeManager is initiated extern "C" void cudaCreateNodeManager(const nanovdb::NanoGrid *d_grid, nanovdb::NodeManagerHandle *handle) { - *handle = std::move(nanovdb::cudaCreateNodeManager(d_grid)); + *handle = std::move(nanovdb::cuda::createNodeManager(d_grid)); } \ No newline at end of file diff --git a/nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb/openvdb_to_nanovdb.cc b/nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb/openvdb_to_nanovdb.cc index 870114db39..68ca9bec83 100644 --- a/nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb/openvdb_to_nanovdb.cc +++ b/nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb/openvdb_to_nanovdb.cc @@ -2,8 +2,8 @@ // SPDX-License-Identifier: MPL-2.0 #include // replace with your own dependencies for generating the OpenVDB grid -#include // converter from OpenVDB to NanoVDB (includes NanoVDB.h and GridManager.h) -#include +#include // converter from OpenVDB to NanoVDB (includes NanoVDB.h and GridManager.h) +#include /// @brief Convert an openvdb level set sphere into a nanovdb, access a single value in both grids, and save NanoVDB to file. /// @@ -13,7 +13,7 @@ int main() try { // Create an OpenVDB grid of a sphere at the origin with radius 100 and voxel size 1. auto srcGrid = openvdb::tools::createLevelSetSphere(100.0f, openvdb::Vec3f(0.0f), 1.0f); - auto handle = nanovdb::createNanoGrid(*srcGrid); // Convert from OpenVDB to NanoVDB and return a shared pointer to a GridHandle. + auto handle = nanovdb::tools::createNanoGrid(*srcGrid); // Convert from OpenVDB to NanoVDB and return a shared pointer to a GridHandle. auto* dstGrid = handle.grid(); // Get a (raw) pointer to the NanoVDB grid form the GridManager. 
if (!dstGrid) throw std::runtime_error("GridHandle does not contain a grid with value type float"); diff --git a/nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb_accessor/openvdb_to_nanovdb_accessor.cc b/nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb_accessor/openvdb_to_nanovdb_accessor.cc index 4851732882..de7f3d62bb 100644 --- a/nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb_accessor/openvdb_to_nanovdb_accessor.cc +++ b/nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb_accessor/openvdb_to_nanovdb_accessor.cc @@ -2,8 +2,8 @@ // SPDX-License-Identifier: MPL-2.0 #include // replace with your own dependencies for generating the OpenVDB grid -#include // converter from OpenVDB to NanoVDB (includes NanoVDB.h and GridManager.h) -#include +#include // converter from OpenVDB to NanoVDB (includes NanoVDB.h and GridManager.h) +#include // Convert an openvdb level set sphere into a nanovdb, use accessors to print out multiple values from both // grids and save the NanoVDB grid to file. @@ -15,7 +15,7 @@ int main() auto srcGrid = openvdb::tools::createLevelSetSphere(100.0f, openvdb::Vec3f(0.0f), 1.0f); // Convert the OpenVDB grid, srcGrid, into a NanoVDB grid handle. - auto handle = nanovdb::createNanoGrid(*srcGrid); + auto handle = nanovdb::tools::createNanoGrid(*srcGrid); // Define a (raw) pointer to the NanoVDB grid on the host. Note we match the value type of the srcGrid! auto* dstGrid = handle.grid(); diff --git a/nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb_cuda/openvdb_to_nanovdb_cuda.cc b/nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb_cuda/openvdb_to_nanovdb_cuda.cc index ae4d435dfc..ca4eacf1c8 100644 --- a/nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb_cuda/openvdb_to_nanovdb_cuda.cc +++ b/nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb_cuda/openvdb_to_nanovdb_cuda.cc @@ -2,8 +2,8 @@ // SPDX-License-Identifier: MPL-2.0 #include // replace with your own dependencies for generating the OpenVDB grid -#include // converter from OpenVDB to NanoVDB (includes NanoVDB.h and GridManager.h) -#include +#include // converter from OpenVDB to NanoVDB (includes NanoVDB.h and GridManager.h) +#include extern "C" void launch_kernels(const nanovdb::NanoGrid*, const nanovdb::NanoGrid*, @@ -18,7 +18,7 @@ int main(int, char**) auto srcGrid = openvdb::tools::createLevelSetSphere(100.0f, openvdb::Vec3f(0.0f), 1.0f); // Converts the OpenVDB to NanoVDB and returns a GridHandle that uses CUDA for memory management. - auto handle = nanovdb::createNanoGrid(*srcGrid); + auto handle = nanovdb::tools::createNanoGrid(*srcGrid); cudaStream_t stream; // Create a CUDA stream to allow for asynchronous copy of pinned CUDA memory. 
cudaStreamCreate(&stream); diff --git a/nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb_cuda/openvdb_to_nanovdb_cuda_kernel.cu b/nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb_cuda/openvdb_to_nanovdb_cuda_kernel.cu index 543b0e3027..2db56ef9ef 100644 --- a/nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb_cuda/openvdb_to_nanovdb_cuda_kernel.cu +++ b/nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb_cuda/openvdb_to_nanovdb_cuda_kernel.cu @@ -2,7 +2,7 @@ // SPDX-License-Identifier: MPL-2.0 #include // this defined the core tree data structure of NanoVDB accessable on both the host and device -#include // required since GridHandle has device code +#include // required since GridHandle has device code #include // for printf // This is called by the host only diff --git a/nanovdb/nanovdb/examples/ex_raytrace_fog_volume/main.cc b/nanovdb/nanovdb/examples/ex_raytrace_fog_volume/main.cc index 29752239f1..fb429494c6 100644 --- a/nanovdb/nanovdb/examples/ex_raytrace_fog_volume/main.cc +++ b/nanovdb/nanovdb/examples/ex_raytrace_fog_volume/main.cc @@ -3,12 +3,12 @@ #include #include -#include -#include +#include +#include #if defined(NANOVDB_USE_CUDA) -#include -using BufferT = nanovdb::CudaDeviceBuffer; +#include +using BufferT = nanovdb::cuda::DeviceBuffer; #else using BufferT = nanovdb::HostBuffer; #endif @@ -26,7 +26,7 @@ int main(int ac, char** av) handle = nanovdb::io::readGrid(av[1]); std::cout << "Loaded NanoVDB grid[" << handle.gridMetaData()->shortGridName() << "]...\n"; } else { - handle = nanovdb::createFogVolumeSphere(100.0f, nanovdb::Vec3d(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphere"); + handle = nanovdb::tools::createFogVolumeSphere(100.0f, nanovdb::Vec3d(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphere"); } if (handle.gridMetaData()->isFogVolume() == false) { diff --git a/nanovdb/nanovdb/examples/ex_raytrace_fog_volume/nanovdb.cu b/nanovdb/nanovdb/examples/ex_raytrace_fog_volume/nanovdb.cu index c65dfff85a..9e2c95a64c 100644 --- a/nanovdb/nanovdb/examples/ex_raytrace_fog_volume/nanovdb.cu +++ b/nanovdb/nanovdb/examples/ex_raytrace_fog_volume/nanovdb.cu @@ -6,14 +6,14 @@ #include #if defined(NANOVDB_USE_CUDA) -#include -using BufferT = nanovdb::CudaDeviceBuffer; +#include +using BufferT = nanovdb::cuda::DeviceBuffer; #else using BufferT = nanovdb::HostBuffer; #endif -#include -#include -#include +#include +#include +#include #include "common.h" @@ -22,10 +22,10 @@ void runNanoVDB(nanovdb::GridHandle& handle, int numIterations, int wid using GridT = nanovdb::FloatGrid; using CoordT = nanovdb::Coord; using RealT = float; - using Vec3T = nanovdb::Vec3; - using RayT = nanovdb::Ray; + using Vec3T = nanovdb::math::Vec3; + using RayT = nanovdb::math::Ray; - auto* h_grid = handle.grid(); + auto *h_grid = handle.grid(); if (!h_grid) throw std::runtime_error("GridHandle does not contain a valid host grid"); diff --git a/nanovdb/nanovdb/examples/ex_raytrace_fog_volume/openvdb.cc b/nanovdb/nanovdb/examples/ex_raytrace_fog_volume/openvdb.cc index aaa9aa6a63..c41d597c61 100644 --- a/nanovdb/nanovdb/examples/ex_raytrace_fog_volume/openvdb.cc +++ b/nanovdb/nanovdb/examples/ex_raytrace_fog_volume/openvdb.cc @@ -11,13 +11,13 @@ #include #include -#include -#include +#include +#include #include "common.h" #if defined(NANOVDB_USE_CUDA) -using BufferT = nanovdb::CudaDeviceBuffer; +using BufferT = nanovdb::cuda::DeviceBuffer; #else using BufferT = nanovdb::HostBuffer; #endif @@ -30,7 +30,7 @@ void runOpenVDB(nanovdb::GridHandle& handle, int numIterations, int wid using Vec3T = openvdb::math::Vec3; using RayT = 
openvdb::math::Ray; - auto srcGrid = nanovdb::nanoToOpenVDB(handle); + auto srcGrid = nanovdb::tools::nanoToOpenVDB(handle); std::cout << "Exporting to OpenVDB grid[" << srcGrid->getName() << "]...\n"; auto h_grid = (GridT*)srcGrid.get(); diff --git a/nanovdb/nanovdb/examples/ex_raytrace_level_set/main.cc b/nanovdb/nanovdb/examples/ex_raytrace_level_set/main.cc index 5e066c20d7..88ef4125d2 100644 --- a/nanovdb/nanovdb/examples/ex_raytrace_level_set/main.cc +++ b/nanovdb/nanovdb/examples/ex_raytrace_level_set/main.cc @@ -3,12 +3,12 @@ #include #include -#include -#include -#include +#include +#include +#include #if defined(NANOVDB_USE_CUDA) -using BufferT = nanovdb::CudaDeviceBuffer; +using BufferT = nanovdb::cuda::DeviceBuffer; #else using BufferT = nanovdb::HostBuffer; #endif @@ -26,7 +26,7 @@ int main(int ac, char** av) handle = nanovdb::io::readGrid(av[1]); std::cout << "Loaded NanoVDB grid[" << handle.gridMetaData()->shortGridName() << "]...\n"; } else { - handle = nanovdb::createLevelSetSphere(100.0f, nanovdb::Vec3d(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphere"); + handle = nanovdb::tools::createLevelSetSphere(100.0f, nanovdb::Vec3d(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphere"); } if (handle.gridMetaData()->isLevelSet() == false) { diff --git a/nanovdb/nanovdb/examples/ex_raytrace_level_set/nanovdb.cu b/nanovdb/nanovdb/examples/ex_raytrace_level_set/nanovdb.cu index 14c8bd678d..d2864032e5 100644 --- a/nanovdb/nanovdb/examples/ex_raytrace_level_set/nanovdb.cu +++ b/nanovdb/nanovdb/examples/ex_raytrace_level_set/nanovdb.cu @@ -6,15 +6,15 @@ #include #if defined(NANOVDB_USE_CUDA) -#include -using BufferT = nanovdb::CudaDeviceBuffer; +#include +using BufferT = nanovdb::cuda::DeviceBuffer; #else using BufferT = nanovdb::HostBuffer; #endif -#include -#include -#include -#include +#include +#include +#include +#include #include "common.h" @@ -23,10 +23,10 @@ void runNanoVDB(nanovdb::GridHandle& handle, int numIterations, int wid using GridT = nanovdb::FloatGrid; using CoordT = nanovdb::Coord; using RealT = float; - using Vec3T = nanovdb::Vec3; - using RayT = nanovdb::Ray; + using Vec3T = nanovdb::math::Vec3; + using RayT = nanovdb::math::Ray; - auto* h_grid = handle.grid(); + auto *h_grid = handle.grid(); if (!h_grid) throw std::runtime_error("GridHandle does not contain a valid host grid"); @@ -58,7 +58,7 @@ void runNanoVDB(nanovdb::GridHandle& handle, int numIterations, int wid float t0; CoordT ijk; float v; - if (nanovdb::ZeroCrossing(iRay, acc, ijk, v, t0)) { + if (nanovdb::math::ZeroCrossing(iRay, acc, ijk, v, t0)) { // write distance to surface. 
(we assume it is a uniform voxel) float wT0 = t0 * float(grid->voxelSize()[0]); compositeOp(image, i, width, height, wT0 / (wBBoxDimZ * 2), 1.0f); diff --git a/nanovdb/nanovdb/examples/ex_raytrace_level_set/openvdb.cc b/nanovdb/nanovdb/examples/ex_raytrace_level_set/openvdb.cc index c8a28e60eb..ef64f1d463 100644 --- a/nanovdb/nanovdb/examples/ex_raytrace_level_set/openvdb.cc +++ b/nanovdb/nanovdb/examples/ex_raytrace_level_set/openvdb.cc @@ -11,18 +11,18 @@ #include #include -#include -#include +#include +#include #include "common.h" #if defined(NANOVDB_USE_CUDA) -using BufferT = nanovdb::CudaDeviceBuffer; +using BufferT = nanovdb::cuda::DeviceBuffer; #else using BufferT = nanovdb::HostBuffer; #endif -void runOpenVDB(nanovdb::GridHandle& handle, int numIterations, int width, int height, BufferT& imageBuffer) +void runOpenVDB(nanovdb::GridHandle& handle, int numIterations, int width, int height, BufferT& imageBuffer) { using GridT = openvdb::FloatGrid; using CoordT = openvdb::Coord; @@ -31,7 +31,7 @@ void runOpenVDB(nanovdb::GridHandle& handle, int numI using RayT = openvdb::math::Ray; #if 1 - auto srcGrid = nanovdb::nanoToOpenVDB(handle); + auto srcGrid = nanovdb::tools::nanoToOpenVDB(handle); std::cout << "Exporting to OpenVDB grid[" << srcGrid->getName() << "]...\n"; #else openvdb::initialize(); diff --git a/nanovdb/nanovdb/examples/ex_read_nanovdb_sphere/read_nanovdb_sphere.cc b/nanovdb/nanovdb/examples/ex_read_nanovdb_sphere/read_nanovdb_sphere.cc index 76f56fe2dc..9bdf8f8f9d 100644 --- a/nanovdb/nanovdb/examples/ex_read_nanovdb_sphere/read_nanovdb_sphere.cc +++ b/nanovdb/nanovdb/examples/ex_read_nanovdb_sphere/read_nanovdb_sphere.cc @@ -1,7 +1,7 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -#include +#include /// @brief Read a NanoVDB grid form file, check pointer and access a single value /// diff --git a/nanovdb/nanovdb/examples/ex_read_nanovdb_sphere_accessor/read_nanovdb_sphere_accessor.cc b/nanovdb/nanovdb/examples/ex_read_nanovdb_sphere_accessor/read_nanovdb_sphere_accessor.cc index c9cf0b0883..91010b6cf7 100644 --- a/nanovdb/nanovdb/examples/ex_read_nanovdb_sphere_accessor/read_nanovdb_sphere_accessor.cc +++ b/nanovdb/nanovdb/examples/ex_read_nanovdb_sphere_accessor/read_nanovdb_sphere_accessor.cc @@ -1,4 +1,4 @@ -#include // this is required to read (and write) NanoVDB files on the host +#include // this is required to read (and write) NanoVDB files on the host /// @brief Read a NanoVDB grid from a file and print out multiple values. 
/// diff --git a/nanovdb/nanovdb/examples/ex_read_nanovdb_sphere_accessor_cuda/read_nanovdb_sphere_accessor_cuda.cu b/nanovdb/nanovdb/examples/ex_read_nanovdb_sphere_accessor_cuda/read_nanovdb_sphere_accessor_cuda.cu index 4343e01420..c07393a77f 100644 --- a/nanovdb/nanovdb/examples/ex_read_nanovdb_sphere_accessor_cuda/read_nanovdb_sphere_accessor_cuda.cu +++ b/nanovdb/nanovdb/examples/ex_read_nanovdb_sphere_accessor_cuda/read_nanovdb_sphere_accessor_cuda.cu @@ -1,9 +1,9 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -#include // this is required to read (and write) NanoVDB files on the host -#include // required for CUDA memory management -#include +#include // this is required to read (and write) NanoVDB files on the host +#include // required for CUDA memory management +#include extern "C" void launch_kernels(const nanovdb::NanoGrid*, const nanovdb::NanoGrid*, diff --git a/nanovdb/nanovdb/examples/ex_vox_to_nanovdb/VoxToNanoVDB.h b/nanovdb/nanovdb/examples/ex_vox_to_nanovdb/VoxToNanoVDB.h index 98bacb538e..ab9316ad33 100644 --- a/nanovdb/nanovdb/examples/ex_vox_to_nanovdb/VoxToNanoVDB.h +++ b/nanovdb/nanovdb/examples/ex_vox_to_nanovdb/VoxToNanoVDB.h @@ -3,8 +3,8 @@ #pragma once -#include -#include +#include +#include #define OGT_VOX_IMPLEMENTATION #include "ogt_vox.h" @@ -132,7 +132,7 @@ nanovdb::GridHandle convertVoxToNanoVDB(const std::string& inFilename, try { if (const auto* scene = detail::load_vox_scene(inFilename.c_str())) { // we just merge into one grid... - nanovdb::build::Grid grid(nanovdb::Rgba8(),modelName,nanovdb::GridClass::VoxelVolume); + nanovdb::tools::build::Grid grid(nanovdb::math::Rgba8(),modelName,nanovdb::GridClass::VoxelVolume); auto acc = grid.getAccessor(); auto processModelFn = [&](int modelIndex, const ogt_vox_transform& xform) { @@ -145,7 +145,7 @@ nanovdb::GridHandle convertVoxToNanoVDB(const std::string& inFilename, if (uint8_t color_index = model->voxel_data[voxel_index]) { ogt_vox_rgba rgba = scene->palette.color[color_index]; auto ijk = nanovdb::Coord::Floor(detail::matMult4x4((float*)&xform, nanovdb::Vec4f(x, y, z, 1))); - acc.setValue(nanovdb::Coord(ijk[0], ijk[2], -ijk[1]), *reinterpret_cast(&rgba)); + acc.setValue(nanovdb::Coord(ijk[0], ijk[2], -ijk[1]), *reinterpret_cast(&rgba)); } } } @@ -185,7 +185,7 @@ nanovdb::GridHandle convertVoxToNanoVDB(const std::string& inFilename, printf("scene processing end.\n"); ogt_vox_destroy_scene(scene); - return nanovdb::createNanoGrid(grid); + return nanovdb::tools::createNanoGrid(grid); } else { std::ostringstream ss; ss << "Invalid file \"" << inFilename << "\""; diff --git a/nanovdb/nanovdb/examples/ex_vox_to_nanovdb/vox_to_nanovdb.cc b/nanovdb/nanovdb/examples/ex_vox_to_nanovdb/vox_to_nanovdb.cc index bf6d528e1c..27b1b67d2b 100644 --- a/nanovdb/nanovdb/examples/ex_vox_to_nanovdb/vox_to_nanovdb.cc +++ b/nanovdb/nanovdb/examples/ex_vox_to_nanovdb/vox_to_nanovdb.cc @@ -1,7 +1,7 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -#include +#include #include "VoxToNanoVDB.h" /// @brief Convert an .vox file into a .nvdb file. 
diff --git a/nanovdb/nanovdb/examples/ex_voxels_to_grid_cuda/ex_voxels_to_grid_cuda.cu b/nanovdb/nanovdb/examples/ex_voxels_to_grid_cuda/ex_voxels_to_grid_cuda.cu index d3ca1d8883..83ffc37067 100644 --- a/nanovdb/nanovdb/examples/ex_voxels_to_grid_cuda/ex_voxels_to_grid_cuda.cu +++ b/nanovdb/nanovdb/examples/ex_voxels_to_grid_cuda/ex_voxels_to_grid_cuda.cu @@ -1,22 +1,21 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -#include +#include +#include /// @brief Demonstrates how to create a NanoVDB grid from voxel coordinates on the GPU int main() { - using namespace nanovdb; - try { // Define list of voxel coordinates and copy them to the device const size_t numVoxels = 3; - Coord coords[numVoxels] = {Coord(1, 2, 3), Coord(-1,3,6), Coord(-90,100,5678)}, *d_coords = nullptr; - cudaCheck(cudaMalloc(&d_coords, numVoxels * sizeof(Coord))); - cudaCheck(cudaMemcpy(d_coords, coords, numVoxels * sizeof(Coord), cudaMemcpyHostToDevice));// coords CPU -> GPU + nanovdb::Coord coords[numVoxels] = {nanovdb::Coord(1, 2, 3), nanovdb::Coord(-1,3,6), nanovdb::Coord(-90,100,5678)}, *d_coords = nullptr; + cudaCheck(cudaMalloc(&d_coords, numVoxels * sizeof(nanovdb::Coord))); + cudaCheck(cudaMemcpy(d_coords, coords, numVoxels * sizeof(nanovdb::Coord), cudaMemcpyHostToDevice));// coords CPU -> GPU // Generate a NanoVDB grid that contains the list of voxels on the device - auto handle = cudaVoxelsToGrid(d_coords, numVoxels); + auto handle = nanovdb::tools::cuda::voxelsToGrid(d_coords, numVoxels); auto *d_grid = handle.deviceGrid(); // Define a list of values and copy them to the device @@ -25,10 +24,10 @@ int main() cudaCheck(cudaMemcpy(d_values, values, numVoxels * sizeof(float), cudaMemcpyHostToDevice));// values CPU -> GPU // Launch a device kernel that sets the values of voxels define above and prints them - const unsigned int numThreads = 128, numBlocks = (numVoxels + numThreads - 1) / numThreads; - cudaLambdaKernel<<>>(numVoxels, [=] __device__(size_t tid) { - using OpT = SetVoxel;// defines type of random-access operation (set value) - const Coord &ijk = d_coords[tid]; + const unsigned int numThreads = 128, numBlocks = nanovdb::util::cuda::blocksPerGrid(numVoxels, numThreads); + nanovdb::util::cuda::lambdaKernel<<>>(numVoxels, [=] __device__(size_t tid) { + using OpT = nanovdb::SetVoxel;// defines type of random-access operation (set value) + const nanovdb::Coord &ijk = d_coords[tid]; d_grid->tree().set(ijk, d_values[tid]);// normally one should use a ValueAccessor printf("GPU: voxel # %lu, grid(%4i,%4i,%4i) = %5.1f\n", tid, ijk[0], ijk[1], ijk[2], d_grid->tree().getValue(ijk)); }); cudaCheckError(); @@ -37,7 +36,7 @@ int main() handle.deviceDownload();// creates a copy on the CPU auto *grid = handle.grid(); for (size_t i=0; itree().getValue(ijk)); } diff --git a/nanovdb/nanovdb/examples/ex_write_nanovdb_grids/write_nanovdb_grids.cc b/nanovdb/nanovdb/examples/ex_write_nanovdb_grids/write_nanovdb_grids.cc index 314fe4ea57..2f9465e62e 100644 --- a/nanovdb/nanovdb/examples/ex_write_nanovdb_grids/write_nanovdb_grids.cc +++ b/nanovdb/nanovdb/examples/ex_write_nanovdb_grids/write_nanovdb_grids.cc @@ -1,8 +1,8 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -#include -#include +#include +#include /// @brief Creates multiple NanoVDB grids, accesses a value in one, and saves all grids to file. 
 ///
@@ -12,11 +12,11 @@ int main()
     try {
         std::vector<nanovdb::GridHandle<>> handles;
         // Create multiple NanoVDB grids of various types
-        handles.push_back(nanovdb::createLevelSetSphere(100.0f));
-        handles.push_back(nanovdb::createLevelSetTorus(100.0f, 50.0f));
-        handles.push_back(nanovdb::createLevelSetBox(400.0f, 600.0f, 800.0f));
-        handles.push_back(nanovdb::createLevelSetBBox(400.0f, 600.0f, 800.0f, 10.0f));
-        handles.push_back(nanovdb::createPointSphere(1, 100.0f));
+        handles.push_back(nanovdb::tools::createLevelSetSphere(100.0f));
+        handles.push_back(nanovdb::tools::createLevelSetTorus(100.0f, 50.0f));
+        handles.push_back(nanovdb::tools::createLevelSetBox(400.0f, 600.0f, 800.0f));
+        handles.push_back(nanovdb::tools::createLevelSetBBox(400.0f, 600.0f, 800.0f, 10.0f));
+        handles.push_back(nanovdb::tools::createPointSphere(1, 100.0f));
         auto* dstGrid = handles[0].grid<float>(); // Get a (raw) pointer to the NanoVDB grid from the GridManager.
         if (!dstGrid)
diff --git a/nanovdb/nanovdb/io/IO.h b/nanovdb/nanovdb/io/IO.h
new file mode 100644
index 0000000000..3c6259140f
--- /dev/null
+++ b/nanovdb/nanovdb/io/IO.h
@@ -0,0 +1,767 @@
+// Copyright Contributors to the OpenVDB Project
+// SPDX-License-Identifier: MPL-2.0
+
+/*!
+    \file IO.h
+
+    \author Ken Museth
+
+    \date May 1, 2020
+
+    \brief Implements I/O for NanoVDB grids. Features optional BLOSC and ZIP
+           file compression, support for multiple grids per file as well as
+           multiple grid types.
+
+    \note This file does NOT depend on OpenVDB, but optionally on ZIP and BLOSC
+
+    \details NanoVDB files take one of two formats:
+    1) multiple segments each with multiple grids (segments have easy-to-access metadata about their grids)
+    2) starting with version 32.6.0 nanovdb files also support a raw buffer with one or more grids (just a
+       dump of a raw grid buffer, so no additional metadata in headers as with the segments mentioned above).
+
+    // 1: Segment: FileHeader, MetaData0, gridName0...MetaDataN, gridNameN, compressed Grid0, ... compressed GridN
+    // 2: Raw: Grid0, ... GridN
+*/
+
+#ifndef NANOVDB_IO_H_HAS_BEEN_INCLUDED
+#define NANOVDB_IO_H_HAS_BEEN_INCLUDED
+
+#include <nanovdb/NanoVDB.h>
+#include <nanovdb/GridHandle.h>
+#include <nanovdb/tools/GridChecksum.h> // for updateGridCount
+
+#include <fstream> // for std::ifstream
+#include <iostream> // for std::cerr/cout
+#include <string> // for std::string
+#include <sstream> // for std::stringstream
+#include <cstring> // for std::strcmp
+#include <memory> // for std::unique_ptr
+#include <vector> // for std::vector
+#ifdef NANOVDB_USE_ZIP
+#include <zlib.h> // for ZIP compression
+#endif
+#ifdef NANOVDB_USE_BLOSC
+#include <blosc.h> // for BLOSC compression
+#endif
+
+// Due to a bug in older versions of gcc, including fstream might
+// define "major" and "minor" which are used as member data below.
+// See https://bugzilla.redhat.com/show_bug.cgi?id=130601
+#if defined(major) || defined(minor)
+#undef major
+#undef minor
+#endif
+
+namespace nanovdb {// ==========================================================
+
+namespace io {// ===============================================================
+
+// --------------------------> writeGrid(s) <------------------------------------
+
+/// @brief Write a single grid to file (over-writing existing content of the file)
+///
+/// @note The single grid is written into a single segment, i.e. header with metadata about its type and size.
+template<typename BufferT>
+void writeGrid(const std::string& fileName, const GridHandle<BufferT>& handle, io::Codec codec = io::Codec::NONE, int verbose = 0);
+
+/// @brief Write multiple grids to file (over-writing existing content of the file)
+///
+/// @note The multiple grids are written into the same segment, i.e. header with metadata about all grids
+template<typename BufferT, template <class, class...> class VecT = std::vector>
+void writeGrids(const std::string& fileName, const VecT<GridHandle<BufferT>>& handles, Codec codec = Codec::NONE, int verbose = 0);
+
+// --------------------------> readGrid(s) <------------------------------------
+
+/// @brief Read and return one or all grids from a file into a single GridHandle
+/// @tparam BufferT Type of buffer used for memory allocation
+/// @param fileName string name of file to be read from
+/// @param n zero-based signed index of the grid to be read.
+///          The default value of 0 means read only the first grid.
+///          A negative value of n means read all grids in the file.
+/// @param verbose specify verbosity level. Default value of zero means quiet.
+/// @param buffer optional buffer used for memory allocation
+/// @return a single GridHandle with one or all grids found in the file
+/// @throw will throw a std::runtime_error if the file does not contain a grid with index n
+template<typename BufferT = HostBuffer>
+GridHandle<BufferT> readGrid(const std::string& fileName, int n = 0, int verbose = 0, const BufferT& buffer = BufferT());
+
+/// @brief Read and return the first grid with a specific name from a file
+/// @tparam BufferT Type of buffer used for memory allocation
+/// @param fileName string name of file to be read from
+/// @param gridName string name of the grid to be read
+/// @param verbose specify verbosity level. Default value of zero means quiet.
+/// @param buffer optional buffer used for memory allocation
+/// @return a single GridHandle containing the grid with the specific name
+/// @throw will throw a std::runtime_error if the file does not contain a grid with the specific name
+template<typename BufferT = HostBuffer>
+GridHandle<BufferT> readGrid(const std::string& fileName, const std::string& gridName, int verbose = 0, const BufferT& buffer = BufferT());
+
+/// @brief Read all the grids in the file and return them as a vector of multiple GridHandles, each containing
+///        all grids encoded in the same segment of the file (i.e. they were written together). This method also
+///        works if the file contains a raw grid buffer, in which case a single GridHandle is returned.
+/// @tparam BufferT Type of buffer used for memory allocation
+/// @param fileName string name of file to be read from
+/// @param verbose specify verbosity level. Default value of zero means quiet.
+/// @param buffer optional buffer used for memory allocation
+/// @return Return a vector of GridHandles each containing all grids encoded
+///         in the same segment of the file (i.e. they were written together).
+template<typename BufferT = HostBuffer, template <class, class...> class VecT = std::vector>
+VecT<GridHandle<BufferT>> readGrids(const std::string& fileName, int verbose = 0, const BufferT& buffer = BufferT());
+
+// -----------------------------------------------------------------------
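A sketch of the round trip these declarations enable. Illustrative only: the file name is arbitrary, and Codec::BLOSC further assumes a build with NANOVDB_USE_BLOSC.

// Sketch (assumptions noted above)
#include <nanovdb/io/IO.h>
#include <nanovdb/tools/CreatePrimitives.h>

int main()
{
    auto handle = nanovdb::tools::createLevelSetSphere<float>(100.0f);
    nanovdb::io::writeGrid("sphere.nvdb", handle, nanovdb::io::Codec::BLOSC); // one segment, one grid
    auto first = nanovdb::io::readGrid("sphere.nvdb");  // n = 0 reads only the first grid
    auto all   = nanovdb::io::readGrids("sphere.nvdb"); // one handle per segment in the file
    return 0;
}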
+/// We fix a specific size for counting bytes in files so that they
+/// are saved the same regardless of machine precision. (Note there are
+/// still little/big-endian issues, however.)
+using fileSize_t = uint64_t;
+
+/// @brief Internal functions for compressed read/write of a NanoVDB GridHandle into a stream
+///
+/// @warning These functions should never be called directly by client code
+namespace Internal {
+static constexpr fileSize_t MAX_SIZE = 1UL << 30; // size is 1 GB
+
+template<typename BufferT>
+static fileSize_t write(std::ostream& os, const GridHandle<BufferT>& handle, Codec codec, uint32_t n);
+
+template<typename BufferT>
+static void read(std::istream& is, BufferT& buffer, Codec codec);
+
+static void read(std::istream& is, char* data, fileSize_t size, Codec codec);
+} // namespace Internal
+
+/// @brief Standard hash function to use on strings; std::hash may vary by
+///        platform/implementation and is known to produce frequent collisions.
+uint64_t stringHash(const char* cstr);
+
+/// @brief Return a uint64_t hash key of a std::string
+inline uint64_t stringHash(const std::string& str){return stringHash(str.c_str());}
+
+/// @brief Return a uint64_t with its bytes reversed so we can check for endianness
+inline uint64_t reverseEndianness(uint64_t val)
+{
+    return (((val) >> 56) & 0x00000000000000FF) | (((val) >> 40) & 0x000000000000FF00) |
+           (((val) >> 24) & 0x0000000000FF0000) | (((val) >>  8) & 0x00000000FF000000) |
+           (((val) <<  8) & 0x000000FF00000000) | (((val) << 24) & 0x0000FF0000000000) |
+           (((val) << 40) & 0x00FF000000000000) | (((val) << 56) & 0xFF00000000000000);
+}
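A sketch of how reverseEndianness supports the endianness caveat above: a reader can detect a byte-swapped file by testing the leading magic word against both orderings. The error handling here is illustrative, not the library's actual code path.

// Sketch (assumes an open std::istream `is` positioned at the start of the file)
uint64_t magic = 0u;
is.read(reinterpret_cast<char*>(&magic), sizeof(uint64_t));
if (magic == nanovdb::io::reverseEndianness(NANOVDB_MAGIC_NUMB)) {
    throw std::runtime_error("NanoVDB file was written with the opposite endianness");
}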
+
+/// @brief This class defines all the data stored in a segment of a file
+///
+/// @details A segment consists of a FileHeader followed by a list of FileGridMetaData
+///          each followed by grid names and then finally the grids themselves.
+struct Segment
+{
+    // Check assumptions made during read and write of FileHeader and FileMetaData
+    static_assert(sizeof(FileHeader) == 16u, "Unexpected sizeof(FileHeader)");
+    FileHeader header;// defined in NanoVDB.h
+    std::vector<FileGridMetaData> meta;// defined in NanoVDB.h
+    Segment(Codec c = Codec::NONE)
+#ifdef NANOVDB_USE_NEW_MAGIC_NUMBERS
+        : header{NANOVDB_MAGIC_FILE, Version(), 0u, c}
+#else
+        : header{NANOVDB_MAGIC_NUMB, Version(), 0u, c}
+#endif
+        , meta()
+    {
+    }
+    template<typename BufferT>
+    void     add(const GridHandle<BufferT>& h);
+    bool     read(std::istream& is);
+    void     write(std::ostream& os) const;
+    uint64_t memUsage() const;
+}; // Segment
+
+/// @brief Return true if the file contains a grid with the specified name
+bool hasGrid(const std::string& fileName, const std::string& gridName);
+
+/// @brief Return true if the stream contains a grid with the specified name
+bool hasGrid(std::istream& is, const std::string& gridName);
+
+/// @brief Reads and returns a vector of meta data for all the grids found in the specified file
+std::vector<FileGridMetaData> readGridMetaData(const std::string& fileName);
+
+/// @brief Reads and returns a vector of meta data for all the grids found in the specified stream
+std::vector<FileGridMetaData> readGridMetaData(std::istream& is);
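+
+// Example: the declarations above allow a file to be inspected without loading
+// any grids ("volumes.nvdb" and "density" are placeholder names):
+//
+//   if (nanovdb::io::hasGrid("volumes.nvdb", "density")) {
+//       for (auto& m : nanovdb::io::readGridMetaData("volumes.nvdb")) {
+//           std::cout << m.gridName << ": " << m.voxelCount << " voxels\n";
+//       }
+//   }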
+
+// --------------------------> Implementations for Internal <------------------------------------
+
+template<typename BufferT>
+fileSize_t Internal::write(std::ostream& os, const GridHandle<BufferT>& handle, Codec codec, uint32_t n)
+{
+    const char* data = reinterpret_cast<const char*>(handle.gridData(n));
+    fileSize_t  total = 0, residual = handle.gridSize(n);
+
+    switch (codec) {
+    case Codec::ZIP: {
+#ifdef NANOVDB_USE_ZIP
+        uLongf size = compressBound(static_cast<uLongf>(residual)); // Get an upper bound on the size of the compressed data.
+        std::unique_ptr<Bytef[]> tmp(new Bytef[size]);
+        const int status = compress(tmp.get(), &size, reinterpret_cast<const Bytef*>(data), static_cast<uLongf>(residual));
+        if (status != Z_OK) throw std::runtime_error("Internal write error in ZIP");
+        if (size > residual) std::cerr << "\nWarning: Unexpected ZIP compression from " << residual << " to " << size << " bytes\n";
+        const fileSize_t outBytes = size;
+        os.write(reinterpret_cast<const char*>(&outBytes), sizeof(fileSize_t));
+        os.write(reinterpret_cast<const char*>(tmp.get()), outBytes);
+        total += sizeof(fileSize_t) + outBytes;
+#else
+        throw std::runtime_error("ZIP compression codec was disabled during build");
+#endif
+        break;
+    }
+    case Codec::BLOSC: {
+#ifdef NANOVDB_USE_BLOSC
+        do {
+            fileSize_t chunk = residual < MAX_SIZE ? residual : MAX_SIZE, size = chunk + BLOSC_MAX_OVERHEAD;
+            std::unique_ptr<char[]> tmp(new char[size]);
+            const int count = blosc_compress_ctx(9, 1, sizeof(float), chunk, data, tmp.get(), size, BLOSC_LZ4_COMPNAME, 1 << 18, 1);
+            if (count <= 0) throw std::runtime_error("Internal write error in BLOSC");
+            const fileSize_t outBytes = count;
+            os.write(reinterpret_cast<const char*>(&outBytes), sizeof(fileSize_t));
+            os.write(reinterpret_cast<const char*>(tmp.get()), outBytes);
+            total += sizeof(fileSize_t) + outBytes;
+            data += chunk;
+            residual -= chunk;
+        } while (residual > 0);
+#else
+        throw std::runtime_error("BLOSC compression codec was disabled during build");
+#endif
+        break;
+    }
+    default:
+        os.write(data, residual);
+        total += residual;
+    }
+    if (!os) throw std::runtime_error("Failed to write Tree to file");
+    return total;
+} // Internal::write
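+
+// Note that both codecs share the same stream framing: each compressed chunk is
+// preceded by its byte count, i.e. [fileSize_t size][payload]..., and BLOSC
+// additionally splits payloads into chunks of at most MAX_SIZE (1 GB) each.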
+
+template<typename BufferT>
+void Internal::read(std::istream& is, BufferT& buffer, Codec codec)
+{
+    Internal::read(is, reinterpret_cast<char*>(buffer.data()), buffer.size(), codec);
+} // Internal::read
+
+/// @brief read compressed grid from stream
+/// @param is input stream to read from
+/// @param data data buffer to write into. Must be of size @c residual or larger.
+/// @param residual expected byte size of uncompressed data.
+/// @param codec mode of compression
+void Internal::read(std::istream& is, char* data, fileSize_t residual, Codec codec)
+{
+    // read tree using optional compression
+    switch (codec) {
+    case Codec::ZIP: {
+#ifdef NANOVDB_USE_ZIP
+        fileSize_t size;
+        is.read(reinterpret_cast<char*>(&size), sizeof(fileSize_t));
+        std::unique_ptr<Bytef[]> tmp(new Bytef[size]);// temp buffer for compressed data
+        is.read(reinterpret_cast<char*>(tmp.get()), size);
+        uLongf numBytes = static_cast<uLongf>(residual);
+        int status = uncompress(reinterpret_cast<Bytef*>(data), &numBytes, tmp.get(), static_cast<uLongf>(size));
+        if (status != Z_OK) throw std::runtime_error("Internal read error in ZIP");
+        if (fileSize_t(numBytes) != residual) throw std::runtime_error("UNZIP failed on byte size");
+#else
+        throw std::runtime_error("ZIP compression codec was disabled during build");
+#endif
+        break;
+    }
+    case Codec::BLOSC: {
+#ifdef NANOVDB_USE_BLOSC
+        do {
+            fileSize_t size;
+            is.read(reinterpret_cast<char*>(&size), sizeof(fileSize_t));
+            std::unique_ptr<char[]> tmp(new char[size]);// temp buffer for compressed data
+            is.read(reinterpret_cast<char*>(tmp.get()), size);
+            const fileSize_t chunk = residual < MAX_SIZE ? residual : MAX_SIZE;
+            const int count = blosc_decompress_ctx(tmp.get(), data, size_t(chunk), 1); //fails with more threads :(
+            if (count < 1) throw std::runtime_error("Internal read error in BLOSC");
+            if (count != int(chunk)) throw std::runtime_error("BLOSC failed on byte size");
+            data += size_t(chunk);
+            residual -= chunk;
+        } while (residual > 0);
+#else
+        throw std::runtime_error("BLOSC compression codec was disabled during build");
+#endif
+        break;
+    }
+    default:
+        is.read(data, residual);// read uncompressed data
+    }
+    if (!is) throw std::runtime_error("Failed to read Tree from file");
+} // Internal::read
+
+// --------------------------> Implementations for FileGridMetaData <------------------------------------
+
+inline FileGridMetaData::FileGridMetaData(uint64_t size, Codec c, const GridData& gridData)
+    : FileMetaData{size, // gridSize
+                   size, // fileSize (will typically be redefined)
+                   0u, // nameKey
+                   0u, // voxelCount
+                   gridData.mGridType, // gridType
+                   gridData.mGridClass, // gridClass
+                   gridData.mWorldBBox, // worldBBox
+                   gridData.indexBBox(), // indexBBox
+                   gridData.mVoxelSize, // voxelSize
+                   0, // nameSize
+                   {0, 0, 0, 1}, // nodeCount[4]
+                   {0, 0, 0}, // tileCount[3]
+                   c, // codec
+                   0, // padding
+                   Version()}// version
+    , gridName(gridData.gridName())
+{
+    auto& treeData = *reinterpret_cast<const TreeData*>(gridData.treePtr());
+    nameKey = stringHash(gridName);
+    voxelCount = treeData.mVoxelCount;
+    nameSize = static_cast<uint32_t>(gridName.size() + 1); // include '\0'
+    for (int i = 0; i < 3; ++i) {
+        FileMetaData::nodeCount[i] = treeData.mNodeCount[i];
+        FileMetaData::tileCount[i] = treeData.mTileCount[i];
+    }
+}// FileGridMetaData::FileGridMetaData
+
+inline void FileGridMetaData::write(std::ostream& os) const
+{
+    os.write(reinterpret_cast<const char*>(this), sizeof(FileMetaData));
+    os.write(gridName.c_str(), nameSize);
+    if (!os) throw std::runtime_error("Failed writing FileGridMetaData");
+}// FileGridMetaData::write
+
+inline void FileGridMetaData::read(std::istream& is)
+{
+    is.read(reinterpret_cast<char*>(this), sizeof(FileMetaData));
+    std::unique_ptr<char[]> tmp(new char[nameSize]);
+    is.read(reinterpret_cast<char*>(tmp.get()), nameSize);
+    gridName.assign(tmp.get());
+    if (!is) throw std::runtime_error("Failed reading FileGridMetaData");
+}// FileGridMetaData::read
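+
+// Since nameKey caches stringHash(gridName), readers can reject mismatching
+// grids with a single integer compare and only fall back to a full string
+// compare on a hash hit (see hasGrid and the readGrid by-name overload below).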
+
+// --------------------------> Implementations for Segment <------------------------------------
+
+inline uint64_t Segment::memUsage() const
+{
+    uint64_t sum = sizeof(FileHeader);
+    for (auto& m : meta) sum += m.memUsage();// includes FileMetaData + grid name
+    return sum;
+}// Segment::memUsage
+
+template<typename BufferT>
+inline void Segment::add(const GridHandle<BufferT>& h)
+{
+    for (uint32_t i = 0; i < h.gridCount(); ++i) {
+        const GridData* gridData = h.gridData(i);
+        if (!gridData) throw std::runtime_error("Segment::add: GridHandle does not contain grid #" + std::to_string(i));
+        meta.emplace_back(h.gridSize(i), header.codec, *gridData);
+    }
+    header.gridCount += h.gridCount();
+}// Segment::add
+
+inline void Segment::write(std::ostream& os) const
+{
+    if (header.gridCount == 0) {
+        throw std::runtime_error("Segment contains no grids");
+    } else if (!os.write(reinterpret_cast<const char*>(&header), sizeof(FileHeader))) {
+        throw std::runtime_error("Failed to write FileHeader of Segment");
+    }
+    for (auto& m : meta) m.write(os);
+}// Segment::write
+
+inline bool Segment::read(std::istream& is)
+{
+    is.read(reinterpret_cast<char*>(&header), sizeof(FileHeader));
+    if (is.eof()) {// The EOF flag is only set once a read tries to read past the end of the file
+        is.clear(std::ios_base::eofbit);// clear eof flag so we can rewind and read again
+        return false;
+    }
+    const MagicType magic = toMagic(header.magic);
+    if (magic != MagicType::NanoVDB && magic != MagicType::NanoFile) {
+        // first check for byte-swapped header magic.
+        if (header.magic == reverseEndianness(NANOVDB_MAGIC_NUMB) ||
+            header.magic == reverseEndianness(NANOVDB_MAGIC_FILE)) {
+            throw std::runtime_error("This nvdb file has reversed endianness");
+        } else {
+            if (magic == MagicType::OpenVDB) {
+                throw std::runtime_error("Expected a NanoVDB file, but read an OpenVDB file!");
+            } else if (magic == MagicType::NanoGrid) {
+                throw std::runtime_error("Expected a NanoVDB file, but read a raw NanoVDB grid!");
+            } else {
+                throw std::runtime_error("Expected a NanoVDB file, but read a file of unknown type!");
+            }
+        }
+    } else if (!header.version.isCompatible()) {
+        std::stringstream ss;
+        Version v;
+        is.read(reinterpret_cast<char*>(&v), sizeof(Version));// read GridData::mVersion located at byte 16=sizeof(FileHeader) in stream
+        if (v.getMajor() == NANOVDB_MAJOR_VERSION_NUMBER) {
+            ss << "This file looks like it contains a raw grid buffer and not a standard file with meta data";
+        } else if (header.version.getMajor() < NANOVDB_MAJOR_VERSION_NUMBER) {
+            char str[30];
+            ss << "The file contains an older version of NanoVDB: " << std::string(toStr(str, header.version)) << "!\n\t"
+               << "Recommendation: Re-generate this NanoVDB file with this version: " << NANOVDB_MAJOR_VERSION_NUMBER << ".X of NanoVDB";
+        } else {
+            ss << "This tool was compiled against an older version of NanoVDB: " << NANOVDB_MAJOR_VERSION_NUMBER << ".X!\n\t"
+               << "Recommendation: Re-compile this tool against the newer version: " << header.version.getMajor() << ".X of NanoVDB";
+        }
+        throw std::runtime_error("An unrecoverable error in nanovdb::Segment::read:\n\tIncompatible file format: " + ss.str());
+    }
+    meta.resize(header.gridCount);
+    for (auto& m : meta) {
+        m.read(is);
+        m.version = header.version;
+    }
+    return true;
+}// Segment::read
+
+// --------------------------> writeGrid <------------------------------------
+
+template<typename BufferT>
+void writeGrid(std::ostream& os, const GridHandle<BufferT>& handle, Codec codec)
+{
+    Segment seg(codec);
+    seg.add(handle);
+    const auto start = os.tellp();
+    seg.write(os); // write header without the correct fileSize (so it's allocated)
+    for (uint32_t i = 0; i < handle.gridCount(); ++i) {
+        seg.meta[i].fileSize = Internal::write(os, handle, codec, i);
+    }
+    os.seekp(start);
+    seg.write(os);// re-write header with the correct fileSize
+    os.seekp(0, std::ios_base::end);// skip to end
+}// writeGrid
+
+template<typename BufferT>
+void writeGrid(const std::string& fileName, const GridHandle<BufferT>& handle, Codec codec, int verbose)
+{
+    std::ofstream os(fileName, std::ios::out | std::ios::binary | std::ios::trunc);
+    if (!os.is_open()) {
+        throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for output");
+    }
+    writeGrid(os, handle, codec);
+    if (verbose) {
+        std::cout << "Wrote nanovdb::Grid to file named \"" << fileName << "\"" << std::endl;
+    }
+}// writeGrid
+
+// --------------------------> writeGrids <------------------------------------
+
+template<typename BufferT = HostBuffer, template<typename...> class VecT = std::vector>
+void writeGrids(std::ostream& os, const VecT<GridHandle<BufferT>>& handles, Codec codec = Codec::NONE)
+{
+    for (auto& h : handles) writeGrid(os, h, codec);
+}// writeGrids
+
+template<typename BufferT, template<typename...> class VecT>
+void writeGrids(const std::string& fileName, const VecT<GridHandle<BufferT>>& handles, Codec codec, int verbose)
+{
+    std::ofstream os(fileName, std::ios::out | std::ios::binary | std::ios::trunc);
+    if 
(!os.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for output"); + writeGrids(os, handles, codec); + if (verbose) std::cout << "Wrote " << handles.size() << " nanovdb::Grid(s) to file named \"" << fileName << "\"" << std::endl; +}// writeGrids + +// --------------------------> readGrid <------------------------------------ + +template +GridHandle readGrid(std::istream& is, int n, const BufferT& pool) +{ + GridHandle handle; + if (n<0) {// read all grids into the same buffer + try {//first try to read a raw grid buffer + handle.read(is, pool); + } catch(const std::logic_error&) { + Segment seg; + uint64_t bufferSize = 0u; + uint32_t gridCount = 0u, gridIndex = 0u; + const auto start = is.tellg(); + while (seg.read(is)) { + std::streamoff skipSize = 0; + for (auto& m : seg.meta) { + ++gridCount; + bufferSize += m.gridSize; + skipSize += m.fileSize; + }// loop over grids in segment + is.seekg(skipSize, std::ios_base::cur); // skip forward from the current position + }// loop over segments + auto buffer = BufferT::create(bufferSize, &pool); + char *ptr = (char*)buffer.data(); + is.seekg(start);// rewind + while (seg.read(is)) { + for (auto& m : seg.meta) { + Internal::read(is, ptr, m.gridSize, seg.header.codec); + tools::updateGridCount((GridData*)ptr, gridIndex++, gridCount); + ptr += m.gridSize; + }// loop over grids in segment + }// loop over segments + return GridHandle(std::move(buffer)); + } + } else {// read a specific grid + try {//first try to read a raw grid buffer + handle.read(is, uint32_t(n), pool); + tools::updateGridCount((GridData*)handle.data(), 0u, 1u); + } catch(const std::logic_error&) { + Segment seg; + int counter = -1; + while (seg.read(is)) { + std::streamoff seek = 0; + for (auto& m : seg.meta) { + if (++counter == n) { + auto buffer = BufferT::create(m.gridSize, &pool); + Internal::read(is, buffer, seg.header.codec); + tools::updateGridCount((GridData*)buffer.data(), 0u, 1u); + return GridHandle(std::move(buffer)); + } else { + seek += m.fileSize; + } + }// loop over grids in segment + is.seekg(seek, std::ios_base::cur); // skip forward from the current position + }// loop over segments + if (n != counter) throw std::runtime_error("stream does not contain a #" + std::to_string(n) + " grid"); + } + } + return handle; +}// readGrid + +/// @brief Read the n'th grid +template +GridHandle readGrid(const std::string& fileName, int n, int verbose, const BufferT& buffer) +{ + std::ifstream is(fileName, std::ios::in | std::ios::binary); + if (!is.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for input"); + auto handle = readGrid(is, n, buffer); + if (verbose) { + if (n<0) { + std::cout << "Read all NanoGrids from the file named \"" << fileName << "\"" << std::endl; + } else { + std::cout << "Read NanoGrid # " << n << " from the file named \"" << fileName << "\"" << std::endl; + } + } + return handle; // is converted to r-value and return value is move constructed. 
+}// readGrid + +/// @brief Read a specific grid from an input stream given the name of the grid +/// @tparam BufferT Buffer type used for allocation +/// @param is input stream from which to read the grid +/// @param gridName string name of the (first) grid to be returned +/// @param pool optional memory pool from which to allocate the grid buffer +/// @return Return the first grid in the input stream with a specific name +/// @throw std::runtime_error with no grid exists with the specified name +template +GridHandle readGrid(std::istream& is, const std::string& gridName, const BufferT& pool) +{ + try { + GridHandle handle; + handle.read(is, gridName, pool); + return handle; + } catch(const std::logic_error&) { + const auto key = stringHash(gridName); + Segment seg; + while (seg.read(is)) {// loop over all segments in stream + std::streamoff seek = 0; + for (auto& m : seg.meta) {// loop over all grids in segment + if ((m.nameKey == 0u || m.nameKey == key) && m.gridName == gridName) { // check for hash key collision + auto buffer = BufferT::create(m.gridSize, &pool); + is.seekg(seek, std::ios_base::cur); // rewind + Internal::read(is, buffer, seg.header.codec); + tools::updateGridCount((GridData*)buffer.data(), 0u, 1u); + return GridHandle(std::move(buffer)); + } else { + seek += m.fileSize; + } + } + is.seekg(seek, std::ios_base::cur); // skip forward from the current position + } + } + throw std::runtime_error("Grid name '" + gridName + "' not found in file"); +}// readGrid + +/// @brief Read the first grid with a specific name +template +GridHandle readGrid(const std::string& fileName, const std::string& gridName, int verbose, const BufferT& buffer) +{ + std::ifstream is(fileName, std::ios::in | std::ios::binary); + if (!is.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for input"); + auto handle = readGrid(is, gridName, buffer); + if (verbose) { + if (handle) { + std::cout << "Read NanoGrid named \"" << gridName << "\" from the file named \"" << fileName << "\"" << std::endl; + } else { + std::cout << "File named \"" << fileName << "\" does not contain a grid named \"" + gridName + "\"" << std::endl; + } + } + return handle; // is converted to r-value and return value is move constructed. +}// readGrid + +// --------------------------> readGrids <------------------------------------ + +template class VecT = std::vector> +VecT> readGrids(std::istream& is, const BufferT& pool = BufferT()) +{ + VecT> handles; + try {//first try to read a raw grid buffer + GridHandle handle; + handle.read(is, pool);// will throw if stream does not contain a raw grid buffer + handles.push_back(std::move(handle)); // force move copy assignment + } catch(const std::logic_error&) { + Segment seg; + while (seg.read(is)) { + uint64_t bufferSize = 0; + for (auto& m : seg.meta) bufferSize += m.gridSize; + auto buffer = BufferT::create(bufferSize, &pool); + uint64_t bufferOffset = 0; + for (uint16_t i = 0; i < seg.header.gridCount; ++i) { + auto *data = util::PtrAdd(buffer.data(), bufferOffset); + Internal::read(is, (char*)data, seg.meta[i].gridSize, seg.header.codec); + tools::updateGridCount(data, uint32_t(i), uint32_t(seg.header.gridCount)); + bufferOffset += seg.meta[i].gridSize; + }// loop over grids in segment + handles.emplace_back(std::move(buffer)); // force move copy assignment + }// loop over segments + } + return handles; // is converted to r-value and return value is move constructed. 
+}// readGrids + +/// @brief Read all the grids +template class VecT> +VecT> readGrids(const std::string& fileName, int verbose, const BufferT& buffer) +{ + std::ifstream is(fileName, std::ios::in | std::ios::binary); + if (!is.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for input"); + auto handles = readGrids(is, buffer); + if (verbose) std::cout << "Read " << handles.size() << " NanoGrid(s) from the file named \"" << fileName << "\"" << std::endl; + return handles; // is converted to r-value and return value is move constructed. +}// readGrids + +// --------------------------> readGridMetaData <------------------------------------ + +inline std::vector readGridMetaData(const std::string& fileName) +{ + std::ifstream is(fileName, std::ios::in | std::ios::binary); + if (!is.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for input"); + return readGridMetaData(is); // is converted to r-value and return value is move constructed. +}// readGridMetaData + +inline std::vector readGridMetaData(std::istream& is) +{ + Segment seg; + std::vector meta; + try { + GridHandle<> handle;// if stream contains a raw grid buffer we unfortunately have to load everything + handle.read(is); + seg.add(handle); + meta = std::move(seg.meta); + } catch(const std::logic_error&) { + while (seg.read(is)) { + std::streamoff skip = 0; + for (auto& m : seg.meta) { + meta.push_back(m); + skip += m.fileSize; + }// loop over grid meta data in segment + is.seekg(skip, std::ios_base::cur); + }// loop over segments + } + return meta; // is converted to r-value and return value is move constructed. +}// readGridMetaData + +// --------------------------> hasGrid <------------------------------------ + +inline bool hasGrid(const std::string& fileName, const std::string& gridName) +{ + std::ifstream is(fileName, std::ios::in | std::ios::binary); + if (!is.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for input"); + return hasGrid(is, gridName); +}// hasGrid + +inline bool hasGrid(std::istream& is, const std::string& gridName) +{ + const auto key = stringHash(gridName); + Segment seg; + while (seg.read(is)) { + std::streamoff seek = 0; + for (auto& m : seg.meta) { + if (m.nameKey == key && m.gridName == gridName) return true; // check for hash key collision + seek += m.fileSize; + }// loop over grid meta data in segment + is.seekg(seek, std::ios_base::cur); + }// loop over segments + return false; +}// hasGrid + +// --------------------------> stringHash <------------------------------------ + +inline uint64_t stringHash(const char* c_str) +{ + uint64_t hash = 0;// zero is returned when cstr = nullptr or "\0" + if (c_str) { + for (auto* str = reinterpret_cast(c_str); *str; ++str) { + uint64_t overflow = hash >> (64 - 8); + hash *= 67; // Next-ish prime after 26 + 26 + 10 + hash += *str + overflow; + } + } + return hash; +}// stringHash + +} // namespace io ====================================================================== + +template +inline std::ostream& +operator<<(std::ostream& os, const math::BBox>& b) +{ + os << "(" << b[0][0] << "," << b[0][1] << "," << b[0][2] << ") -> " + << "(" << b[1][0] << "," << b[1][1] << "," << b[1][2] << ")"; + return os; +} + +inline std::ostream& +operator<<(std::ostream& os, const CoordBBox& b) +{ + os << "(" << b[0][0] << "," << b[0][1] << "," << b[0][2] << ") -> " + << "(" << b[1][0] << "," << b[1][1] << "," << b[1][2] << ")"; + return os; +} + +inline std::ostream& 
+operator<<(std::ostream& os, const Coord& ijk) +{ + os << "(" << ijk[0] << "," << ijk[1] << "," << ijk[2] << ")"; + return os; +} + +template +inline std::ostream& +operator<<(std::ostream& os, const math::Vec3& v) +{ + os << "(" << v[0] << "," << v[1] << "," << v[2] << ")"; + return os; +} + +template +inline std::ostream& +operator<<(std::ostream& os, const math::Vec4& v) +{ + os << "(" << v[0] << "," << v[1] << "," << v[2] << "," << v[3] << ")"; + return os; +} + +} // namespace nanovdb =================================================================== + +#endif // NANOVDB_IO_H_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/util/CSampleFromVoxels.h b/nanovdb/nanovdb/math/CSampleFromVoxels.h similarity index 100% rename from nanovdb/nanovdb/util/CSampleFromVoxels.h rename to nanovdb/nanovdb/math/CSampleFromVoxels.h diff --git a/nanovdb/nanovdb/math/DitherLUT.h b/nanovdb/nanovdb/math/DitherLUT.h new file mode 100644 index 0000000000..4a980e2aa1 --- /dev/null +++ b/nanovdb/nanovdb/math/DitherLUT.h @@ -0,0 +1,189 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 +// +/// @author Jeff Lait +/// +/// @date May 13, 2021 +/// +/// @file DitherLUT.h +/// +/// @brief Defines look up table to do dithering of 8^3 leaf nodes. + +#ifndef NANOVDB_DITHERLUT_HAS_BEEN_INCLUDED +#define NANOVDB_DITHERLUT_HAS_BEEN_INCLUDED + +#include // for __hostdev__, Vec3, Min, Max, Pow2, Pow3, Pow4 + +namespace nanovdb { + +namespace math { + +class DitherLUT +{ + const bool mEnable; +public: + /// @brief Constructor with an optional scaling factor for the dithering + __hostdev__ DitherLUT(bool enable = true) : mEnable(enable) {} + + /// @brief Retrieves dither threshold for an offset within an 8^3 leaf nodes. + /// + /// @param offset into the lookup table of size 512 + __hostdev__ float operator()(const int offset) + { + +// This table was generated with +/************** + +static constexpr inline uint32 +SYSwang_inthash(uint32 key) +{ + // From http://www.concentric.net/~Ttwang/tech/inthash.htm + key += ~(key << 16); + key ^= (key >> 5); + key += (key << 3); + key ^= (key >> 13); + key += ~(key << 9); + key ^= (key >> 17); + return key; +} + +static void +ut_initDitherR(float *pattern, float offset, + int x, int y, int z, int res, int goalres) +{ + // These offsets are designed to maximize the difference between + // dither values in nearby voxels within a given 2x2x2 cell, without + // producing axis-aligned artifacts. The are organized in row-major + // order. + static const float theDitherOffset[] = {0,4,6,2,5,1,3,7}; + static const float theScale = 0.125F; + int key = (((z << res) + y) << res) + x; + + if (res == goalres) + { + pattern[key] = offset; + return; + } + + // Randomly flip (on each axis) the dithering patterns used by the + // subcells. This key is xor'd with the subcell index below before + // looking up in the dither offset list. + key = SYSwang_inthash(key) & 7; + + x <<= 1; + y <<= 1; + z <<= 1; + + offset *= theScale; + for (int i = 0; i < 8; i++) + ut_initDitherR(pattern, offset+theDitherOffset[i ^ key]*theScale, + x+(i&1), y+((i&2)>>1), z+((i&4)>>2), res+1, goalres); +} + +// This is a compact algorithm that accomplishes essentially the same thing +// as ut_initDither() above. We should eventually switch to use this and +// clean the dead code. 
+static fpreal32 * +ut_initDitherRecursive(int goalres) +{ + const int nfloat = 1 << (goalres*3); + float *pattern = new float[nfloat]; + ut_initDitherR(pattern, 1.0F, 0, 0, 0, 0, goalres); + + // This has built an even spacing from 1/nfloat to 1.0. + // however, our dither pattern should be 1/(nfloat+1) to nfloat/(nfloat+1) + // So we do a correction here. Note that the earlier calculations are + // done with powers of 2 so are exact, so it does make sense to delay + // the renormalization to this pass. + float correctionterm = nfloat / (nfloat+1.0F); + for (int i = 0; i < nfloat; i++) + pattern[i] *= correctionterm; + return pattern; +} + + theDitherMatrix = ut_initDitherRecursive(3); + + for (int i = 0; i < 512/8; i ++) + { + for (int j = 0; j < 8; j ++) + std::cout << theDitherMatrix[i*8+j] << "f, "; + std::cout << std::endl; + } + + **************/ + static const float LUT[512] = + { + 0.14425f, 0.643275f, 0.830409f, 0.331384f, 0.105263f, 0.604289f, 0.167641f, 0.666667f, + 0.892788f, 0.393762f, 0.0818713f, 0.580897f, 0.853801f, 0.354776f, 0.916179f, 0.417154f, + 0.612086f, 0.11306f, 0.79922f, 0.300195f, 0.510721f, 0.0116959f, 0.947368f, 0.448343f, + 0.362573f, 0.861598f, 0.0506823f, 0.549708f, 0.261209f, 0.760234f, 0.19883f, 0.697856f, + 0.140351f, 0.639376f, 0.576998f, 0.0779727f, 0.522417f, 0.0233918f, 0.460039f, 0.959064f, + 0.888889f, 0.389864f, 0.327485f, 0.826511f, 0.272904f, 0.77193f, 0.709552f, 0.210526f, + 0.483431f, 0.982456f, 0.296296f, 0.795322f, 0.116959f, 0.615984f, 0.0545809f, 0.553606f, + 0.732943f, 0.233918f, 0.545809f, 0.0467836f, 0.865497f, 0.366472f, 0.803119f, 0.304094f, + 0.518519f, 0.0194932f, 0.45614f, 0.955166f, 0.729045f, 0.230019f, 0.54191f, 0.042885f, + 0.269006f, 0.768031f, 0.705653f, 0.206628f, 0.479532f, 0.978558f, 0.292398f, 0.791423f, + 0.237817f, 0.736842f, 0.424951f, 0.923977f, 0.136452f, 0.635478f, 0.323587f, 0.822612f, + 0.986355f, 0.487329f, 0.674464f, 0.175439f, 0.88499f, 0.385965f, 0.573099f, 0.0740741f, + 0.51462f, 0.0155945f, 0.202729f, 0.701754f, 0.148148f, 0.647174f, 0.834308f, 0.335283f, + 0.265107f, 0.764133f, 0.951267f, 0.452242f, 0.896686f, 0.397661f, 0.08577f, 0.584795f, + 0.8577f, 0.358674f, 0.920078f, 0.421053f, 0.740741f, 0.241715f, 0.678363f, 0.179337f, + 0.109162f, 0.608187f, 0.17154f, 0.670565f, 0.491228f, 0.990253f, 0.42885f, 0.927875f, + 0.0662768f, 0.565302f, 0.62768f, 0.128655f, 0.183236f, 0.682261f, 0.744639f, 0.245614f, + 0.814815f, 0.315789f, 0.378168f, 0.877193f, 0.931774f, 0.432749f, 0.495127f, 0.994152f, + 0.0350877f, 0.534113f, 0.97076f, 0.471735f, 0.214425f, 0.71345f, 0.526316f, 0.0272904f, + 0.783626f, 0.2846f, 0.222222f, 0.721248f, 0.962963f, 0.463938f, 0.276803f, 0.775828f, + 0.966862f, 0.467836f, 0.405458f, 0.904483f, 0.0701754f, 0.569201f, 0.881092f, 0.382066f, + 0.218324f, 0.717349f, 0.654971f, 0.155945f, 0.818713f, 0.319688f, 0.132554f, 0.631579f, + 0.0623782f, 0.561404f, 0.748538f, 0.249513f, 0.912281f, 0.413255f, 0.974659f, 0.475634f, + 0.810916f, 0.311891f, 0.499025f, 0.998051f, 0.163743f, 0.662768f, 0.226121f, 0.725146f, + 0.690058f, 0.191033f, 0.00389864f, 0.502924f, 0.557505f, 0.0584795f, 0.120858f, 0.619883f, + 0.440546f, 0.939571f, 0.752437f, 0.253411f, 0.307992f, 0.807018f, 0.869396f, 0.37037f, + 0.658869f, 0.159844f, 0.346979f, 0.846004f, 0.588694f, 0.0896686f, 0.152047f, 0.651072f, + 0.409357f, 0.908382f, 0.596491f, 0.0974659f, 0.339181f, 0.838207f, 0.900585f, 0.401559f, + 0.34308f, 0.842105f, 0.779727f, 0.280702f, 0.693957f, 0.194932f, 0.25731f, 0.756335f, + 0.592593f, 0.0935673f, 0.0311891f, 
0.530214f, 0.444444f, 0.94347f, 0.506823f, 0.00779727f, + 0.68616f, 0.187135f, 0.124756f, 0.623782f, 0.288499f, 0.787524f, 0.350877f, 0.849903f, + 0.436647f, 0.935673f, 0.873294f, 0.374269f, 0.538012f, 0.0389864f, 0.60039f, 0.101365f, + 0.57115f, 0.0721248f, 0.758285f, 0.259259f, 0.719298f, 0.220273f, 0.532164f, 0.0331384f, + 0.321637f, 0.820663f, 0.00974659f, 0.508772f, 0.469786f, 0.968811f, 0.282651f, 0.781676f, + 0.539961f, 0.0409357f, 0.727096f, 0.22807f, 0.500975f, 0.00194932f, 0.563353f, 0.0643275f, + 0.290448f, 0.789474f, 0.477583f, 0.976608f, 0.251462f, 0.750487f, 0.31384f, 0.812865f, + 0.94152f, 0.442495f, 0.879142f, 0.380117f, 0.37232f, 0.871345f, 0.309942f, 0.808967f, + 0.192982f, 0.692008f, 0.130604f, 0.62963f, 0.621832f, 0.122807f, 0.559454f, 0.0604289f, + 0.660819f, 0.161793f, 0.723197f, 0.224172f, 0.403509f, 0.902534f, 0.840156f, 0.341131f, + 0.411306f, 0.910331f, 0.473684f, 0.97271f, 0.653021f, 0.153996f, 0.0916179f, 0.590643f, + 0.196881f, 0.695906f, 0.384016f, 0.883041f, 0.0955166f, 0.594542f, 0.157895f, 0.65692f, + 0.945419f, 0.446394f, 0.633528f, 0.134503f, 0.844055f, 0.345029f, 0.906433f, 0.407407f, + 0.165692f, 0.664717f, 0.103314f, 0.602339f, 0.126706f, 0.625731f, 0.189084f, 0.688109f, + 0.91423f, 0.415205f, 0.851852f, 0.352827f, 0.875244f, 0.376218f, 0.937622f, 0.438596f, + 0.317739f, 0.816764f, 0.255361f, 0.754386f, 0.996101f, 0.497076f, 0.933723f, 0.434698f, + 0.567251f, 0.0682261f, 0.504873f, 0.00584795f, 0.247563f, 0.746589f, 0.185185f, 0.684211f, + 0.037037f, 0.536062f, 0.0994152f, 0.598441f, 0.777778f, 0.278752f, 0.465887f, 0.964912f, + 0.785575f, 0.28655f, 0.847953f, 0.348928f, 0.0292398f, 0.528265f, 0.7154f, 0.216374f, + 0.39961f, 0.898636f, 0.961014f, 0.461988f, 0.0487329f, 0.547758f, 0.111111f, 0.610136f, + 0.649123f, 0.150097f, 0.212476f, 0.711501f, 0.797271f, 0.298246f, 0.859649f, 0.360624f, + 0.118908f, 0.617934f, 0.0565302f, 0.555556f, 0.329435f, 0.82846f, 0.516569f, 0.0175439f, + 0.867446f, 0.368421f, 0.805068f, 0.306043f, 0.578947f, 0.079922f, 0.267057f, 0.766082f, + 0.270955f, 0.76998f, 0.707602f, 0.208577f, 0.668616f, 0.169591f, 0.606238f, 0.107212f, + 0.520468f, 0.0214425f, 0.45809f, 0.957115f, 0.419103f, 0.918129f, 0.356725f, 0.855751f, + 0.988304f, 0.489279f, 0.426901f, 0.925926f, 0.450292f, 0.949318f, 0.512671f, 0.0136452f, + 0.239766f, 0.738791f, 0.676413f, 0.177388f, 0.699805f, 0.20078f, 0.263158f, 0.762183f, + 0.773879f, 0.274854f, 0.337232f, 0.836257f, 0.672515f, 0.173489f, 0.734893f, 0.235867f, + 0.0253411f, 0.524366f, 0.586745f, 0.0877193f, 0.423002f, 0.922027f, 0.48538f, 0.984405f, + 0.74269f, 0.243665f, 0.680312f, 0.181287f, 0.953216f, 0.454191f, 0.1423f, 0.641326f, + 0.493177f, 0.992203f, 0.430799f, 0.929825f, 0.204678f, 0.703704f, 0.890838f, 0.391813f, + 0.894737f, 0.395712f, 0.0838207f, 0.582846f, 0.0448343f, 0.54386f, 0.231969f, 0.730994f, + 0.146199f, 0.645224f, 0.832359f, 0.333333f, 0.793372f, 0.294347f, 0.980507f, 0.481481f, + 0.364522f, 0.863548f, 0.80117f, 0.302144f, 0.824561f, 0.325536f, 0.138402f, 0.637427f, + 0.614035f, 0.11501f, 0.0526316f, 0.551657f, 0.0760234f, 0.575049f, 0.88694f, 0.387914f, + }; + return mEnable ? LUT[offset & 511] : 0.5f;// branch prediction should optimize this! 
+    }
+}; // DitherLUT class
+
+}// namespace math
+
+}// namespace nanovdb
+
+#endif // NANOVDB_DITHERLUT_HAS_BEEN_INCLUDED
diff --git a/nanovdb/nanovdb/math/HDDA.h b/nanovdb/nanovdb/math/HDDA.h
new file mode 100644
index 0000000000..420692c833
--- /dev/null
+++ b/nanovdb/nanovdb/math/HDDA.h
@@ -0,0 +1,510 @@
+// Copyright Contributors to the OpenVDB Project
+// SPDX-License-Identifier: MPL-2.0
+
+/// @file HDDA.h
+///
+/// @author Ken Museth
+///
+/// @brief Hierarchical Digital Differential Analyzers specialized for VDB.
+
+#ifndef NANOVDB_HDDA_H_HAS_BEEN_INCLUDED
+#define NANOVDB_HDDA_H_HAS_BEEN_INCLUDED
+
+// Comment out to disable this explicit round-off check
+#define ENFORCE_FORWARD_STEPPING
+
+#include <nanovdb/math/Math.h> // only dependency
+
+namespace nanovdb::math {
+
+/// @brief A Digital Differential Analyzer specialized for OpenVDB grids
+/// @note  Conceptually similar to Bresenham's line algorithm applied
+///        to a 3D Ray intersecting OpenVDB nodes or voxels. Log2Dim = 0
+///        corresponds to a voxel and Log2Dim > 0 a tree node of size 2^Log2Dim.
+///
+/// @note  The Ray template class is expected to have the following
+///        methods: test(time), t0(), t1(), invDir(), and operator()(time).
+///        See the example Ray class above for their definition.
+template<typename RayT, typename CoordT = Coord>
+class HDDA
+{
+public:
+    using RealType = typename RayT::RealType;
+    using RealT = RealType;
+    using Vec3Type = typename RayT::Vec3Type;
+    using Vec3T = Vec3Type;
+    using CoordType = CoordT;
+
+    /// @brief Default ctor
+    HDDA() = default;
+
+    /// @brief ctor from ray and dimension at which the DDA marches
+    __hostdev__ HDDA(const RayT& ray, int dim) { this->init(ray, dim); }
+
+    /// @brief Re-initializes the HDDA
+    __hostdev__ void init(const RayT& ray, RealT startTime, RealT maxTime, int dim)
+    {
+        assert(startTime <= maxTime);
+        mDim = dim;
+        mT0 = startTime;
+        mT1 = maxTime;
+        const Vec3T &pos = ray(mT0), &dir = ray.dir(), &inv = ray.invDir();
+        mVoxel = RoundDown<CoordT>(pos) & (~(dim - 1));
+        for (int axis = 0; axis < 3; ++axis) {
+            if (dir[axis] == RealT(0)) { //handles dir = +/- 0
+                mNext[axis] = Maximum<RealT>::value(); //i.e. disabled!
+                mStep[axis] = 0;
+            } else if (inv[axis] > 0) {
+                mStep[axis] = 1;
+                mNext[axis] = mT0 + (mVoxel[axis] + dim - pos[axis]) * inv[axis];
+                mDelta[axis] = inv[axis];
+            } else {
+                mStep[axis] = -1;
+                mNext[axis] = mT0 + (mVoxel[axis] - pos[axis]) * inv[axis];
+                mDelta[axis] = -inv[axis];
+            }
+        }
+    }
+
+    /// @brief Similar to init above except it uses the bounds of the input ray
+    __hostdev__ void init(const RayT& ray, int dim) { this->init(ray, ray.t0(), ray.t1(), dim); }
+
+    /// @brief Updates the HDDA to march with the specified dimension
+    __hostdev__ bool update(const RayT& ray, int dim)
+    {
+        if (mDim == dim)
+            return false;
+        mDim = dim;
+        const Vec3T &pos = ray(mT0), &inv = ray.invDir();
+        mVoxel = RoundDown<CoordT>(pos) & (~(dim - 1));
+        for (int axis = 0; axis < 3; ++axis) {
+            if (mStep[axis] == 0)
+                continue;
+            mNext[axis] = mT0 + (mVoxel[axis] - pos[axis]) * inv[axis];
+            if (mStep[axis] > 0)
+                mNext[axis] += dim * inv[axis];
+        }
+
+        return true;
+    }
+
+    __hostdev__ int dim() const { return mDim; }
+
+    /// @brief Increment the voxel index to next intersected voxel or node
+    ///        and returns true if the step in time does not exceed maxTime.
+ __hostdev__ bool step() + { + const int axis = MinIndex(mNext); +#if 1 + switch (axis) { + case 0: + return step<0>(); + case 1: + return step<1>(); + default: + return step<2>(); + } +#else + mT0 = mNext[axis]; + mNext[axis] += mDim * mDelta[axis]; + mVoxel[axis] += mDim * mStep[axis]; + return mT0 <= mT1; +#endif + } + + /// @brief Return the index coordinates of the next node or voxel + /// intersected by the ray. If Log2Dim = 0 the return value is the + /// actual signed coordinate of the voxel, else it is the origin + /// of the corresponding VDB tree node or tile. + /// @note Incurs no computational overhead. + __hostdev__ const CoordT& voxel() const { return mVoxel; } + + /// @brief Return the time (parameterized along the Ray) of the + /// first hit of a tree node of size 2^Log2Dim. + /// @details This value is initialized to startTime or ray.t0() + /// depending on the constructor used. + /// @note Incurs no computational overhead. + __hostdev__ RealType time() const { return mT0; } + + /// @brief Return the maximum time (parameterized along the Ray). + __hostdev__ RealType maxTime() const { return mT1; } + + /// @brief Return the time (parameterized along the Ray) of the + /// second (i.e. next) hit of a tree node of size 2^Log2Dim. + /// @note Incurs a (small) computational overhead. + __hostdev__ RealType next() const + { +#if 1 //def __CUDA_ARCH__ + return fminf(mT1, fminf(mNext[0], fminf(mNext[1], mNext[2]))); +#else + return std::min(mT1, std::min(mNext[0], std::min(mNext[1], mNext[2]))); +#endif + } + +private: + // helper to implement the general form + template + __hostdev__ bool step() + { +#ifdef ENFORCE_FORWARD_STEPPING + //if (mNext[axis] <= mT0) mNext[axis] += mT0 - mNext[axis] + fmaxf(mNext[axis]*1.0e-6f, 1.0e-6f); + //if (mNext[axis] <= mT0) mNext[axis] += mT0 - mNext[axis] + (mNext[axis] + 1.0f)*1.0e-6f; + if (mNext[axis] <= mT0) { + mNext[axis] += mT0 - 0.999999f * mNext[axis] + 1.0e-6f; + } +#endif + mT0 = mNext[axis]; + mNext[ axis] += mDim * mDelta[axis]; + mVoxel[axis] += mDim * mStep[ axis]; + return mT0 <= mT1; + } + + int32_t mDim; + RealT mT0, mT1; // min and max allowed times + CoordT mVoxel, mStep; // current voxel location and step to next voxel location + Vec3T mDelta, mNext; // delta time and next time +}; // class HDDA + +/////////////////////////////////////////// ZeroCrossing //////////////////////////////////////////// + +/// @brief returns true if the ray intersects a zero-crossing at the voxel level of the grid in the accessor +/// The empty-space ray-marching is performed at all levels of the tree using an +/// HDDA. If an intersection is detected, then ijk is updated with the index coordinate of the closest +/// voxel after the intersection point, v contains the grid values at ijk, and t is set to the time of +/// the intersection along the ray. 
+template +inline __hostdev__ bool ZeroCrossing(RayT& ray, AccT& acc, Coord& ijk, typename AccT::ValueType& v, float& t) +{ + if (!ray.clip(acc.root().bbox()) || ray.t1() > 1e20) + return false; // clip ray to bbox + static const float Delta = 1.0001f; + ijk = RoundDown(ray.start()); // first hit of bbox + HDDA hdda(ray, acc.getDim(ijk, ray)); + const auto v0 = acc.getValue(ijk); + while (hdda.step()) { + ijk = RoundDown(ray(hdda.time() + Delta)); + hdda.update(ray, acc.getDim(ijk, ray)); + if (hdda.dim() > 1 || !acc.isActive(ijk)) + continue; // either a tile value or an inactive voxel + while (hdda.step() && acc.isActive(hdda.voxel())) { // in the narrow band + v = acc.getValue(hdda.voxel()); + if (v * v0 < 0) { // zero crossing + ijk = hdda.voxel(); + t = hdda.time(); + return true; + } + } + } + return false; +} + +/////////////////////////////////////////// DDA //////////////////////////////////////////// + +/// @brief A Digital Differential Analyzer. Unlike HDDA (defined above) this DDA +/// uses a fixed step-size defined by the template parameter Dim! +/// +/// @note The Ray template class is expected to have the following +/// methods: test(time), t0(), t1(), invDir(), and operator()(time). +/// See the example Ray class above for their definition. +template +class DDA +{ + static_assert(Dim >= 1, "Dim must be >= 1"); + +public: + using RealType = typename RayT::RealType; + using RealT = RealType; + using Vec3Type = typename RayT::Vec3Type; + using Vec3T = Vec3Type; + using CoordType = CoordT; + + /// @brief Default ctor + DDA() = default; + + /// @brief ctor from ray and dimension at which the DDA marches + __hostdev__ DDA(const RayT& ray) { this->init(ray); } + + /// @brief Re-initializes the DDA + __hostdev__ void init(const RayT& ray, RealT startTime, RealT maxTime) + { + assert(startTime <= maxTime); + mT0 = startTime; + mT1 = maxTime; + const Vec3T &pos = ray(mT0), &dir = ray.dir(), &inv = ray.invDir(); + mVoxel = RoundDown(pos) & (~(Dim - 1)); + for (int axis = 0; axis < 3; ++axis) { + if (dir[axis] == RealT(0)) { //handles dir = +/- 0 + mNext[axis] = Maximum::value(); //i.e. disabled! + mStep[axis] = 0; + } else if (inv[axis] > 0) { + mStep[axis] = Dim; + mNext[axis] = (mT0 + (mVoxel[axis] + Dim - pos[axis]) * inv[axis]); + mDelta[axis] = inv[axis]; + } else { + mStep[axis] = -Dim; + mNext[axis] = mT0 + (mVoxel[axis] - pos[axis]) * inv[axis]; + mDelta[axis] = -inv[axis]; + } + } + } + + /// @brief Simular to init above except it uses the bounds of the input ray + __hostdev__ void init(const RayT& ray) { this->init(ray, ray.t0(), ray.t1()); } + + /// @brief Increment the voxel index to next intersected voxel or node + /// and returns true if the step in time does not exceed maxTime. + __hostdev__ bool step() + { + const int axis = MinIndex(mNext); +#if 1 + switch (axis) { + case 0: + return step<0>(); + case 1: + return step<1>(); + default: + return step<2>(); + } +#else +#ifdef ENFORCE_FORWARD_STEPPING + if (mNext[axis] <= mT0) { + mNext[axis] += mT0 - 0.999999f * mNext[axis] + 1.0e-6f; + } +#endif + mT0 = mNext[axis]; + mNext[axis] += mDelta[axis]; + mVoxel[axis] += mStep[axis]; + return mT0 <= mT1; +#endif + } + + /// @brief Return the index coordinates of the next node or voxel + /// intersected by the ray. If Log2Dim = 0 the return value is the + /// actual signed coordinate of the voxel, else it is the origin + /// of the corresponding VDB tree node or tile. + /// @note Incurs no computational overhead. 
+ __hostdev__ const CoordT& voxel() const { return mVoxel; } + + /// @brief Return the time (parameterized along the Ray) of the + /// first hit of a tree node of size 2^Log2Dim. + /// @details This value is initialized to startTime or ray.t0() + /// depending on the constructor used. + /// @note Incurs no computational overhead. + __hostdev__ RealType time() const { return mT0; } + + /// @brief Return the maximum time (parameterized along the Ray). + __hostdev__ RealType maxTime() const { return mT1; } + + /// @brief Return the time (parameterized along the Ray) of the + /// second (i.e. next) hit of a tree node of size 2^Log2Dim. + /// @note Incurs a (small) computational overhead. + __hostdev__ RealType next() const + { + return Min(mT1, Min(mNext[0], Min(mNext[1], mNext[2]))); + } + + __hostdev__ int nextAxis() const + { + return nanovdb::math::MinIndex(mNext); + } + +private: + // helper to implement the general form + template + __hostdev__ bool step() + { +#ifdef ENFORCE_FORWARD_STEPPING + if (mNext[axis] <= mT0) { + mNext[axis] += mT0 - 0.999999f * mNext[axis] + 1.0e-6f; + } +#endif + mT0 = mNext[axis]; + mNext[axis] += mDelta[axis]; + mVoxel[axis] += mStep[axis]; + return mT0 <= mT1; + } + + RealT mT0, mT1; // min and max allowed times + CoordT mVoxel, mStep; // current voxel location and step to next voxel location + Vec3T mDelta, mNext; // delta time and next time +}; // class DDA + +/////////////////////////////////////////// ZeroCrossingNode //////////////////////////////////////////// + +template +inline __hostdev__ bool ZeroCrossingNode(RayT& ray, const NodeT& node, float v0, nanovdb::math::Coord& ijk, float& v, float& t) +{ + math::BBox bbox(node.origin(), node.origin() + Coord(node.dim() - 1)); + + if (!ray.clip(node.bbox())) { + return false; + } + + const float t0 = ray.t0(); + + static const float Delta = 1.0001f; + ijk = Coord::Floor(ray(ray.t0() + Delta)); + + t = t0; + v = 0; + + DDA dda(ray); + while (dda.step()) { + ijk = dda.voxel(); + + if (bbox.isInside(ijk) == false) + return false; + + v = node.getValue(ijk); + if (v * v0 < 0) { + t = dda.time(); + return true; + } + } + return false; +} + +/////////////////////////////////////////// TreeMarcher //////////////////////////////////////////// + +/// @brief returns true if the ray intersects an active value at any level of the grid in the accessor. +/// The empty-space ray-marching is performed at all levels of the tree using an +/// HDDA. If an intersection is detected, then ijk is updated with the index coordinate of the first +/// active voxel or tile, and t is set to the time of its intersection along the ray. 
+template +inline __hostdev__ bool firstActive(RayT& ray, AccT& acc, Coord &ijk, float& t) +{ + if (!ray.clip(acc.root().bbox()) || ray.t1() > 1e20) {// clip ray to bbox + return false;// missed or undefined bbox + } + static const float Delta = 1.0001f;// forward step-size along the ray to avoid getting stuck + t = ray.t0();// initiate time + ijk = RoundDown(ray.start()); // first voxel inside bbox + for (HDDA hdda(ray, acc.getDim(ijk, ray)); !acc.isActive(ijk); hdda.update(ray, acc.getDim(ijk, ray))) { + if (!hdda.step()) return false;// leap-frog HDDA and exit if ray bound is exceeded + t = hdda.time() + Delta;// update time + ijk = RoundDown( ray(t) );// update ijk + } + return true; +} + +/////////////////////////////////////////// TreeMarcher //////////////////////////////////////////// + +/// @brief A Tree Marcher for Generic Grids + +template +class TreeMarcher +{ +public: + using ChildT = typename NodeT::ChildNodeType; + using RealType = typename RayT::RealType; + using RealT = RealType; + using CoordType = CoordT; + + inline __hostdev__ TreeMarcher(AccT& acc) + : mAcc(acc) + { + } + + /// @brief Initialize the TreeMarcher with an index-space ray. + inline __hostdev__ bool init(const RayT& indexRay) + { + mRay = indexRay; + if (!mRay.clip(mAcc.root().bbox())) + return false; // clip ray to bbox + + // tweak the intersection span into the bbox. + // CAVEAT: this will potentially clip some tiny corner intersections. + static const float Eps = 0.000001f; + const float t0 = mRay.t0() + Eps; + const float t1 = mRay.t1() - Eps; + if (t0 > t1) + return false; + + const CoordT ijk = RoundDown(mRay(t0)); + const uint32_t dim = mAcc.getDim(ijk, mRay); + mHdda.init(mRay, t0, t1, nanovdb::math::Max(dim, NodeT::dim())); + + mT0 = (dim <= ChildT::dim()) ? mHdda.time() : -1; // potentially begin a span. + mTmax = t1; + return true; + } + + /// @brief step the ray through the tree. If the ray hits a node then + /// populate t0 & t1, and the node. + /// @return true when a node of type NodeT is intersected, false otherwise. + inline __hostdev__ bool step(const NodeT** node, float& t0, float& t1) + { + // CAVEAT: if Delta is too large then it will clip corners of nodes in a visible way. + // but it has to be quite large when very far from the grid (due to fp32 rounding) + static const float Delta = 0.01f; + bool hddaIsValid; + + do { + t0 = mT0; + + auto currentNode = mAcc.template getNode(); + + // get next node intersection... + hddaIsValid = mHdda.step(); + const CoordT nextIjk = RoundDown(mRay(mHdda.time() + Delta)); + const auto nextDim = mAcc.getDim(nextIjk, mRay); + mHdda.update(mRay, (int)Max(nextDim, NodeT::dim())); + mT0 = (nextDim <= ChildT::dim()) ? mHdda.time() : -1; // potentially begin a span. + + if (t0 >= 0) { // we are in a span. + t1 = Min(mTmax, mHdda.time()); + + // TODO: clean this up! + if (t0 >= t1 || currentNode == nullptr) + continue; + + *node = currentNode; + return true; + } + + } while (hddaIsValid); + + return false; + } + + inline __hostdev__ const RayT& ray() const { return mRay; } + + inline __hostdev__ RayT& ray() { return mRay; } + +private: + AccT& mAcc; + RayT mRay; + HDDA mHdda; + float mT0; + float mTmax; +};// TreeMarcher + +/////////////////////////////////////////// PointTreeMarcher //////////////////////////////////////////// + +/// @brief A Tree Marcher for Point Grids +/// +/// @note This class will handle correctly offseting the ray by 0.5 to ensure that +/// the underlying HDDA will intersect with the grid-cells. See details below. 
+
+template<typename AccT, typename RayT, typename CoordT = Coord>
+class PointTreeMarcher : public TreeMarcher<LeafNode<uint32_t>, RayT, AccT, CoordT>
+{
+    using BaseT = TreeMarcher<LeafNode<uint32_t>, RayT, AccT, CoordT>;
+public:
+    __hostdev__ PointTreeMarcher(AccT& acc) : BaseT(acc) {}
+
+    /// @brief Initiates this instance with a ray in index space.
+    ///
+    /// @details An offset by 0.5 is applied to the ray to account for the fact that points in vdb
+    ///          grids are bucketed into so-called grid cells, which are centered around grid voxels,
+    ///          whereas the DDA is based on so-called grid nodes, which are coincident with grid
+    ///          voxels. So, rather than offsetting the points by 0.5 to bring them into a grid
+    ///          node representation, this method offsets the eye of the ray by 0.5, which effectively
+    ///          ensures that the DDA operates on grid cells as opposed to grid nodes. This subtle
+    ///          but important offset by 0.5 is explained in more detail in our online documentation.
+    __hostdev__ bool init(RayT ray) { return BaseT::init(ray.offsetEye(0.5)); }
+};// PointTreeMarcher
+
+} // namespace nanovdb::math
+
+#endif // NANOVDB_HDDA_H_HAS_BEEN_INCLUDED
diff --git a/nanovdb/nanovdb/math/Math.h b/nanovdb/nanovdb/math/Math.h
new file mode 100644
index 0000000000..84100d499c
--- /dev/null
+++ b/nanovdb/nanovdb/math/Math.h
@@ -0,0 +1,1448 @@
+// Copyright Contributors to the OpenVDB Project
+// SPDX-License-Identifier: MPL-2.0
+
+/*!
+    \file Math.h
+
+    \author Ken Museth
+
+    \date January 8, 2020
+
+    \brief Math functions and classes
+
+*/
+
+#ifndef NANOVDB_MATH_MATH_H_HAS_BEEN_INCLUDED
+#define NANOVDB_MATH_MATH_H_HAS_BEEN_INCLUDED
+
+#include <nanovdb/util/Util.h> // for __hostdev__ and lots of other utility functions
+
+namespace nanovdb {// =================================================================
+
+namespace math {// =============================================================
+
+// ----------------------------> Various math functions <-------------------------------------
+
+//@{
+/// @brief Pi constant taken from Boost to match old behaviour
+template<typename T>
+inline __hostdev__ constexpr T pi()
+{
+    return 3.141592653589793238462643383279502884e+00;
+}
+template<>
+inline __hostdev__ constexpr float pi<float>()
+{
+    return 3.141592653589793238462643383279502884e+00F;
+}
+template<>
+inline __hostdev__ constexpr double pi<double>()
+{
+    return 3.141592653589793238462643383279502884e+00;
+}
+template<>
+inline __hostdev__ constexpr long double pi<long double>()
+{
+    return 3.141592653589793238462643383279502884e+00L;
+}
+//@}
+
+//@{
+/// Tolerance for floating-point comparison
+template<typename T>
+struct Tolerance;
+template<>
+struct Tolerance<float>
+{
+    __hostdev__ static float value() { return 1e-8f; }
+};
+template<>
+struct Tolerance<double>
+{
+    __hostdev__ static double value() { return 1e-15; }
+};
+//@}
+
+//@{
+/// Delta for small floating-point offsets
+template<typename T>
+struct Delta;
+template<>
+struct Delta<float>
+{
+    __hostdev__ static float value() { return 1e-5f; }
+};
+template<>
+struct Delta<double>
+{
+    __hostdev__ static double value() { return 1e-9; }
+};
+//@}
+
+//@{
+/// Maximum floating-point values
+template<typename T>
+struct Maximum;
+#if defined(__CUDA_ARCH__) || defined(__HIP__)
+template<>
+struct Maximum<int>
+{
+    __hostdev__ static int value() { return 2147483647; }
+};
+template<>
+struct Maximum<uint32_t>
+{
+    __hostdev__ static uint32_t value() { return 4294967295u; }
+};
+template<>
+struct Maximum<float>
+{
+    __hostdev__ static float value() { return 1e+38f; }
+};
+template<>
+struct Maximum<double>
+{
+    __hostdev__ static double value() { return 1e+308; }
+};
+#else
+template<typename T>
+struct Maximum
+{
+    static T value() { return std::numeric_limits<T>::max(); }
+};
+#endif
+//@}
+
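+// A small usage sketch of the traits above; they stand in for
+// std::numeric_limits and friends in device code:
+//
+//   template<typename T>
+//   __hostdev__ bool isTiny(T x) { return math::Abs(x) < math::Delta<T>::value(); }
+//
+//   float tMax = math::Maximum<float>::value(); // ~1e+38f on the GPU, FLT_MAX on the host
+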
+template +__hostdev__ inline bool isApproxZero(const Type& x) +{ + return !(x > Tolerance::value()) && !(x < -Tolerance::value()); +} + +template +__hostdev__ inline Type Min(Type a, Type b) +{ + return (a < b) ? a : b; +} +__hostdev__ inline int32_t Min(int32_t a, int32_t b) +{ + return int32_t(fminf(float(a), float(b))); +} +__hostdev__ inline uint32_t Min(uint32_t a, uint32_t b) +{ + return uint32_t(fminf(float(a), float(b))); +} +__hostdev__ inline float Min(float a, float b) +{ + return fminf(a, b); +} +__hostdev__ inline double Min(double a, double b) +{ + return fmin(a, b); +} +template +__hostdev__ inline Type Max(Type a, Type b) +{ + return (a > b) ? a : b; +} + +__hostdev__ inline int32_t Max(int32_t a, int32_t b) +{ + return int32_t(fmaxf(float(a), float(b))); +} +__hostdev__ inline uint32_t Max(uint32_t a, uint32_t b) +{ + return uint32_t(fmaxf(float(a), float(b))); +} +__hostdev__ inline float Max(float a, float b) +{ + return fmaxf(a, b); +} +__hostdev__ inline double Max(double a, double b) +{ + return fmax(a, b); +} +__hostdev__ inline float Clamp(float x, float a, float b) +{ + return Max(Min(x, b), a); +} +__hostdev__ inline double Clamp(double x, double a, double b) +{ + return Max(Min(x, b), a); +} + +__hostdev__ inline float Fract(float x) +{ + return x - floorf(x); +} +__hostdev__ inline double Fract(double x) +{ + return x - floor(x); +} + +__hostdev__ inline int32_t Floor(float x) +{ + return int32_t(floorf(x)); +} +__hostdev__ inline int32_t Floor(double x) +{ + return int32_t(floor(x)); +} + +__hostdev__ inline int32_t Ceil(float x) +{ + return int32_t(ceilf(x)); +} +__hostdev__ inline int32_t Ceil(double x) +{ + return int32_t(ceil(x)); +} + +template +__hostdev__ inline T Pow2(T x) +{ + return x * x; +} + +template +__hostdev__ inline T Pow3(T x) +{ + return x * x * x; +} + +template +__hostdev__ inline T Pow4(T x) +{ + return Pow2(x * x); +} +template +__hostdev__ inline T Abs(T x) +{ + return x < 0 ? -x : x; +} + +template<> +__hostdev__ inline float Abs(float x) +{ + return fabsf(x); +} + +template<> +__hostdev__ inline double Abs(double x) +{ + return fabs(x); +} + +template<> +__hostdev__ inline int Abs(int x) +{ + return abs(x); +} + +template class Vec3T> +__hostdev__ inline CoordT Round(const Vec3T& xyz); + +template class Vec3T> +__hostdev__ inline CoordT Round(const Vec3T& xyz) +{ + return CoordT(int32_t(rintf(xyz[0])), int32_t(rintf(xyz[1])), int32_t(rintf(xyz[2]))); + //return CoordT(int32_t(roundf(xyz[0])), int32_t(roundf(xyz[1])), int32_t(roundf(xyz[2])) ); + //return CoordT(int32_t(floorf(xyz[0] + 0.5f)), int32_t(floorf(xyz[1] + 0.5f)), int32_t(floorf(xyz[2] + 0.5f))); +} + +template class Vec3T> +__hostdev__ inline CoordT Round(const Vec3T& xyz) +{ + return CoordT(int32_t(floor(xyz[0] + 0.5)), int32_t(floor(xyz[1] + 0.5)), int32_t(floor(xyz[2] + 0.5))); +} + +template class Vec3T> +__hostdev__ inline CoordT RoundDown(const Vec3T& xyz) +{ + return CoordT(Floor(xyz[0]), Floor(xyz[1]), Floor(xyz[2])); +} + +//@{ +/// Return the square root of a floating-point value. +__hostdev__ inline float Sqrt(float x) +{ + return sqrtf(x); +} +__hostdev__ inline double Sqrt(double x) +{ + return sqrt(x); +} +//@} + +/// Return the sign of the given value as an integer (either -1, 0 or 1). +template +__hostdev__ inline T Sign(const T& x) +{ + return ((T(0) < x) ? T(1) : T(0)) - ((x < T(0)) ? 
T(1) : T(0)); +} + +template +__hostdev__ inline int MinIndex(const Vec3T& v) +{ +#if 0 + static const int hashTable[8] = {2, 1, 9, 1, 2, 9, 0, 0}; //9 are dummy values + const int hashKey = ((v[0] < v[1]) << 2) + ((v[0] < v[2]) << 1) + (v[1] < v[2]); // ?*4+?*2+?*1 + return hashTable[hashKey]; +#else + if (v[0] < v[1] && v[0] < v[2]) + return 0; + if (v[1] < v[2]) + return 1; + else + return 2; +#endif +} + +template +__hostdev__ inline int MaxIndex(const Vec3T& v) +{ +#if 0 + static const int hashTable[8] = {2, 1, 9, 1, 2, 9, 0, 0}; //9 are dummy values + const int hashKey = ((v[0] > v[1]) << 2) + ((v[0] > v[2]) << 1) + (v[1] > v[2]); // ?*4+?*2+?*1 + return hashTable[hashKey]; +#else + if (v[0] > v[1] && v[0] > v[2]) + return 0; + if (v[1] > v[2]) + return 1; + else + return 2; +#endif +} + +/// @brief round up byteSize to the nearest wordSize, e.g. to align to machine word: AlignUp +__hostdev__ inline uint64_t AlignUp(uint64_t byteCount) +{ + const uint64_t r = byteCount % wordSize; + return r ? byteCount - r + wordSize : byteCount; +} + +// ------------------------------> Coord <-------------------------------------- + +// forward declaration so we can define Coord::asVec3s and Coord::asVec3d +template +class Vec3; + +/// @brief Signed (i, j, k) 32-bit integer coordinate class, similar to openvdb::math::Coord +class Coord +{ + int32_t mVec[3]; // private member data - three signed index coordinates +public: + using ValueType = int32_t; + using IndexType = uint32_t; + + /// @brief Initialize all coordinates to zero. + __hostdev__ Coord() + : mVec{0, 0, 0} + { + } + + /// @brief Initializes all coordinates to the given signed integer. + __hostdev__ explicit Coord(ValueType n) + : mVec{n, n, n} + { + } + + /// @brief Initializes coordinate to the given signed integers. + __hostdev__ Coord(ValueType i, ValueType j, ValueType k) + : mVec{i, j, k} + { + } + + __hostdev__ Coord(ValueType* ptr) + : mVec{ptr[0], ptr[1], ptr[2]} + { + } + + __hostdev__ int32_t x() const { return mVec[0]; } + __hostdev__ int32_t y() const { return mVec[1]; } + __hostdev__ int32_t z() const { return mVec[2]; } + + __hostdev__ int32_t& x() { return mVec[0]; } + __hostdev__ int32_t& y() { return mVec[1]; } + __hostdev__ int32_t& z() { return mVec[2]; } + + __hostdev__ static Coord max() { return Coord(int32_t((1u << 31) - 1)); } + + __hostdev__ static Coord min() { return Coord(-int32_t((1u << 31) - 1) - 1); } + + __hostdev__ static size_t memUsage() { return sizeof(Coord); } + + /// @brief Return a const reference to the given Coord component. + /// @warning The argument is assumed to be 0, 1, or 2. + __hostdev__ const ValueType& operator[](IndexType i) const { return mVec[i]; } + + /// @brief Return a non-const reference to the given Coord component. + /// @warning The argument is assumed to be 0, 1, or 2. + __hostdev__ ValueType& operator[](IndexType i) { return mVec[i]; } + + /// @brief Assignment operator that works with openvdb::Coord + template + __hostdev__ Coord& operator=(const CoordT& other) + { + static_assert(sizeof(Coord) == sizeof(CoordT), "Mis-matched sizeof"); + mVec[0] = other[0]; + mVec[1] = other[1]; + mVec[2] = other[2]; + return *this; + } + + /// @brief Return a new instance with coordinates masked by the given unsigned integer. + __hostdev__ Coord operator&(IndexType n) const { return Coord(mVec[0] & n, mVec[1] & n, mVec[2] & n); } + + // @brief Return a new instance with coordinates left-shifted by the given unsigned integer. 
+
+// ------------------------------> Coord <--------------------------------------
+
+// forward declaration so we can define Coord::asVec3s and Coord::asVec3d
+template<typename>
+class Vec3;
+
+/// @brief Signed (i, j, k) 32-bit integer coordinate class, similar to openvdb::math::Coord
+class Coord
+{
+    int32_t mVec[3]; // private member data - three signed index coordinates
+public:
+    using ValueType = int32_t;
+    using IndexType = uint32_t;
+
+    /// @brief Initialize all coordinates to zero.
+    __hostdev__ Coord()
+        : mVec{0, 0, 0}
+    {
+    }
+
+    /// @brief Initializes all coordinates to the given signed integer.
+    __hostdev__ explicit Coord(ValueType n)
+        : mVec{n, n, n}
+    {
+    }
+
+    /// @brief Initializes coordinate to the given signed integers.
+    __hostdev__ Coord(ValueType i, ValueType j, ValueType k)
+        : mVec{i, j, k}
+    {
+    }
+
+    __hostdev__ Coord(ValueType* ptr)
+        : mVec{ptr[0], ptr[1], ptr[2]}
+    {
+    }
+
+    __hostdev__ int32_t x() const { return mVec[0]; }
+    __hostdev__ int32_t y() const { return mVec[1]; }
+    __hostdev__ int32_t z() const { return mVec[2]; }
+
+    __hostdev__ int32_t& x() { return mVec[0]; }
+    __hostdev__ int32_t& y() { return mVec[1]; }
+    __hostdev__ int32_t& z() { return mVec[2]; }
+
+    __hostdev__ static Coord max() { return Coord(int32_t((1u << 31) - 1)); }
+
+    __hostdev__ static Coord min() { return Coord(-int32_t((1u << 31) - 1) - 1); }
+
+    __hostdev__ static size_t memUsage() { return sizeof(Coord); }
+
+    /// @brief Return a const reference to the given Coord component.
+    /// @warning The argument is assumed to be 0, 1, or 2.
+    __hostdev__ const ValueType& operator[](IndexType i) const { return mVec[i]; }
+
+    /// @brief Return a non-const reference to the given Coord component.
+    /// @warning The argument is assumed to be 0, 1, or 2.
+    __hostdev__ ValueType& operator[](IndexType i) { return mVec[i]; }
+
+    /// @brief Assignment operator that works with openvdb::Coord
+    template<typename CoordT>
+    __hostdev__ Coord& operator=(const CoordT& other)
+    {
+        static_assert(sizeof(Coord) == sizeof(CoordT), "Mis-matched sizeof");
+        mVec[0] = other[0];
+        mVec[1] = other[1];
+        mVec[2] = other[2];
+        return *this;
+    }
+
+    /// @brief Return a new instance with coordinates masked by the given unsigned integer.
+    __hostdev__ Coord operator&(IndexType n) const { return Coord(mVec[0] & n, mVec[1] & n, mVec[2] & n); }
+
+    /// @brief Return a new instance with coordinates left-shifted by the given unsigned integer.
+    __hostdev__ Coord operator<<(IndexType n) const { return Coord(mVec[0] << n, mVec[1] << n, mVec[2] << n); }
+
+    /// @brief Return a new instance with coordinates right-shifted by the given unsigned integer.
+    __hostdev__ Coord operator>>(IndexType n) const { return Coord(mVec[0] >> n, mVec[1] >> n, mVec[2] >> n); }
+
+    /// @brief Return true if this Coord is lexicographically less than the given Coord.
+    __hostdev__ bool operator<(const Coord& rhs) const
+    {
+        return mVec[0] < rhs[0] ? true
+             : mVec[0] > rhs[0] ? false
+             : mVec[1] < rhs[1] ? true
+             : mVec[1] > rhs[1] ? false
+             : mVec[2] < rhs[2] ? true : false;
+    }
+
+    /// @brief Return true if this Coord is lexicographically less or equal to the given Coord.
+    __hostdev__ bool operator<=(const Coord& rhs) const
+    {
+        return mVec[0] < rhs[0] ? true
+             : mVec[0] > rhs[0] ? false
+             : mVec[1] < rhs[1] ? true
+             : mVec[1] > rhs[1] ? false
+             : mVec[2] <= rhs[2] ? true : false;
+    }
+
+    /// @brief Return true if this Coord is lexicographically greater than the given Coord.
+    __hostdev__ bool operator>(const Coord& rhs) const
+    {
+        return mVec[0] > rhs[0] ? true
+             : mVec[0] < rhs[0] ? false
+             : mVec[1] > rhs[1] ? true
+             : mVec[1] < rhs[1] ? false
+             : mVec[2] > rhs[2] ? true : false;
+    }
+
+    /// @brief Return true if this Coord is lexicographically greater or equal to the given Coord.
+    __hostdev__ bool operator>=(const Coord& rhs) const
+    {
+        return mVec[0] > rhs[0] ? true
+             : mVec[0] < rhs[0] ? false
+             : mVec[1] > rhs[1] ? true
+             : mVec[1] < rhs[1] ? false
+             : mVec[2] >= rhs[2] ? true : false;
+    }
+
+    /// @brief Return true if the Coord components are identical.
+    __hostdev__ bool operator==(const Coord& rhs) const { return mVec[0] == rhs[0] && mVec[1] == rhs[1] && mVec[2] == rhs[2]; }
+    __hostdev__ bool operator!=(const Coord& rhs) const { return mVec[0] != rhs[0] || mVec[1] != rhs[1] || mVec[2] != rhs[2]; }
+    __hostdev__ Coord& operator&=(int n)
+    {
+        mVec[0] &= n;
+        mVec[1] &= n;
+        mVec[2] &= n;
+        return *this;
+    }
+    __hostdev__ Coord& operator<<=(uint32_t n)
+    {
+        mVec[0] <<= n;
+        mVec[1] <<= n;
+        mVec[2] <<= n;
+        return *this;
+    }
+    __hostdev__ Coord& operator>>=(uint32_t n)
+    {
+        mVec[0] >>= n;
+        mVec[1] >>= n;
+        mVec[2] >>= n;
+        return *this;
+    }
+    __hostdev__ Coord& operator+=(int n)
+    {
+        mVec[0] += n;
+        mVec[1] += n;
+        mVec[2] += n;
+        return *this;
+    }
+    __hostdev__ Coord operator+(const Coord& rhs) const { return Coord(mVec[0] + rhs[0], mVec[1] + rhs[1], mVec[2] + rhs[2]); }
+    __hostdev__ Coord operator-(const Coord& rhs) const { return Coord(mVec[0] - rhs[0], mVec[1] - rhs[1], mVec[2] - rhs[2]); }
+    __hostdev__ Coord operator-() const { return Coord(-mVec[0], -mVec[1], -mVec[2]); }
+    __hostdev__ Coord& operator+=(const Coord& rhs)
+    {
+        mVec[0] += rhs[0];
+        mVec[1] += rhs[1];
+        mVec[2] += rhs[2];
+        return *this;
+    }
+    __hostdev__ Coord& operator-=(const Coord& rhs)
+    {
+        mVec[0] -= rhs[0];
+        mVec[1] -= rhs[1];
+        mVec[2] -= rhs[2];
+        return *this;
+    }
+
+    /// @brief Perform a component-wise minimum with the other Coord.
+    __hostdev__ Coord& minComponent(const Coord& other)
+    {
+        if (other[0] < mVec[0])
+            mVec[0] = other[0];
+        if (other[1] < mVec[1])
+            mVec[1] = other[1];
+        if (other[2] < mVec[2])
+            mVec[2] = other[2];
+        return *this;
+    }
+
+    /// @brief Perform a component-wise maximum with the other Coord.
+    __hostdev__ Coord& maxComponent(const Coord& other)
+    {
+        if (other[0] > mVec[0])
+            mVec[0] = other[0];
+        if (other[1] > mVec[1])
+            mVec[1] = other[1];
+        if (other[2] > mVec[2])
+            mVec[2] = other[2];
+        return *this;
+    }
+#if defined(__CUDACC__) // the following functions only run on the GPU!
+    __device__ inline Coord& minComponentAtomic(const Coord& other)
+    {
+        atomicMin(&mVec[0], other[0]);
+        atomicMin(&mVec[1], other[1]);
+        atomicMin(&mVec[2], other[2]);
+        return *this;
+    }
+    __device__ inline Coord& maxComponentAtomic(const Coord& other)
+    {
+        atomicMax(&mVec[0], other[0]);
+        atomicMax(&mVec[1], other[1]);
+        atomicMax(&mVec[2], other[2]);
+        return *this;
+    }
+#endif
+
+    __hostdev__ Coord offsetBy(ValueType dx, ValueType dy, ValueType dz) const
+    {
+        return Coord(mVec[0] + dx, mVec[1] + dy, mVec[2] + dz);
+    }
+
+    __hostdev__ Coord offsetBy(ValueType n) const { return this->offsetBy(n, n, n); }
+
+    /// Return true if any of the components of @a a are smaller than the
+    /// corresponding components of @a b.
+    __hostdev__ static inline bool lessThan(const Coord& a, const Coord& b)
+    {
+        return (a[0] < b[0] || a[1] < b[1] || a[2] < b[2]);
+    }
+
+    /// @brief Return the largest integer coordinates that are not greater
+    ///        than @a xyz (node centered conversion).
+    template<typename Vec3T>
+    __hostdev__ static Coord Floor(const Vec3T& xyz) { return Coord(math::Floor(xyz[0]), math::Floor(xyz[1]), math::Floor(xyz[2])); }
+
+    /// @brief Return a hash key derived from the existing coordinates.
+    /// @details The hash function is originally taken from the SIGGRAPH paper:
+    ///          "VDB: High-resolution sparse volumes with dynamic topology"
+    ///          and the prime numbers are modified based on the ACM Transactions on Graphics paper:
+    ///          "Real-time 3D reconstruction at scale using voxel hashing" (the second number had a typo!)
+    template<int Log2N = 3 + 4 + 5>
+    __hostdev__ uint32_t hash() const { return ((1 << Log2N) - 1) & (mVec[0] * 73856093 ^ mVec[1] * 19349669 ^ mVec[2] * 83492791); }
+
+    /// @brief Return the octant of this Coord
+    //__hostdev__ size_t octant() const { return (uint32_t(mVec[0])>>31) | ((uint32_t(mVec[1])>>31)<<1) | ((uint32_t(mVec[2])>>31)<<2); }
+    __hostdev__ uint8_t octant() const { return (uint8_t(bool(mVec[0] & (1u << 31)))) |
+                                                (uint8_t(bool(mVec[1] & (1u << 31))) << 1) |
+                                                (uint8_t(bool(mVec[2] & (1u << 31))) << 2); }
+
+    /// @brief Return a single precision floating-point vector of this coordinate
+    __hostdev__ inline Vec3<float> asVec3s() const;
+
+    /// @brief Return a double precision floating-point vector of this coordinate
+    __hostdev__ inline Vec3<double> asVec3d() const;
+
+    // returns a copy of itself, so it mimics the behaviour of Vec3<T>::round()
+    __hostdev__ inline Coord round() const { return *this; }
+}; // Coord class
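For intuition (not part of the patch), hash<Log2N>() masks the XOR of the three prime-multiplied components down to Log2N bits, giving a bucket index:

    nanovdb::math::Coord ijk(1, 2, 3);
    uint32_t key = ijk.hash<12>(); // masked with (1 << 12) - 1, i.e. a value in [0, 4096)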
+
+// ----------------------------> Vec3 <--------------------------------------
+
+/// @brief A simple vector class with three components, similar to openvdb::math::Vec3
+template<typename T>
+class Vec3
+{
+    T mVec[3];
+
+public:
+    static const int SIZE = 3;
+    static const int size = 3; // in openvdb::math::Tuple
+    using ValueType = T;
+    Vec3() = default;
+    __hostdev__ explicit Vec3(T x)
+        : mVec{x, x, x}
+    {
+    }
+    __hostdev__ Vec3(T x, T y, T z)
+        : mVec{x, y, z}
+    {
+    }
+    template<template<class> class Vec3T, class T2>
+    __hostdev__ Vec3(const Vec3T<T2>& v)
+        : mVec{T(v[0]), T(v[1]), T(v[2])}
+    {
+        static_assert(Vec3T<T2>::size == size, "expected Vec3T<T2>::size==3!");
+    }
+    template<typename T2>
+    __hostdev__ explicit Vec3(const Vec3<T2>& v)
+        : mVec{T(v[0]), T(v[1]), T(v[2])}
+    {
+    }
+    __hostdev__ explicit Vec3(const Coord& ijk)
+        : mVec{T(ijk[0]), T(ijk[1]), T(ijk[2])}
+    {
+    }
+    __hostdev__ bool operator==(const Vec3& rhs) const { return mVec[0] == rhs[0] && mVec[1] == rhs[1] && mVec[2] == rhs[2]; }
+    __hostdev__ bool operator!=(const Vec3& rhs) const { return mVec[0] != rhs[0] || mVec[1] != rhs[1] || mVec[2] != rhs[2]; }
+    template<template<class> class Vec3T, class T2>
+    __hostdev__ Vec3& operator=(const Vec3T<T2>& rhs)
+    {
+        static_assert(Vec3T<T2>::size == size, "expected Vec3T<T2>::size==3!");
+        mVec[0] = rhs[0];
+        mVec[1] = rhs[1];
+        mVec[2] = rhs[2];
+        return *this;
+    }
+    __hostdev__ const T& operator[](int i) const { return mVec[i]; }
+    __hostdev__ T& operator[](int i) { return mVec[i]; }
+    template<typename Vec3T>
+    __hostdev__ T dot(const Vec3T& v) const { return mVec[0] * v[0] + mVec[1] * v[1] + mVec[2] * v[2]; }
+    template<typename Vec3T>
+    __hostdev__ Vec3 cross(const Vec3T& v) const
+    {
+        return Vec3(mVec[1] * v[2] - mVec[2] * v[1],
+                    mVec[2] * v[0] - mVec[0] * v[2],
+                    mVec[0] * v[1] - mVec[1] * v[0]);
+    }
+    __hostdev__ T lengthSqr() const
+    {
+        return mVec[0] * mVec[0] + mVec[1] * mVec[1] + mVec[2] * mVec[2]; // 5 flops
+    }
+    __hostdev__ T length() const { return Sqrt(this->lengthSqr()); }
+    __hostdev__ Vec3 operator-() const { return Vec3(-mVec[0], -mVec[1], -mVec[2]); }
+    __hostdev__ Vec3 operator*(const Vec3& v) const { return Vec3(mVec[0] * v[0], mVec[1] * v[1], mVec[2] * v[2]); }
+    __hostdev__ Vec3 operator/(const Vec3& v) const { return Vec3(mVec[0] / v[0], mVec[1] / v[1], mVec[2] / v[2]); }
+    __hostdev__ Vec3 operator+(const Vec3& v) const { return Vec3(mVec[0] + v[0], mVec[1] + v[1], mVec[2] + v[2]); }
+    __hostdev__ Vec3 operator-(const Vec3& v) const { return Vec3(mVec[0] - v[0], mVec[1] - v[1], mVec[2] - v[2]); }
+    __hostdev__ Vec3 operator+(const Coord& ijk) const { return Vec3(mVec[0] + ijk[0], mVec[1] + ijk[1], mVec[2] + ijk[2]); }
+    __hostdev__ Vec3 operator-(const Coord& ijk) const { return Vec3(mVec[0] - ijk[0], mVec[1] - ijk[1], mVec[2] - ijk[2]); }
+    __hostdev__ Vec3 operator*(const T& s) const { return Vec3(s * mVec[0], s * mVec[1], s * mVec[2]); }
+    __hostdev__ Vec3 operator/(const T& s) const { return (T(1) / s) * (*this); }
+    __hostdev__ Vec3& operator+=(const Vec3& v)
+    {
+        mVec[0] += v[0];
+        mVec[1] += v[1];
+        mVec[2] += v[2];
+        return *this;
+    }
+    __hostdev__ Vec3& operator+=(const Coord& ijk)
+    {
+        mVec[0] += T(ijk[0]);
+        mVec[1] += T(ijk[1]);
+        mVec[2] += T(ijk[2]);
+        return *this;
+    }
+    __hostdev__ Vec3& operator-=(const Vec3& v)
+    {
+        mVec[0] -= v[0];
+        mVec[1] -= v[1];
+        mVec[2] -= v[2];
+        return *this;
+    }
+    __hostdev__ Vec3& operator-=(const Coord& ijk)
+    {
+        mVec[0] -= T(ijk[0]);
+        mVec[1] -= T(ijk[1]);
+        mVec[2] -= T(ijk[2]);
+        return *this;
+    }
+    __hostdev__ Vec3& operator*=(const T& s)
+    {
+        mVec[0] *= s;
+        mVec[1] *= s;
+        mVec[2] *= s;
+        return *this;
+    }
+    __hostdev__ Vec3& operator/=(const T& s) { return (*this) *= T(1) / s; }
+    __hostdev__ Vec3& normalize() { return (*this) /= this->length(); }
+    /// @brief Perform a component-wise minimum with the other Vec3.
+    __hostdev__ Vec3& minComponent(const Vec3& other)
+    {
+        if (other[0] < mVec[0])
+            mVec[0] = other[0];
+        if (other[1] < mVec[1])
+            mVec[1] = other[1];
+        if (other[2] < mVec[2])
+            mVec[2] = other[2];
+        return *this;
+    }
+
+    /// @brief Perform a component-wise maximum with the other Vec3.
+    __hostdev__ Vec3& maxComponent(const Vec3& other)
+    {
+        if (other[0] > mVec[0])
+            mVec[0] = other[0];
+        if (other[1] > mVec[1])
+            mVec[1] = other[1];
+        if (other[2] > mVec[2])
+            mVec[2] = other[2];
+        return *this;
+    }
+    /// @brief Return the smallest vector component
+    __hostdev__ ValueType min() const
+    {
+        return mVec[0] < mVec[1] ? (mVec[0] < mVec[2] ? mVec[0] : mVec[2]) : (mVec[1] < mVec[2] ? mVec[1] : mVec[2]);
+    }
+    /// @brief Return the largest vector component
+    __hostdev__ ValueType max() const
+    {
+        return mVec[0] > mVec[1] ? (mVec[0] > mVec[2] ? mVec[0] : mVec[2]) : (mVec[1] > mVec[2] ? mVec[1] : mVec[2]);
+    }
+    /// @brief Round each component of this Vec down to its integer value
+    /// @return Return an integer Coord
+    __hostdev__ Coord floor() const { return Coord(Floor(mVec[0]), Floor(mVec[1]), Floor(mVec[2])); }
+    /// @brief Round each component of this Vec up to its integer value
+    /// @return Return an integer Coord
+    __hostdev__ Coord ceil() const { return Coord(Ceil(mVec[0]), Ceil(mVec[1]), Ceil(mVec[2])); }
+    /// @brief Round each component of this Vec to its closest integer value
+    /// @return Return an integer Coord
+    __hostdev__ Coord round() const
+    {
+        if constexpr(util::is_same<T, float>::value) {
+            return Coord(Floor(mVec[0] + 0.5f), Floor(mVec[1] + 0.5f), Floor(mVec[2] + 0.5f));
+        } else if constexpr(util::is_same<T, int>::value) {
+            return Coord(mVec[0], mVec[1], mVec[2]);
+        } else {
+            return Coord(Floor(mVec[0] + 0.5), Floor(mVec[1] + 0.5), Floor(mVec[2] + 0.5));
+        }
+    }
+
+    /// @brief return a non-const raw pointer to the array of three vector components
+    __hostdev__ T* asPointer() { return mVec; }
+    /// @brief return a const raw pointer to the array of three vector components
+    __hostdev__ const T* asPointer() const { return mVec; }
+}; // Vec3<T>
+
+template<typename T1, typename T2>
+__hostdev__ inline Vec3<T2> operator*(T1 scalar, const Vec3<T2>& vec)
+{
+    return Vec3<T2>(scalar * vec[0], scalar * vec[1], scalar * vec[2]);
+}
+template<typename T1, typename T2>
+__hostdev__ inline Vec3<T2> operator/(T1 scalar, const Vec3<T2>& vec)
+{
+    return Vec3<T2>(scalar / vec[0], scalar / vec[1], scalar / vec[2]);
+}
+
+/// @brief Return a single precision floating-point vector of this coordinate
+__hostdev__ inline Vec3<float> Coord::asVec3s() const
+{
+    return Vec3<float>(float(mVec[0]), float(mVec[1]), float(mVec[2]));
+}
+
+/// @brief Return a double precision floating-point vector of this coordinate
+__hostdev__ inline Vec3<double> Coord::asVec3d() const
+{
+    return Vec3<double>(double(mVec[0]), double(mVec[1]), double(mVec[2]));
+}
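A minimal usage sketch (not part of the patch), assuming the Vec3f alias declared further down in this header:

    nanovdb::math::Vec3f u(1.0f, 0.0f, 0.0f), v(0.0f, 1.0f, 0.0f);
    float d = u.dot(v);                   // 0.0f
    nanovdb::math::Vec3f w = u.cross(v);  // (0, 0, 1)
    // float specialization of round() uses Floor(x + 0.5f):
    nanovdb::math::Coord c = nanovdb::math::Vec3f(1.4f, 2.6f, -0.5f).round(); // (1, 3, 0)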
+
+// ----------------------------> Vec4 <--------------------------------------
+
+/// @brief A simple vector class with four components, similar to openvdb::math::Vec4
+template<typename T>
+class Vec4
+{
+    T mVec[4];
+
+public:
+    static const int SIZE = 4;
+    static const int size = 4;
+    using ValueType = T;
+    Vec4() = default;
+    __hostdev__ explicit Vec4(T x)
+        : mVec{x, x, x, x}
+    {
+    }
+    __hostdev__ Vec4(T x, T y, T z, T w)
+        : mVec{x, y, z, w}
+    {
+    }
+    template<typename T2>
+    __hostdev__ explicit Vec4(const Vec4<T2>& v)
+        : mVec{T(v[0]), T(v[1]), T(v[2]), T(v[3])}
+    {
+    }
+    template<template<class> class Vec4T, class T2>
+    __hostdev__ Vec4(const Vec4T<T2>& v)
+        : mVec{T(v[0]), T(v[1]), T(v[2]), T(v[3])}
+    {
+        static_assert(Vec4T<T2>::size == size, "expected Vec4T<T2>::size==4!");
+    }
+    __hostdev__ bool operator==(const Vec4& rhs) const { return mVec[0] == rhs[0] && mVec[1] == rhs[1] && mVec[2] == rhs[2] && mVec[3] == rhs[3]; }
+    __hostdev__ bool operator!=(const Vec4& rhs) const { return mVec[0] != rhs[0] || mVec[1] != rhs[1] || mVec[2] != rhs[2] || mVec[3] != rhs[3]; }
+    template<template<class> class Vec4T, class T2>
+    __hostdev__ Vec4& operator=(const Vec4T<T2>& rhs)
+    {
+        static_assert(Vec4T<T2>::size == size, "expected Vec4T<T2>::size==4!");
+        mVec[0] = rhs[0];
+        mVec[1] = rhs[1];
+        mVec[2] = rhs[2];
+        mVec[3] = rhs[3];
+        return *this;
+    }
+
+    __hostdev__ const T& operator[](int i) const { return mVec[i]; }
+    __hostdev__ T& operator[](int i) { return mVec[i]; }
+    template<typename Vec4T>
+    __hostdev__ T dot(const Vec4T& v) const { return mVec[0] * v[0] + mVec[1] * v[1] + mVec[2] * v[2] + mVec[3] * v[3]; }
+    __hostdev__ T lengthSqr() const
+    {
+        return mVec[0] * mVec[0] + mVec[1] * mVec[1] + mVec[2] * mVec[2] + mVec[3] * mVec[3]; // 7 flops
+    }
+    __hostdev__ T length() const { return Sqrt(this->lengthSqr()); }
+    __hostdev__ Vec4 operator-() const { return Vec4(-mVec[0], -mVec[1], -mVec[2], -mVec[3]); }
+    __hostdev__ Vec4 operator*(const Vec4& v) const { return Vec4(mVec[0] * v[0], mVec[1] * v[1], mVec[2] * v[2], mVec[3] * v[3]); }
+    __hostdev__ Vec4 operator/(const Vec4& v) const { return Vec4(mVec[0] / v[0], mVec[1] / v[1], mVec[2] / v[2], mVec[3] / v[3]); }
+    __hostdev__ Vec4 operator+(const Vec4& v) const { return Vec4(mVec[0] + v[0], mVec[1] + v[1], mVec[2] + v[2], mVec[3] + v[3]); }
+    __hostdev__ Vec4 operator-(const Vec4& v) const { return Vec4(mVec[0] - v[0], mVec[1] - v[1], mVec[2] - v[2], mVec[3] - v[3]); }
+    __hostdev__ Vec4 operator*(const T& s) const { return Vec4(s * mVec[0], s * mVec[1], s * mVec[2], s * mVec[3]); }
+    __hostdev__ Vec4 operator/(const T& s) const { return (T(1) / s) * (*this); }
+    __hostdev__ Vec4& operator+=(const Vec4& v)
+    {
+        mVec[0] += v[0];
+        mVec[1] += v[1];
+        mVec[2] += v[2];
+        mVec[3] += v[3];
+        return *this;
+    }
+    __hostdev__ Vec4& operator-=(const Vec4& v)
+    {
+        mVec[0] -= v[0];
+        mVec[1] -= v[1];
+        mVec[2] -= v[2];
+        mVec[3] -= v[3];
+        return *this;
+    }
+    __hostdev__ Vec4& operator*=(const T& s)
+    {
+        mVec[0] *= s;
+        mVec[1] *= s;
+        mVec[2] *= s;
+        mVec[3] *= s;
+        return *this;
+    }
+    __hostdev__ Vec4& operator/=(const T& s) { return (*this) *= T(1) / s; }
+    __hostdev__ Vec4& normalize() { return (*this) /= this->length(); }
+    /// @brief Perform a component-wise minimum with the other Vec4.
+    __hostdev__ Vec4& minComponent(const Vec4& other)
+    {
+        if (other[0] < mVec[0])
+            mVec[0] = other[0];
+        if (other[1] < mVec[1])
+            mVec[1] = other[1];
+        if (other[2] < mVec[2])
+            mVec[2] = other[2];
+        if (other[3] < mVec[3])
+            mVec[3] = other[3];
+        return *this;
+    }
+
+    /// @brief Perform a component-wise maximum with the other Vec4.
+    __hostdev__ Vec4& maxComponent(const Vec4& other)
+    {
+        if (other[0] > mVec[0])
+            mVec[0] = other[0];
+        if (other[1] > mVec[1])
+            mVec[1] = other[1];
+        if (other[2] > mVec[2])
+            mVec[2] = other[2];
+        if (other[3] > mVec[3])
+            mVec[3] = other[3];
+        return *this;
+    }
+}; // Vec4<T>
+
+template<typename T1, typename T2>
+__hostdev__ inline Vec4<T2> operator*(T1 scalar, const Vec4<T2>& vec)
+{
+    return Vec4<T2>(scalar * vec[0], scalar * vec[1], scalar * vec[2], scalar * vec[3]);
+}
+template<typename T1, typename T2>
+__hostdev__ inline Vec4<T2> operator/(T1 scalar, const Vec4<T2>& vec)
+{
+    return Vec4<T2>(scalar / vec[0], scalar / vec[1], scalar / vec[2], scalar / vec[3]);
+}
+
+// ----------------------------> matMult <--------------------------------------
+
+/// @brief Multiply a 3x3 matrix and a 3d vector using 32bit floating point arithmetic
+/// @note This corresponds to a linear mapping, e.g. scaling, rotation etc.
+/// @tparam Vec3T Template type of the input and output 3d vectors
+/// @param mat pointer to an array of floats with the 3x3 matrix
+/// @param xyz input vector to be multiplied by the matrix
+/// @return result of matrix-vector multiplication, i.e. mat x xyz
+template<typename Vec3T>
+__hostdev__ inline Vec3T matMult(const float* mat, const Vec3T& xyz)
+{
+    return Vec3T(fmaf(static_cast<float>(xyz[0]), mat[0], fmaf(static_cast<float>(xyz[1]), mat[1], static_cast<float>(xyz[2]) * mat[2])),
+                 fmaf(static_cast<float>(xyz[0]), mat[3], fmaf(static_cast<float>(xyz[1]), mat[4], static_cast<float>(xyz[2]) * mat[5])),
+                 fmaf(static_cast<float>(xyz[0]), mat[6], fmaf(static_cast<float>(xyz[1]), mat[7], static_cast<float>(xyz[2]) * mat[8]))); // 6 fmaf + 3 mult = 9 flops
+}
+
+/// @brief Multiply a 3x3 matrix and a 3d vector using 64bit floating point arithmetic
+/// @note This corresponds to a linear mapping, e.g. scaling, rotation etc.
+/// @tparam Vec3T Template type of the input and output 3d vectors
+/// @param mat pointer to an array of doubles with the 3x3 matrix
+/// @param xyz input vector to be multiplied by the matrix
+/// @return result of matrix-vector multiplication, i.e. mat x xyz
+template<typename Vec3T>
+__hostdev__ inline Vec3T matMult(const double* mat, const Vec3T& xyz)
+{
+    return Vec3T(fma(static_cast<double>(xyz[0]), mat[0], fma(static_cast<double>(xyz[1]), mat[1], static_cast<double>(xyz[2]) * mat[2])),
+                 fma(static_cast<double>(xyz[0]), mat[3], fma(static_cast<double>(xyz[1]), mat[4], static_cast<double>(xyz[2]) * mat[5])),
+                 fma(static_cast<double>(xyz[0]), mat[6], fma(static_cast<double>(xyz[1]), mat[7], static_cast<double>(xyz[2]) * mat[8]))); // 6 fma + 3 mult = 9 flops
+}
+
+/// @brief Multiply a 3x3 matrix to a 3d vector and add another 3d vector using 32bit floating point arithmetic
+/// @note This corresponds to an affine transformation, i.e. a linear mapping followed by a translation, e.g. scale/rotation and translation
+/// @tparam Vec3T Template type of the input and output 3d vectors
+/// @param mat pointer to an array of floats with the 3x3 matrix
+/// @param vec 3d vector to be added AFTER the matrix multiplication
+/// @param xyz input vector to be multiplied by the matrix and translated by @c vec
+/// @return result of affine transformation, i.e. (mat x xyz) + vec
+template<typename Vec3T>
+__hostdev__ inline Vec3T matMult(const float* mat, const float* vec, const Vec3T& xyz)
+{
+    return Vec3T(fmaf(static_cast<float>(xyz[0]), mat[0], fmaf(static_cast<float>(xyz[1]), mat[1], fmaf(static_cast<float>(xyz[2]), mat[2], vec[0]))),
+                 fmaf(static_cast<float>(xyz[0]), mat[3], fmaf(static_cast<float>(xyz[1]), mat[4], fmaf(static_cast<float>(xyz[2]), mat[5], vec[1]))),
+                 fmaf(static_cast<float>(xyz[0]), mat[6], fmaf(static_cast<float>(xyz[1]), mat[7], fmaf(static_cast<float>(xyz[2]), mat[8], vec[2])))); // 9 fmaf = 9 flops
+}
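A minimal sketch (not part of the patch) illustrating the row-major layout these functions index: mat[0..2] is row 0, mat[3..5] row 1, mat[6..8] row 2:

    const float mat[9] = {2, 0, 0,  0, 2, 0,  0, 0, 2}; // uniform scale by 2
    const float vec[3] = {1, 1, 1};                     // translation, added after the multiply
    nanovdb::math::Vec3<float> p =
        nanovdb::math::matMult(mat, vec, nanovdb::math::Vec3<float>(1, 2, 3)); // (3, 5, 7)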
+
+/// @brief Multiply a 3x3 matrix to a 3d vector and add another 3d vector using 64bit floating point arithmetic
+/// @note This corresponds to an affine transformation, i.e. a linear mapping followed by a translation, e.g. scale/rotation and translation
+/// @tparam Vec3T Template type of the input and output 3d vectors
+/// @param mat pointer to an array of doubles with the 3x3 matrix
+/// @param vec 3d vector to be added AFTER the matrix multiplication
+/// @param xyz input vector to be multiplied by the matrix and translated by @c vec
+/// @return result of affine transformation, i.e. (mat x xyz) + vec
+template<typename Vec3T>
+__hostdev__ inline Vec3T matMult(const double* mat, const double* vec, const Vec3T& xyz)
+{
+    return Vec3T(fma(static_cast<double>(xyz[0]), mat[0], fma(static_cast<double>(xyz[1]), mat[1], fma(static_cast<double>(xyz[2]), mat[2], vec[0]))),
+                 fma(static_cast<double>(xyz[0]), mat[3], fma(static_cast<double>(xyz[1]), mat[4], fma(static_cast<double>(xyz[2]), mat[5], vec[1]))),
+                 fma(static_cast<double>(xyz[0]), mat[6], fma(static_cast<double>(xyz[1]), mat[7], fma(static_cast<double>(xyz[2]), mat[8], vec[2])))); // 9 fma = 9 flops
+}
+
+/// @brief Multiply the transposed of a 3x3 matrix and a 3d vector using 32bit floating point arithmetic
+/// @note This corresponds to an inverse linear mapping, e.g. inverse scaling, inverse rotation etc.
+/// @tparam Vec3T Template type of the input and output 3d vectors
+/// @param mat pointer to an array of floats with the 3x3 matrix
+/// @param xyz input vector to be multiplied by the transposed matrix
+/// @return result of matrix-vector multiplication, i.e. mat^T x xyz
+template<typename Vec3T>
+__hostdev__ inline Vec3T matMultT(const float* mat, const Vec3T& xyz)
+{
+    return Vec3T(fmaf(static_cast<float>(xyz[0]), mat[0], fmaf(static_cast<float>(xyz[1]), mat[3], static_cast<float>(xyz[2]) * mat[6])),
+                 fmaf(static_cast<float>(xyz[0]), mat[1], fmaf(static_cast<float>(xyz[1]), mat[4], static_cast<float>(xyz[2]) * mat[7])),
+                 fmaf(static_cast<float>(xyz[0]), mat[2], fmaf(static_cast<float>(xyz[1]), mat[5], static_cast<float>(xyz[2]) * mat[8]))); // 6 fmaf + 3 mult = 9 flops
+}
+
+/// @brief Multiply the transposed of a 3x3 matrix and a 3d vector using 64bit floating point arithmetic
+/// @note This corresponds to an inverse linear mapping, e.g. inverse scaling, inverse rotation etc.
+/// @tparam Vec3T Template type of the input and output 3d vectors
+/// @param mat pointer to an array of doubles with the 3x3 matrix
+/// @param xyz input vector to be multiplied by the transposed matrix
+/// @return result of matrix-vector multiplication, i.e. mat^T x xyz
+template<typename Vec3T>
+__hostdev__ inline Vec3T matMultT(const double* mat, const Vec3T& xyz)
+{
+    return Vec3T(fma(static_cast<double>(xyz[0]), mat[0], fma(static_cast<double>(xyz[1]), mat[3], static_cast<double>(xyz[2]) * mat[6])),
+                 fma(static_cast<double>(xyz[0]), mat[1], fma(static_cast<double>(xyz[1]), mat[4], static_cast<double>(xyz[2]) * mat[7])),
+                 fma(static_cast<double>(xyz[0]), mat[2], fma(static_cast<double>(xyz[1]), mat[5], static_cast<double>(xyz[2]) * mat[8]))); // 6 fma + 3 mult = 9 flops
+}
+
+template<typename Vec3T>
+__hostdev__ inline Vec3T matMultT(const float* mat, const float* vec, const Vec3T& xyz)
+{
+    return Vec3T(fmaf(static_cast<float>(xyz[0]), mat[0], fmaf(static_cast<float>(xyz[1]), mat[3], fmaf(static_cast<float>(xyz[2]), mat[6], vec[0]))),
+                 fmaf(static_cast<float>(xyz[0]), mat[1], fmaf(static_cast<float>(xyz[1]), mat[4], fmaf(static_cast<float>(xyz[2]), mat[7], vec[1]))),
+                 fmaf(static_cast<float>(xyz[0]), mat[2], fmaf(static_cast<float>(xyz[1]), mat[5], fmaf(static_cast<float>(xyz[2]), mat[8], vec[2])))); // 9 fmaf = 9 flops
+}
+
+template<typename Vec3T>
+__hostdev__ inline Vec3T matMultT(const double* mat, const double* vec, const Vec3T& xyz)
+{
+    return Vec3T(fma(static_cast<double>(xyz[0]), mat[0], fma(static_cast<double>(xyz[1]), mat[3], fma(static_cast<double>(xyz[2]), mat[6], vec[0]))),
+                 fma(static_cast<double>(xyz[0]), mat[1], fma(static_cast<double>(xyz[1]), mat[4], fma(static_cast<double>(xyz[2]), mat[7], vec[1]))),
+                 fma(static_cast<double>(xyz[0]), mat[2], fma(static_cast<double>(xyz[1]), mat[5], fma(static_cast<double>(xyz[2]), mat[8], vec[2])))); // 9 fma = 9 flops
+}
+
+// ----------------------------> BBox <-------------------------------------
+
+// Base-class for static polymorphism (cannot be constructed directly)
+template<typename Vec3T>
+struct BaseBBox
+{
+    Vec3T mCoord[2];
+    __hostdev__ bool operator==(const BaseBBox& rhs) const { return mCoord[0] == rhs.mCoord[0] && mCoord[1] == rhs.mCoord[1]; };
+    __hostdev__ bool operator!=(const BaseBBox& rhs) const { return mCoord[0] != rhs.mCoord[0] || mCoord[1] != rhs.mCoord[1]; };
+    __hostdev__ const Vec3T& operator[](int i) const { return mCoord[i]; }
+    __hostdev__ Vec3T& operator[](int i) { return mCoord[i]; }
+    __hostdev__ Vec3T& min() { return mCoord[0]; }
+    __hostdev__ Vec3T& max() { return mCoord[1]; }
+    __hostdev__ const Vec3T& min() const { return mCoord[0]; }
+    __hostdev__ const Vec3T& max() const { return mCoord[1]; }
+    __hostdev__ BaseBBox& translate(const Vec3T& xyz)
+    {
+        mCoord[0] += xyz;
+        mCoord[1] += xyz;
+        return *this;
+    }
+    /// @brief Expand this bounding box to enclose point @c xyz.
+    __hostdev__ BaseBBox& expand(const Vec3T& xyz)
+    {
+        mCoord[0].minComponent(xyz);
+        mCoord[1].maxComponent(xyz);
+        return *this;
+    }
+
+    /// @brief Expand this bounding box to enclose the given bounding box.
+    __hostdev__ BaseBBox& expand(const BaseBBox& bbox)
+    {
+        mCoord[0].minComponent(bbox[0]);
+        mCoord[1].maxComponent(bbox[1]);
+        return *this;
+    }
+
+    /// @brief Intersect this bounding box with the given bounding box.
+    __hostdev__ BaseBBox& intersect(const BaseBBox& bbox)
+    {
+        mCoord[0].maxComponent(bbox[0]);
+        mCoord[1].minComponent(bbox[1]);
+        return *this;
+    }
+
+    //__hostdev__ BaseBBox expandBy(typename Vec3T::ValueType padding) const
+    //{
+    //    return BaseBBox(mCoord[0].offsetBy(-padding),mCoord[1].offsetBy(padding));
+    //}
+    __hostdev__ bool isInside(const Vec3T& xyz)
+    {
+        if (xyz[0] < mCoord[0][0] || xyz[1] < mCoord[0][1] || xyz[2] < mCoord[0][2])
+            return false;
+        if (xyz[0] > mCoord[1][0] || xyz[1] > mCoord[1][1] || xyz[2] > mCoord[1][2])
+            return false;
+        return true;
+    }
+
+protected:
+    __hostdev__ BaseBBox() {}
+    __hostdev__ BaseBBox(const Vec3T& min, const Vec3T& max)
+        : mCoord{min, max}
+    {
+    }
+}; // BaseBBox
+
+template<typename Vec3T, bool = util::is_floating_point<typename Vec3T::ValueType>::value>
+struct BBox;
+
+/// @brief Partial template specialization for floating point coordinate types.
+///
+/// @note Min is inclusive and max is exclusive. If min = max the dimension of
+///       the bounding box is zero and therefore it is also empty.
+template<typename Vec3T>
+struct BBox<Vec3T, true> : public BaseBBox<Vec3T>
+{
+    using Vec3Type = Vec3T;
+    using ValueType = typename Vec3T::ValueType;
+    static_assert(util::is_floating_point<ValueType>::value, "Expected a floating point coordinate type");
+    using BaseT = BaseBBox<Vec3T>;
+    using BaseT::mCoord;
+    /// @brief Default construction sets BBox to an empty bbox
+    __hostdev__ BBox()
+        : BaseT(Vec3T( Maximum<ValueType>::value()),
+                Vec3T(-Maximum<ValueType>::value()))
+    {
+    }
+    __hostdev__ BBox(const Vec3T& min, const Vec3T& max)
+        : BaseT(min, max)
+    {
+    }
+    __hostdev__ BBox(const Coord& min, const Coord& max)
+        : BaseT(Vec3T(ValueType(min[0]), ValueType(min[1]), ValueType(min[2])),
+                Vec3T(ValueType(max[0] + 1), ValueType(max[1] + 1), ValueType(max[2] + 1)))
+    {
+    }
+    __hostdev__ static BBox createCube(const Coord& min, typename Coord::ValueType dim)
+    {
+        return BBox(min, min.offsetBy(dim));
+    }
+
+    __hostdev__ BBox(const BaseBBox<Coord>& bbox)
+        : BBox(bbox[0], bbox[1])
+    {
+    }
+    __hostdev__ bool empty() const { return mCoord[0][0] >= mCoord[1][0] ||
+                                            mCoord[0][1] >= mCoord[1][1] ||
+                                            mCoord[0][2] >= mCoord[1][2]; }
+    __hostdev__ operator bool() const { return mCoord[0][0] < mCoord[1][0] &&
+                                               mCoord[0][1] < mCoord[1][1] &&
+                                               mCoord[0][2] < mCoord[1][2]; }
+    __hostdev__ Vec3T dim() const { return *this ? this->max() - this->min() : Vec3T(0); }
+    __hostdev__ bool isInside(const Vec3T& p) const
+    {
+        return p[0] > mCoord[0][0] && p[1] > mCoord[0][1] && p[2] > mCoord[0][2] &&
+               p[0] < mCoord[1][0] && p[1] < mCoord[1][1] && p[2] < mCoord[1][2];
+    }
+
+}; // BBox<Vec3T, true>
+
+/// @brief Partial template specialization for integer coordinate types
+///
+/// @note Both min and max are INCLUDED in the bbox so dim = max - min + 1. So,
+///       if min = max the bounding box contains exactly one point and dim = 1!
+template<typename CoordT>
+struct BBox<CoordT, false> : public BaseBBox<CoordT>
+{
+    static_assert(util::is_same<int, typename CoordT::ValueType>::value, "Expected \"int\" coordinate type");
+    using BaseT = BaseBBox<CoordT>;
+    using BaseT::mCoord;
+    /// @brief Iterator over the domain covered by a BBox
+    /// @details z is the fastest-moving coordinate.
+    class Iterator
+    {
+        const BBox& mBBox;
+        CoordT      mPos;
+
+    public:
+        __hostdev__ Iterator(const BBox& b)
+            : mBBox(b)
+            , mPos(b.min())
+        {
+        }
+        __hostdev__ Iterator(const BBox& b, const CoordT& p)
+            : mBBox(b)
+            , mPos(p)
+        {
+        }
+        __hostdev__ Iterator& operator++()
+        {
+            if (mPos[2] < mBBox[1][2]) { // this is the most common case
+                ++mPos[2];// increment z
+            } else if (mPos[1] < mBBox[1][1]) {
+                mPos[2] = mBBox[0][2];// reset z
+                ++mPos[1];// increment y
+            } else if (mPos[0] <= mBBox[1][0]) {
+                mPos[2] = mBBox[0][2];// reset z
+                mPos[1] = mBBox[0][1];// reset y
+                ++mPos[0];// increment x
+            }
+            return *this;
+        }
+        __hostdev__ Iterator operator++(int)
+        {
+            auto tmp = *this;
+            ++(*this);
+            return tmp;
+        }
+        __hostdev__ bool operator==(const Iterator& rhs) const
+        {
+            NANOVDB_ASSERT(mBBox == rhs.mBBox);
+            return mPos == rhs.mPos;
+        }
+        __hostdev__ bool operator!=(const Iterator& rhs) const
+        {
+            NANOVDB_ASSERT(mBBox == rhs.mBBox);
+            return mPos != rhs.mPos;
+        }
+        __hostdev__ bool operator<(const Iterator& rhs) const
+        {
+            NANOVDB_ASSERT(mBBox == rhs.mBBox);
+            return mPos < rhs.mPos;
+        }
+        __hostdev__ bool operator<=(const Iterator& rhs) const
+        {
+            NANOVDB_ASSERT(mBBox == rhs.mBBox);
+            return mPos <= rhs.mPos;
+        }
+        /// @brief Return @c true if the iterator still points to a valid coordinate.
+        __hostdev__ operator bool() const { return mPos <= mBBox[1]; }
+        __hostdev__ const CoordT& operator*() const { return mPos; }
+    }; // Iterator
+    __hostdev__ Iterator begin() const { return Iterator{*this}; }
+    __hostdev__ Iterator end() const { return Iterator{*this, CoordT(mCoord[1][0]+1, mCoord[0][1], mCoord[0][2])}; }
+    __hostdev__ BBox()
+        : BaseT(CoordT::max(), CoordT::min())
+    {
+    }
+    __hostdev__ BBox(const CoordT& min, const CoordT& max)
+        : BaseT(min, max)
+    {
+    }
+
+    template<typename SplitT>
+    __hostdev__ BBox(BBox& other, const SplitT&)
+        : BaseT(other.mCoord[0], other.mCoord[1])
+    {
+        NANOVDB_ASSERT(this->is_divisible());
+        const int n = MaxIndex(this->dim());
+        mCoord[1][n] = (mCoord[0][n] + mCoord[1][n]) >> 1;
+        other.mCoord[0][n] = mCoord[1][n] + 1;
+    }
+
+    __hostdev__ static BBox createCube(const CoordT& min, typename CoordT::ValueType dim)
+    {
+        return BBox(min, min.offsetBy(dim - 1));
+    }
+
+    __hostdev__ static BBox createCube(typename CoordT::ValueType min, typename CoordT::ValueType max)
+    {
+        return BBox(CoordT(min), CoordT(max));
+    }
+
+    __hostdev__ bool is_divisible() const { return mCoord[0][0] < mCoord[1][0] &&
+                                                   mCoord[0][1] < mCoord[1][1] &&
+                                                   mCoord[0][2] < mCoord[1][2]; }
+    /// @brief Return true if this bounding box is empty, e.g. uninitialized
+    __hostdev__ bool empty() const { return mCoord[0][0] > mCoord[1][0] ||
+                                            mCoord[0][1] > mCoord[1][1] ||
+                                            mCoord[0][2] > mCoord[1][2]; }
+    /// @brief Convert this BBox to boolean true if it is not empty
+    __hostdev__ operator bool() const { return mCoord[0][0] <= mCoord[1][0] &&
+                                               mCoord[0][1] <= mCoord[1][1] &&
+                                               mCoord[0][2] <= mCoord[1][2]; }
+    __hostdev__ CoordT dim() const { return *this ? this->max() - this->min() + Coord(1) : Coord(0); }
+    __hostdev__ uint64_t volume() const
+    {
+        auto d = this->dim();
+        return uint64_t(d[0]) * uint64_t(d[1]) * uint64_t(d[2]);
+    }
+    __hostdev__ bool isInside(const CoordT& p) const { return !(CoordT::lessThan(p, this->min()) || CoordT::lessThan(this->max(), p)); }
+    /// @brief Return @c true if the given bounding box is inside this bounding box.
+    __hostdev__ bool isInside(const BBox& b) const
+    {
+        return !(CoordT::lessThan(b.min(), this->min()) || CoordT::lessThan(this->max(), b.max()));
+    }
+
+    /// @brief Return @c true if the given bounding box overlaps with this bounding box.
+    __hostdev__ bool hasOverlap(const BBox& b) const
+    {
+        return !(CoordT::lessThan(this->max(), b.min()) || CoordT::lessThan(b.max(), this->min()));
+    }
+
+    /// @warning This converts a CoordBBox into a floating-point bounding box which implies that max += 1 !
+    template<typename RealT = double>
+    __hostdev__ BBox<Vec3<RealT>> asReal() const
+    {
+        static_assert(util::is_floating_point<RealT>::value, "CoordBBox::asReal: Expected a floating point coordinate");
+        return BBox<Vec3<RealT>>(Vec3<RealT>(RealT(mCoord[0][0]), RealT(mCoord[0][1]), RealT(mCoord[0][2])),
+                                 Vec3<RealT>(RealT(mCoord[1][0] + 1), RealT(mCoord[1][1] + 1), RealT(mCoord[1][2] + 1)));
+    }
+    /// @brief Return a new instance that is expanded by the specified padding.
+    __hostdev__ BBox expandBy(typename CoordT::ValueType padding) const
+    {
+        return BBox(mCoord[0].offsetBy(-padding), mCoord[1].offsetBy(padding));
+    }
+
+    /// @brief transform this coordinate bounding box by the specified map
+    /// @param map mapping of index to world coordinates
+    /// @return world bounding box
+    template<typename Map>
+    __hostdev__ auto transform(const Map& map) const
+    {
+        using Vec3T = Vec3<double>;
+        const Vec3T tmp = map.applyMap(Vec3T(mCoord[0][0], mCoord[0][1], mCoord[0][2]));
+        BBox<Vec3T> bbox(tmp, tmp);// return value
+        bbox.expand(map.applyMap(Vec3T(mCoord[0][0], mCoord[0][1], mCoord[1][2])));
+        bbox.expand(map.applyMap(Vec3T(mCoord[0][0], mCoord[1][1], mCoord[0][2])));
+        bbox.expand(map.applyMap(Vec3T(mCoord[1][0], mCoord[0][1], mCoord[0][2])));
+        bbox.expand(map.applyMap(Vec3T(mCoord[1][0], mCoord[1][1], mCoord[0][2])));
+        bbox.expand(map.applyMap(Vec3T(mCoord[1][0], mCoord[0][1], mCoord[1][2])));
+        bbox.expand(map.applyMap(Vec3T(mCoord[0][0], mCoord[1][1], mCoord[1][2])));
+        bbox.expand(map.applyMap(Vec3T(mCoord[1][0], mCoord[1][1], mCoord[1][2])));
+        return bbox;
+    }
+
+#if defined(__CUDACC__) // the following functions only run on the GPU!
+    __device__ inline BBox& expandAtomic(const CoordT& ijk)
+    {
+        mCoord[0].minComponentAtomic(ijk);
+        mCoord[1].maxComponentAtomic(ijk);
+        return *this;
+    }
+    __device__ inline BBox& expandAtomic(const BBox& bbox)
+    {
+        mCoord[0].minComponentAtomic(bbox[0]);
+        mCoord[1].maxComponentAtomic(bbox[1]);
+        return *this;
+    }
+    __device__ inline BBox& intersectAtomic(const BBox& bbox)
+    {
+        mCoord[0].maxComponentAtomic(bbox[0]);
+        mCoord[1].minComponentAtomic(bbox[1]);
+        return *this;
+    }
+#endif
+}; // BBox<CoordT, false>
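A minimal sketch (not part of the patch) of iterating an integer bounding box, using the CoordBBox alias declared further down; note that both bounds are inclusive and z varies fastest:

    nanovdb::CoordBBox bbox(nanovdb::Coord(0), nanovdb::Coord(1)); // 2 x 2 x 2 = 8 voxels
    uint64_t n = 0;
    for (auto it = bbox.begin(); it; ++it) ++n; // visits (0,0,0), (0,0,1), (0,1,0), ...
    // n == bbox.volume() == 8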
+
+// --------------------------> Rgba8 <------------------------------------
+
+/// @brief 8-bit red, green, blue, alpha packed into 32 bit unsigned int
+class Rgba8
+{
+    union
+    {
+        uint8_t  c[4];   // 4 integer color channels of red, green, blue and alpha components.
+        uint32_t packed; // 32 bit packed representation
+    } mData;
+
+public:
+    static const int SIZE = 4;
+    using ValueType = uint8_t;
+
+    /// @brief Default copy constructor
+    Rgba8(const Rgba8&) = default;
+
+    /// @brief Default move constructor
+    Rgba8(Rgba8&&) = default;
+
+    /// @brief Default move assignment operator
+    /// @return non-const reference to this instance
+    Rgba8& operator=(Rgba8&&) = default;
+
+    /// @brief Default copy assignment operator
+    /// @return non-const reference to this instance
+    Rgba8& operator=(const Rgba8&) = default;
+
+    /// @brief Default ctor initializes all channels to zero
+    __hostdev__ Rgba8()
+        : mData{{0, 0, 0, 0}}
+    {
+        static_assert(sizeof(uint32_t) == sizeof(Rgba8), "Unexpected sizeof");
+    }
+
+    /// @brief integer r,g,b,a ctor where alpha channel defaults to opaque
+    /// @note all values should be in the range 0u to 255u
+    __hostdev__ Rgba8(uint8_t r, uint8_t g, uint8_t b, uint8_t a = 255u)
+        : mData{{r, g, b, a}}
+    {
+    }
+
+    /// @brief ctor where all channels are initialized to the same value
+    /// @note value should be in the range 0u to 255u
+    explicit __hostdev__ Rgba8(uint8_t v)
+        : mData{{v, v, v, v}}
+    {
+    }
+
+    /// @brief floating-point r,g,b,a ctor where alpha channel defaults to opaque
+    /// @note all values should be in the range 0.0f to 1.0f
+    __hostdev__ Rgba8(float r, float g, float b, float a = 1.0f)
+        : mData{{static_cast<uint8_t>(0.5f + r * 255.0f), // round floats to nearest integers
+                 static_cast<uint8_t>(0.5f + g * 255.0f), // double {{}} is needed due to union
+                 static_cast<uint8_t>(0.5f + b * 255.0f),
+                 static_cast<uint8_t>(0.5f + a * 255.0f)}}
+    {
+    }
+
+    /// @brief Vec3f r,g,b ctor (alpha channel is set to 1)
+    /// @note all values should be in the range 0.0f to 1.0f
+    __hostdev__ Rgba8(const Vec3<float>& rgb)
+        : Rgba8(rgb[0], rgb[1], rgb[2])
+    {
+    }
+
+    /// @brief Vec4f r,g,b,a ctor
+    /// @note all values should be in the range 0.0f to 1.0f
+    __hostdev__ Rgba8(const Vec4<float>& rgba)
+        : Rgba8(rgba[0], rgba[1], rgba[2], rgba[3])
+    {
+    }
+
+    __hostdev__ bool operator< (const Rgba8& rhs) const { return mData.packed < rhs.mData.packed; }
+    __hostdev__ bool operator==(const Rgba8& rhs) const { return mData.packed == rhs.mData.packed; }
+    __hostdev__ float lengthSqr() const
+    {
+        return 0.0000153787005f * (float(mData.c[0]) * mData.c[0] +
+                                   float(mData.c[1]) * mData.c[1] +
+                                   float(mData.c[2]) * mData.c[2]); //1/255^2
+    }
+    __hostdev__ float length() const { return sqrtf(this->lengthSqr()); }
+    /// @brief return n'th color channel as a float in the range 0 to 1
+    __hostdev__ float asFloat(int n) const { return 0.003921569f*float(mData.c[n]); }// divide by 255
+    __hostdev__ const uint8_t& operator[](int n) const { return mData.c[n]; }
+    __hostdev__ uint8_t& operator[](int n) { return mData.c[n]; }
+    __hostdev__ const uint32_t& packed() const { return mData.packed; }
+    __hostdev__ uint32_t& packed() { return mData.packed; }
+    __hostdev__ const uint8_t& r() const { return mData.c[0]; }
+    __hostdev__ const uint8_t& g() const { return mData.c[1]; }
+    __hostdev__ const uint8_t& b() const { return mData.c[2]; }
+    __hostdev__ const uint8_t& a() const { return mData.c[3]; }
+    __hostdev__ uint8_t& r() { return mData.c[0]; }
+    __hostdev__ uint8_t& g() { return mData.c[1]; }
+    __hostdev__ uint8_t& b() { return mData.c[2]; }
+    __hostdev__ uint8_t& a() { return mData.c[3]; }
+    __hostdev__ operator Vec3<float>() const {
+        return Vec3<float>(this->asFloat(0), this->asFloat(1), this->asFloat(2));
+    }
+    __hostdev__ operator Vec4<float>() const {
+        return Vec4<float>(this->asFloat(0), this->asFloat(1), this->asFloat(2), this->asFloat(3));
+    }
+}; // Rgba8
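A minimal sketch (not part of the patch) of the float-to-byte rounding used above:

    nanovdb::math::Rgba8 c(1.0f, 0.5f, 0.0f); // r = 255, g = 128, b = 0, a = 255 (opaque default)
    float g = c.asFloat(1);                   // 128 / 255, roughly 0.502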
+
+using Vec3d = Vec3<double>;
+using Vec3f = Vec3<float>;
+using Vec3i = Vec3<int32_t>;
+using Vec3u = Vec3<uint32_t>;
+using Vec3u8 = Vec3<uint8_t>;
+using Vec3u16 = Vec3<uint16_t>;
+
+using Vec4R = Vec4<double>;
+using Vec4d = Vec4<double>;
+using Vec4f = Vec4<float>;
+using Vec4i = Vec4<int>;
+
+}// namespace math ===============================================================
+
+using Rgba8 [[deprecated("Use math::Rgba8 instead.")]] = math::Rgba8;
+using math::Coord;
+
+using Vec3d = math::Vec3<double>;
+using Vec3f = math::Vec3<float>;
+using Vec3i = math::Vec3<int32_t>;
+using Vec3u = math::Vec3<uint32_t>;
+using Vec3u8 = math::Vec3<uint8_t>;
+using Vec3u16 = math::Vec3<uint16_t>;
+
+using Vec4R = math::Vec4<double>;
+using Vec4d = math::Vec4<double>;
+using Vec4f = math::Vec4<float>;
+using Vec4i = math::Vec4<int>;
+
+using CoordBBox = math::BBox<Coord>;
+using Vec3dBBox = math::BBox<Vec3d>;
+using BBoxR [[deprecated("Use Vec3dBBox instead.")]] = math::BBox<Vec3d>;
+
+} // namespace nanovdb ===================================================================
+
+#endif // end of NANOVDB_MATH_MATH_H_HAS_BEEN_INCLUDED
diff --git a/nanovdb/nanovdb/math/Ray.h b/nanovdb/nanovdb/math/Ray.h
new file mode 100644
index 0000000000..9f08288007
--- /dev/null
+++ b/nanovdb/nanovdb/math/Ray.h
@@ -0,0 +1,557 @@
+// Copyright Contributors to the OpenVDB Project
+// SPDX-License-Identifier: MPL-2.0
+
+/// @file Ray.h
+///
+/// @author Ken Museth
+///
+/// @brief A Ray class.
+
+#ifndef NANOVDB_MATH_RAY_H_HAS_BEEN_INCLUDED
+#define NANOVDB_MATH_RAY_H_HAS_BEEN_INCLUDED
+
+#include <nanovdb/math/Math.h> // for Vec3
+namespace nanovdb {// ===================================================
+
+namespace math {// ======================================================
+
+template<typename RealT>
+class Ray
+{
+public:
+    using RealType = RealT;
+    using Vec3Type = Vec3<RealT>;
+    using Vec3T = Vec3Type;
+
+    struct TimeSpan
+    {
+        RealT t0, t1;
+        /// @brief Default constructor
+        __hostdev__ TimeSpan() {}
+        /// @brief Constructor
+        __hostdev__ TimeSpan(RealT _t0, RealT _t1)
+            : t0(_t0)
+            , t1(_t1)
+        {
+        }
+        /// @brief Set both times
+        __hostdev__ void set(RealT _t0, RealT _t1)
+        {
+            t0 = _t0;
+            t1 = _t1;
+        }
+        /// @brief Get both times
+        __hostdev__ void get(RealT& _t0, RealT& _t1) const
+        {
+            _t0 = t0;
+            _t1 = t1;
+        }
+        /// @brief Return @c true if t1 is larger than t0 by at least eps.
+        __hostdev__ bool valid(RealT eps = Delta<RealT>::value()) const { return (t1 - t0) > eps; }
+        /// @brief Return the midpoint of the ray.
+        __hostdev__ RealT mid() const { return 0.5 * (t0 + t1); }
+        /// @brief Multiplies both times
+        __hostdev__ void scale(RealT s)
+        {
+            assert(s > 0);
+            t0 *= s;
+            t1 *= s;
+        }
+        /// @brief Return @c true if @a t is within t0 and t1, both inclusive
+        __hostdev__ bool test(RealT t) const { return (t >= t0 && t <= t1); }
+    };
+
+    __hostdev__ Ray(const Vec3Type& eye = Vec3Type(0, 0, 0),
+                    const Vec3Type& direction = Vec3Type(1, 0, 0),
+                    RealT t0 = Delta<RealT>::value(),
+                    RealT t1 = Maximum<RealT>::value())
+        : mEye(eye)
+        , mDir(direction)
+        , mInvDir(1 / mDir[0], 1 / mDir[1], 1 / mDir[2])
+        , mTimeSpan(t0, t1)
+        , mSign{mInvDir[0] < 0, mInvDir[1] < 0, mInvDir[2] < 0}
+    {
+    }
+
+    __hostdev__ Ray& offsetEye(RealT offset)
+    {
+        mEye[0] += offset;
+        mEye[1] += offset;
+        mEye[2] += offset;
+        return *this;
+    }
+
+    __hostdev__ Ray& setEye(const Vec3Type& eye)
+    {
+        mEye = eye;
+        return *this;
+    }
+
+    __hostdev__ Ray& setDir(const Vec3Type& dir)
+    {
+        mDir = dir;
+        mInvDir[0] = 1.0 / mDir[0];
+        mInvDir[1] = 1.0 / mDir[1];
+        mInvDir[2] = 1.0 / mDir[2];
+        mSign[0] = mInvDir[0] < 0;
+        mSign[1] = mInvDir[1] < 0;
+        mSign[2] = mInvDir[2] < 0;
+        return *this;
+    }
+
+    __hostdev__ Ray& setMinTime(RealT t0)
+    {
+        mTimeSpan.t0 = t0;
+        return *this;
+    }
+
+    __hostdev__ Ray& setMaxTime(RealT t1)
+    {
+        mTimeSpan.t1 = t1;
+        return *this;
+    }
+
+    __hostdev__ Ray& setTimes(
+        RealT t0 = Delta<RealT>::value(),
+        RealT t1 = Maximum<RealT>::value())
+    {
+        assert(t0 > 0 && t1 > 0);
+        mTimeSpan.set(t0, t1);
+        return *this;
+    }
+
+    __hostdev__ Ray& scaleTimes(RealT scale)
+    {
+        mTimeSpan.scale(scale);
+        return *this;
+    }
+
+    __hostdev__ Ray& reset(
+        const Vec3Type& eye,
+        const Vec3Type& direction,
+        RealT t0 = Delta<RealT>::value(),
+        RealT t1 = Maximum<RealT>::value())
+    {
+        this->setEye(eye);
+        this->setDir(direction);
+        this->setTimes(t0, t1);
+        return *this;
+    }
+
+    __hostdev__ const Vec3T& eye() const { return mEye; }
+
+    __hostdev__ const Vec3T& dir() const { return mDir; }
+
+    __hostdev__ const Vec3T& invDir() const { return mInvDir; }
+
+    __hostdev__ RealT t0() const { return mTimeSpan.t0; }
+
+    __hostdev__ RealT t1() const { return mTimeSpan.t1; }
+
+    __hostdev__ int sign(int i) const { return mSign[i]; }
+
+    /// @brief Return the position along the ray at the specified time.
+    __hostdev__ Vec3T operator()(RealT time) const
+    {
+#if 1
+        return Vec3T(fmaf(time, mDir[0], mEye[0]),
+                     fmaf(time, mDir[1], mEye[1]),
+                     fmaf(time, mDir[2], mEye[2]));
+#else
+        return mEye + mDir * time;
+#endif
+    }
+
+    /// @brief Return the starting point of the ray.
+    __hostdev__ Vec3T start() const { return (*this)(mTimeSpan.t0); }
+
+    /// @brief Return the endpoint of the ray.
+    __hostdev__ Vec3T end() const { return (*this)(mTimeSpan.t1); }
+
+    /// @brief Return the midpoint of the ray.
+    __hostdev__ Vec3T mid() const { return (*this)(mTimeSpan.mid()); }
+
+    /// @brief Return @c true if t1 is larger than t0 by at least eps.
+    __hostdev__ bool valid(RealT eps = Delta<RealT>::value()) const { return mTimeSpan.valid(eps); }
+
+    /// @brief Return @c true if @a time is within t0 and t1, both inclusive.
+    __hostdev__ bool test(RealT time) const { return mTimeSpan.test(time); }
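A minimal sketch (not part of the patch) of constructing and evaluating a ray:

    nanovdb::math::Ray<float> ray(nanovdb::math::Vec3<float>(0, 0, 0), // eye
                                  nanovdb::math::Vec3<float>(1, 0, 0), // direction
                                  1.0f, 10.0f);                        // [t0, t1]
    auto p  = ray(2.5f);      // eye + 2.5 * dir = (2.5, 0, 0)
    bool ok = ray.test(2.5f); // true, since 2.5 lies in [1, 10]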
+
+    /// @brief Return a new Ray that is transformed with the specified map.
+    ///
+    /// @param map the map from which to construct the new Ray.
+    ///
+    /// @warning Assumes a linear map and a normalized direction.
+    ///
+    /// @details The requirement that the direction is normalized
+    ///          follows from the transformation of t0 and t1 - and the fact that
+    ///          we want applyMap and applyInverseMap to be inverse operations.
+    template<typename MapType>
+    __hostdev__ Ray applyMap(const MapType& map) const
+    {
+        const Vec3T eye = map.applyMap(mEye);
+        const Vec3T dir = map.applyJacobian(mDir);
+        const RealT length = dir.length(), invLength = RealT(1) / length;
+        RealT t1 = mTimeSpan.t1;
+        if (mTimeSpan.t1 < Maximum<RealT>::value()) {
+            t1 *= length;
+        }
+        return Ray(eye, dir * invLength, length * mTimeSpan.t0, t1);
+    }
+    template<typename MapType>
+    __hostdev__ Ray applyMapF(const MapType& map) const
+    {
+        const Vec3T eye = map.applyMapF(mEye);
+        const Vec3T dir = map.applyJacobianF(mDir);
+        const RealT length = dir.length(), invLength = RealT(1) / length;
+        RealT t1 = mTimeSpan.t1;
+        if (mTimeSpan.t1 < Maximum<RealT>::value()) {
+            t1 *= length;
+        }
+        return Ray(eye, dir * invLength, length * mTimeSpan.t0, t1);
+    }
+
+    /// @brief Return a new Ray that is transformed with the inverse of the specified map.
+    ///
+    /// @param map the map from which to construct the new Ray by inverse mapping.
+    ///
+    /// @warning Assumes a linear map and a normalized direction.
+    ///
+    /// @details The requirement that the direction is normalized
+    ///          follows from the transformation of t0 and t1 - and the fact that
+    ///          we want applyMap and applyInverseMap to be inverse operations.
+    template<typename MapType>
+    __hostdev__ Ray applyInverseMap(const MapType& map) const
+    {
+        const Vec3T eye = map.applyInverseMap(mEye);
+        const Vec3T dir = map.applyInverseJacobian(mDir);
+        const RealT length = dir.length(), invLength = RealT(1) / length;
+        return Ray(eye, dir * invLength, length * mTimeSpan.t0, length * mTimeSpan.t1);
+    }
+    template<typename MapType>
+    __hostdev__ Ray applyInverseMapF(const MapType& map) const
+    {
+        const Vec3T eye = map.applyInverseMapF(mEye);
+        const Vec3T dir = map.applyInverseJacobianF(mDir);
+        const RealT length = dir.length(), invLength = RealT(1) / length;
+        return Ray(eye, dir * invLength, length * mTimeSpan.t0, length * mTimeSpan.t1);
+    }
+
+    /// @brief Return a new ray in world space, assuming the existing
+    ///        ray is represented in the index space of the specified grid.
+    template<typename GridType>
+    __hostdev__ Ray indexToWorldF(const GridType& grid) const
+    {
+        const Vec3T eye = grid.indexToWorldF(mEye);
+        const Vec3T dir = grid.indexToWorldDirF(mDir);
+        const RealT length = dir.length(), invLength = RealT(1) / length;
+        RealT t1 = mTimeSpan.t1;
+        if (mTimeSpan.t1 < Maximum<RealT>::value()) {
+            t1 *= length;
+        }
+        return Ray(eye, dir * invLength, length * mTimeSpan.t0, t1);
+    }
+
+    /// @brief Return a new ray in index space, assuming the existing
+    ///        ray is represented in the world space of the specified grid.
+    template<typename GridType>
+    __hostdev__ Ray worldToIndexF(const GridType& grid) const
+    {
+        const Vec3T eye = grid.worldToIndexF(mEye);
+        const Vec3T dir = grid.worldToIndexDirF(mDir);
+        const RealT length = dir.length(), invLength = RealT(1) / length;
+        RealT t1 = mTimeSpan.t1;
+        if (mTimeSpan.t1 < Maximum<RealT>::value()) {
+            t1 *= length;
+        }
+        return Ray(eye, dir * invLength, length * mTimeSpan.t0, t1);
+    }
+
+    /// @brief Return true if this ray intersects the specified sphere.
+    ///
+    /// @param center The center of the sphere in the same space as this ray.
+    /// @param radius The radius of the sphere in the same units as this ray.
+    /// @param t0     The first intersection point if an intersection exists.
+    /// @param t1     The second intersection point if an intersection exists.
+    ///
+    /// @note If the return value is true, i.e. a hit, and t0 == this->t0()
+    ///       or t1 == this->t1(), only one true intersection exists.
+    __hostdev__ bool intersects(const Vec3T& center, RealT radius, RealT& t0, RealT& t1) const
+    {
+        const Vec3T origin = mEye - center;
+        const RealT A = mDir.lengthSqr();
+        const RealT B = 2 * mDir.dot(origin);
+        const RealT C = origin.lengthSqr() - radius * radius;
+        const RealT D = B * B - 4 * A * C;
+
+        if (D < 0) {
+            return false;
+        }
+        const RealT Q = RealT(-0.5) * (B < 0 ? (B + Sqrt(D)) : (B - Sqrt(D)));
+
+        t0 = Q / A;
+        t1 = C / Q;
+
+        if (t0 > t1) {
+            RealT tmp = t0;
+            t0 = t1;
+            t1 = tmp;
+        }
+        if (t0 < mTimeSpan.t0) {
+            t0 = mTimeSpan.t0;
+        }
+        if (t1 > mTimeSpan.t1) {
+            t1 = mTimeSpan.t1;
+        }
+        return t0 <= t1;
+    }
+
+    /// @brief Return true if this ray intersects the specified sphere.
+    ///
+    /// @param center The center of the sphere in the same space as this ray.
+    /// @param radius The radius of the sphere in the same units as this ray.
+    __hostdev__ bool intersects(const Vec3T& center, RealT radius) const
+    {
+        RealT t0, t1;
+        return this->intersects(center, radius, t0, t1);
+    }
+
+    /// @brief Return true if this ray intersects the specified sphere.
+    ///
+    /// @note For intersection this ray is clipped to the two intersection points.
+    ///
+    /// @param center The center of the sphere in the same space as this ray.
+    /// @param radius The radius of the sphere in the same units as this ray.
+    __hostdev__ bool clip(const Vec3T& center, RealT radius)
+    {
+        RealT t0, t1;
+        const bool hit = this->intersects(center, radius, t0, t1);
+        if (hit) {
+            mTimeSpan.set(t0, t1);
+        }
+        return hit;
+    }
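A minimal sketch (not part of the patch) of the ray-sphere test above; the values follow from the quadratic solve:

    nanovdb::math::Ray<float> ray(nanovdb::math::Vec3<float>(-5, 0, 0),
                                  nanovdb::math::Vec3<float>( 1, 0, 0));
    float t0, t1;
    if (ray.intersects(nanovdb::math::Vec3<float>(0, 0, 0), 1.0f, t0, t1)) {
        // unit sphere at the origin: t0 == 4, t1 == 6
    }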
+#if 0
+    /// @brief Return true if the Ray intersects the specified
+    ///        axis-aligned bounding box.
+    ///
+    /// @param bbox Axis-aligned bounding box in the same space as the Ray.
+    /// @param t0   If an intersection is detected this is assigned
+    ///             the time for the first intersection point.
+    /// @param t1   If an intersection is detected this is assigned
+    ///             the time for the second intersection point.
+    template<typename BBoxT>
+    __hostdev__ bool intersects(const BBoxT& bbox, RealT& t0, RealT& t1) const
+    {
+        t0 = (bbox[  mSign[0]][0] - mEye[0]) * mInvDir[0];
+        RealT t2 = (bbox[1-mSign[1]][1] - mEye[1]) * mInvDir[1];
+        if (t0 > t2) return false;
+        t1 = (bbox[1-mSign[0]][0] - mEye[0]) * mInvDir[0];
+        RealT t3 = (bbox[  mSign[1]][1] - mEye[1]) * mInvDir[1];
+        if (t3 > t1) return false;
+        if (t3 > t0) t0 = t3;
+        if (t2 < t1) t1 = t2;
+        t3 = (bbox[  mSign[2]][2] - mEye[2]) * mInvDir[2];
+        if (t3 > t1) return false;
+        t2 = (bbox[1-mSign[2]][2] - mEye[2]) * mInvDir[2];
+        if (t0 > t2) return false;
+        if (t3 > t0) t0 = t3;
+        if (mTimeSpan.t1 < t0) return false;
+        if (t2 < t1) t1 = t2;
+        if (mTimeSpan.t0 > t1) return false;
+        if (mTimeSpan.t0 > t0) t0 = mTimeSpan.t0;
+        if (mTimeSpan.t1 < t1) t1 = mTimeSpan.t1;
+        return true;
+        /*
+        mTimeSpan.get(_t0, _t1);
+        double t0 = _t0, t1 = _t1;
+        for (int i = 0; i < 3; ++i) {
+            //if (abs(mDir[i])<1e-3) continue;
+            double a = (double(bbox.min()[i]) - mEye[i]) * mInvDir[i];
+            double b = (double(bbox.max()[i]) - mEye[i]) * mInvDir[i];
+            if (a > b) {
+                double tmp = a;
+                a = b;
+                b = tmp;
+            }
+            if (a > t0) t0 = a;
+            if (b < t1) t1 = b;
+            if (t0 > t1) {
+                //if (gVerbose) printf("Missed BBOX: (%i,%i,%i) -> (%i,%i,%i) t0=%f t1=%f\n",
+                //                     bbox.min()[0], bbox.min()[1], bbox.min()[2],
+                //                     bbox.max()[0], bbox.max()[1], bbox.max()[2], t0, t1);
+                return false;
+            }
+        }
+        _t0 = t0; _t1 = t1;
+        return true;
+        */
+    }
+#else
+    /// @brief Returns true if this ray intersects an index bounding box.
+    ///        If the return value is true t0 and t1 are set to the intersection
+    ///        times along the ray.
+    ///
+    /// @warning Intersection with a CoordBBox internally converts to a floating-point bbox
+    ///          which implies that the max is padded with one voxel, i.e. bbox.max += 1! This
+    ///          avoids gaps between neighboring CoordBBox'es, say from neighboring tree nodes.
+    __hostdev__ bool intersects(const CoordBBox& bbox, RealT& t0, RealT& t1) const
+    {
+        mTimeSpan.get(t0, t1);
+        for (int i = 0; i < 3; ++i) {
+            RealT a = RealT(bbox.min()[i]), b = RealT(bbox.max()[i] + 1);
+            if (a >= b) { // empty bounding box
+                return false;
+            }
+            a = (a - mEye[i]) * mInvDir[i];
+            b = (b - mEye[i]) * mInvDir[i];
+            if (a > b) {
+                RealT tmp = a;
+                a = b;
+                b = tmp;
+            }
+            if (a > t0) {
+                t0 = a;
+            }
+            if (b < t1) {
+                t1 = b;
+            }
+            if (t0 > t1) {
+                return false;
+            }
+        }
+        return true;
+    }
+    /// @brief Returns true if this ray intersects a floating-point bounding box.
+    ///        If the return value is true t0 and t1 are set to the intersection
+    ///        times along the ray.
+    template<typename OtherVec3T>
+    __hostdev__ bool intersects(const BBox<OtherVec3T>& bbox, RealT& t0, RealT& t1) const
+    {
+        static_assert(util::is_floating_point<typename OtherVec3T::ValueType>::value, "Ray::intersects: Expected a floating point coordinate");
+        mTimeSpan.get(t0, t1);
+        for (int i = 0; i < 3; ++i) {
+            RealT a = RealT(bbox.min()[i]), b = RealT(bbox.max()[i]);
+            if (a >= b) { // empty bounding box
+                return false;
+            }
+            a = (a - mEye[i]) * mInvDir[i];
+            b = (b - mEye[i]) * mInvDir[i];
+            if (a > b) {
+                RealT tmp = a;
+                a = b;
+                b = tmp;
+            }
+            if (a > t0) {
+                t0 = a;
+            }
+            if (b < t1) {
+                t1 = b;
+            }
+            if (t0 > t1) {
+                return false;
+            }
+        }
+        return true;
+    }
+#endif
+
+    /// @brief Return true if this ray intersects the specified bounding box.
+    ///
+    /// @param bbox Axis-aligned bounding box in the same space as this ray.
+    ///
+    /// @warning If @a bbox is of the type CoordBBox it is converted to a floating-point
+    ///          bounding box, which implies that the max is padded with one voxel, i.e.
+    ///          bbox.max += 1! This avoids gaps between neighboring CoordBBox'es, say
+    ///          from neighboring tree nodes.
+    template<typename BBoxT>
+    __hostdev__ bool intersects(const BBoxT& bbox) const
+    {
+#if 1
+        RealT t0, t1;
+        return this->intersects(bbox, t0, t1);
+#else
+        //BBox<Vec3T> bbox(Vec3T(_bbox[0][0]-1e-4,_bbox[0][1]-1e-4,_bbox[0][2]-1e-4),
+        //                 Vec3T(_bbox[1][0]+1e-4,_bbox[1][1]+1e-4,_bbox[1][2]+1e-4));
+        RealT t0 = (bbox[mSign[0]][0] - mEye[0]) * mInvDir[0];
+        RealT t2 = (bbox[1 - mSign[1]][1] - mEye[1]) * mInvDir[1];
+        if (t0 > t2) return false;
+        RealT t1 = (bbox[1 - mSign[0]][0] - mEye[0]) * mInvDir[0];
+        RealT t3 = (bbox[mSign[1]][1] - mEye[1]) * mInvDir[1];
+        if (t3 > t1) return false;
+        if (t3 > t0) t0 = t3;
+        if (t2 < t1) t1 = t2;
+        t3 = (bbox[mSign[2]][2] - mEye[2]) * mInvDir[2];
+        if (t3 > t1) return false;
+        t2 = (bbox[1 - mSign[2]][2] - mEye[2]) * mInvDir[2];
+        if (t0 > t2) return false;
+        //if (t3 > t0) t0 = t3;
+        //if (mTimeSpan.t1 < t0) return false;
+        //if (t2 < t1) t1 = t2;
+        //return mTimeSpan.t0 < t1;
+        return true;
+#endif
+    }
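A minimal sketch (not part of the patch) of the CoordBBox overload, including the one-voxel padding of the max:

    nanovdb::CoordBBox bbox(nanovdb::Coord(0), nanovdb::Coord(7)); // e.g. one 8^3 leaf node
    nanovdb::math::Ray<float> ray(nanovdb::math::Vec3<float>(-1, 4, 4),
                                  nanovdb::math::Vec3<float>( 1, 0, 0));
    float t0, t1;
    bool hit = ray.intersects(bbox, t0, t1); // true, t0 == 1 and t1 == 9 since max is padded to 8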
+
+    /// @brief Return true if this ray intersects the specified bounding box.
+    ///
+    /// @param bbox Axis-aligned bounding box in the same space as this ray.
+    ///
+    /// @warning If @a bbox is of the type CoordBBox it is converted to a floating-point
+    ///          bounding box, which implies that the max is padded with one voxel, i.e.
+    ///          bbox.max += 1! This avoids gaps between neighboring CoordBBox'es, say
+    ///          from neighboring tree nodes.
+    ///
+    /// @note For intersection this ray is clipped to the two intersection points.
+    template<typename BBoxT>
+    __hostdev__ bool clip(const BBoxT& bbox)
+    {
+        RealT t0, t1;
+        const bool hit = this->intersects(bbox, t0, t1);
+        if (hit) {
+            mTimeSpan.set(t0, t1);
+        }
+        return hit;
+    }
+
+    /// @brief Return true if the Ray intersects the plane specified
+    ///        by a normal and distance from the origin.
+    ///
+    /// @param normal   Normal of the plane.
+    /// @param distance Distance of the plane to the origin.
+    /// @param t        Time of intersection, if one exists.
+    __hostdev__ bool intersects(const Vec3T& normal, RealT distance, RealT& t) const
+    {
+        const RealT cosAngle = mDir.dot(normal);
+        if (isApproxZero(cosAngle)) {
+            return false; // ray is parallel to plane
+        }
+        t = (distance - mEye.dot(normal)) / cosAngle;
+        return this->test(t);
+    }
+
+    /// @brief Return true if the Ray intersects the plane specified
+    ///        by a normal and point.
+    ///
+    /// @param normal Normal of the plane.
+    /// @param point  Point in the plane.
+    /// @param t      Time of intersection, if one exists.
+    __hostdev__ bool intersects(const Vec3T& normal, const Vec3T& point, RealT& t) const
+    {
+        return this->intersects(normal, point.dot(normal), t);
+    }
+
+private:
+    Vec3T mEye, mDir, mInvDir;
+    TimeSpan mTimeSpan;
+    int mSign[3];
+}; // end of Ray class
+
+} // namespace math =========================================================
+
+template<typename RealT>
+using Ray [[deprecated("Use nanovdb::math::Ray instead")]] = math::Ray<RealT>;
+
+} // namespace nanovdb =======================================================
+
+#endif // NANOVDB_MATH_RAY_H_HAS_BEEN_INCLUDED
diff --git a/nanovdb/nanovdb/math/SampleFromVoxels.h b/nanovdb/nanovdb/math/SampleFromVoxels.h
new file mode 100644
index 0000000000..e4f1e26018
--- /dev/null
+++ b/nanovdb/nanovdb/math/SampleFromVoxels.h
@@ -0,0 +1,996 @@
+// Copyright Contributors to the OpenVDB Project
+// SPDX-License-Identifier: MPL-2.0
+
+//////////////////////////////////////////////////////////////////////////
+///
+/// @file SampleFromVoxels.h
+///
+/// @brief NearestNeighborSampler, TrilinearSampler, TriquadraticSampler and TricubicSampler
+///
+/// @note These interpolators employ internal caching for better performance when used repeatedly
+///       in the same voxel location, so try to reuse an instance of these classes more than once.
+///
+/// @warning While all the interpolators defined below work with both scalar and vector
+///          values (e.g. float and Vec3<float>) TrilinearSampler::zeroCrossing and
+///          TrilinearSampler::gradient will only compile with floating point value types.
+///
+/// @author Ken Museth
+///
+///////////////////////////////////////////////////////////////////////////
+
+#ifndef NANOVDB_SAMPLE_FROM_VOXELS_H_HAS_BEEN_INCLUDED
+#define NANOVDB_SAMPLE_FROM_VOXELS_H_HAS_BEEN_INCLUDED
+
+// Only define __hostdev__ when compiling as NVIDIA CUDA
+#if defined(__CUDACC__) || defined(__HIP__)
+#define __hostdev__ __host__ __device__
+#else
+#include <cmath> // for floor
+#define __hostdev__
+#endif
+
+#include <nanovdb/NanoVDB.h>
+
+namespace nanovdb {
+
+namespace math {
+
+// Forward declaration of sampler with specific polynomial orders
+template<typename TreeOrAccT, int Order, bool UseCache = true>
+class SampleFromVoxels;
+
+/// @brief Factory free-function for a sampler of specific polynomial orders
+///
+/// @details This allows for the compact syntax:
+/// @code
+///   auto acc = grid.getAccessor();
+///   auto smp = nanovdb::math::createSampler<1>( acc );
+/// @endcode
+template<int Order, typename TreeOrAccT, bool UseCache = true>
+__hostdev__ SampleFromVoxels<TreeOrAccT, Order, UseCache> createSampler(const TreeOrAccT& acc)
+{
+    return SampleFromVoxels<TreeOrAccT, Order, UseCache>(acc);
+}
+
+/// @brief Utility function that returns the Coord of the round-down of @a xyz
+///        and redefines @a xyz as the fractional part, ie xyz-in = return-value + xyz-out
+template<typename CoordT, typename RealT, template<typename> class Vec3T>
+__hostdev__ inline CoordT Floor(Vec3T<RealT>& xyz);
+
+/// @brief Template specialization of Floor for Vec3<float>
+template<typename CoordT, template<typename> class Vec3T>
+__hostdev__ inline CoordT Floor(Vec3T<float>& xyz)
+{
+    const float ijk[3] = {floorf(xyz[0]), floorf(xyz[1]), floorf(xyz[2])};
+    xyz[0] -= ijk[0];
+    xyz[1] -= ijk[1];
+    xyz[2] -= ijk[2];
+    return CoordT(int32_t(ijk[0]), int32_t(ijk[1]), int32_t(ijk[2]));
+}
+
+/// @brief Template specialization of Floor for Vec3<double>
+template<typename CoordT, template<typename> class Vec3T>
+__hostdev__ inline CoordT Floor(Vec3T<double>& xyz)
+{
+    const double ijk[3] = {floor(xyz[0]), floor(xyz[1]), floor(xyz[2])};
+    xyz[0] -= ijk[0];
+    xyz[1] -= ijk[1];
+    xyz[2] -= ijk[2];
+    return CoordT(int32_t(ijk[0]), int32_t(ijk[1]), int32_t(ijk[2]));
+}
+
+// ------------------------------> NearestNeighborSampler <--------------------------------------
+
+/// @brief Nearest neighbor, i.e. zero order, interpolator with caching
+template<typename TreeOrAccT>
+class SampleFromVoxels<TreeOrAccT, 0, true>
+{
+public:
+    using ValueT = typename TreeOrAccT::ValueType;
+    using CoordT = typename TreeOrAccT::CoordType;
+
+    static const int ORDER = 0;
+    /// @brief Construction from a Tree or ReadAccessor
+    __hostdev__ SampleFromVoxels(const TreeOrAccT& acc)
+        : mAcc(acc)
+        , mPos(CoordT::max())
+    {
+    }
+
+    __hostdev__ const TreeOrAccT& accessor() const { return mAcc; }
+
+    /// @note xyz is in index space
+    template<typename Vec3T>
+    inline __hostdev__ ValueT operator()(const Vec3T& xyz) const;
+
+    inline __hostdev__ ValueT operator()(const CoordT& ijk) const;
+
+private:
+    const TreeOrAccT& mAcc;
+    mutable CoordT    mPos;
+    mutable ValueT    mVal; // private cache
+}; // SampleFromVoxels<TreeOrAccT, 0, true>
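A minimal sketch (not part of the patch), assuming `acc` is a read accessor of a float grid (e.g. from NanoGrid<float>::getAccessor()):

    auto nearest = nanovdb::math::createSampler<0>(acc);       // cached variant by default
    float v = nearest(nanovdb::math::Vec3<float>(1.2f, 3.7f, 9.1f)); // value at Round(xyz) = (1, 4, 9)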
+/// @brief Nearest neighbor, i.e. zero order, interpolator without caching
+template<typename TreeOrAccT>
+class SampleFromVoxels<TreeOrAccT, 0, false>
+{
+public:
+    using ValueT = typename TreeOrAccT::ValueType;
+    using CoordT = typename TreeOrAccT::CoordType;
+    static const int ORDER = 0;
+
+    /// @brief Construction from a Tree or ReadAccessor
+    __hostdev__ SampleFromVoxels(const TreeOrAccT& acc)
+        : mAcc(acc)
+    {
+    }
+
+    __hostdev__ const TreeOrAccT& accessor() const { return mAcc; }
+
+    /// @note xyz is in index space
+    template<typename Vec3T>
+    inline __hostdev__ ValueT operator()(const Vec3T& xyz) const;
+
+    inline __hostdev__ ValueT operator()(const CoordT& ijk) const { return mAcc.getValue(ijk); }
+
+private:
+    const TreeOrAccT& mAcc;
+}; // SampleFromVoxels<TreeOrAccT, 0, false>
+
+template<typename TreeOrAccT>
+template<typename Vec3T>
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, true>::operator()(const Vec3T& xyz) const
+{
+    const CoordT ijk = math::Round<CoordT>(xyz);
+    if (ijk != mPos) {
+        mPos = ijk;
+        mVal = mAcc.getValue(mPos);
+    }
+    return mVal;
+}
+
+template<typename TreeOrAccT>
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, true>::operator()(const CoordT& ijk) const
+{
+    if (ijk != mPos) {
+        mPos = ijk;
+        mVal = mAcc.getValue(mPos);
+    }
+    return mVal;
+}
+
+template<typename TreeOrAccT>
+template<typename Vec3T>
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, false>::operator()(const Vec3T& xyz) const
+{
+    return mAcc.getValue(math::Round<CoordT>(xyz));
+}
+
+// ------------------------------> TrilinearSampler <--------------------------------------
+
+/// @brief Tri-linear sampler, i.e. first order, interpolator
+template<typename TreeOrAccT>
+class TrilinearSampler
+{
+protected:
+    const TreeOrAccT& mAcc;
+
+public:
+    using ValueT = typename TreeOrAccT::ValueType;
+    using CoordT = typename TreeOrAccT::CoordType;
+    static const int ORDER = 1;
+
+    /// @brief Construction from a Tree or ReadAccessor
+    __hostdev__ TrilinearSampler(const TreeOrAccT& acc) : mAcc(acc) {}
+
+    __hostdev__ const TreeOrAccT& accessor() const { return mAcc; }
+
+    /// @brief Extract the stencil of 8 values
+    inline __hostdev__ void stencil(CoordT& ijk, ValueT (&v)[2][2][2]) const;
+
+    template<typename RealT, template<typename> class Vec3T>
+    static inline __hostdev__ ValueT sample(const Vec3T<RealT> &uvw, const ValueT (&v)[2][2][2]);
+
+    template<typename RealT, template<typename> class Vec3T>
+    static inline __hostdev__ Vec3T<ValueT> gradient(const Vec3T<RealT> &uvw, const ValueT (&v)[2][2][2]);
+
+    static inline __hostdev__ bool zeroCrossing(const ValueT (&v)[2][2][2]);
+}; // TrilinearSampler
+
+template<typename TreeOrAccT>
+__hostdev__ void TrilinearSampler<TreeOrAccT>::stencil(CoordT& ijk, ValueT (&v)[2][2][2]) const
+{
+    v[0][0][0] = mAcc.getValue(ijk); // i, j, k
+
+    ijk[2] += 1;
+    v[0][0][1] = mAcc.getValue(ijk); // i, j, k + 1
+
+    ijk[1] += 1;
+    v[0][1][1] = mAcc.getValue(ijk); // i, j+1, k + 1
+
+    ijk[2] -= 1;
+    v[0][1][0] = mAcc.getValue(ijk); // i, j+1, k
+
+    ijk[0] += 1;
+    ijk[1] -= 1;
+    v[1][0][0] = mAcc.getValue(ijk); // i+1, j, k
+
+    ijk[2] += 1;
+    v[1][0][1] = mAcc.getValue(ijk); // i+1, j, k + 1
+
+    ijk[1] += 1;
+    v[1][1][1] = mAcc.getValue(ijk); // i+1, j+1, k + 1
+
+    ijk[2] -= 1;
+    v[1][1][0] = mAcc.getValue(ijk); // i+1, j+1, k
+}
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename> class Vec3T>
+__hostdev__ typename TreeOrAccT::ValueType TrilinearSampler<TreeOrAccT>::sample(const Vec3T<RealT> &uvw, const ValueT (&v)[2][2][2])
+{
+#if 0
+    auto lerp = [](ValueT a, ValueT b, ValueT w){ return fma(w, b-a, a); };// = w*(b-a) + a
+    //auto lerp = [](ValueT a, ValueT b, ValueT w){ return fma(w, b, fma(-w, a, a));};// = (1-w)*a + w*b
+#else
+    auto lerp = [](ValueT a, ValueT b, RealT w) { return a + ValueT(w) * (b - a); };
+#endif
+    return lerp(lerp(lerp(v[0][0][0], v[0][0][1], uvw[2]), lerp(v[0][1][0], v[0][1][1], uvw[2]), uvw[1]),
+                lerp(lerp(v[1][0][0], v[1][0][1], uvw[2]), lerp(v[1][1][0], v[1][1][1], uvw[2]), uvw[1]),
+                uvw[0]);
+}
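// [editorial note] The static sample() above can also be driven manually when one
// stencil of 8 values is reused for many fractional offsets; an illustrative sketch
// only (the accessor type `AccT`, accessor `acc` and index-space coordinate `ijk`
// are assumptions):
//
//     nanovdb::math::TrilinearSampler<AccT> s(acc);
//     typename AccT::ValueType v[2][2][2];
//     auto lower = ijk;     // note: stencil() advances the coordinate in place
//     s.stencil(lower, v);  // one round of tree traversals for all 8 corners
//     auto a = s.sample(nanovdb::math::Vec3<float>(0.25f, 0.5f, 0.75f), v);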
+template<typename TreeOrAccT>
+template<typename RealT, template<typename> class Vec3T>
+__hostdev__ Vec3T<typename TreeOrAccT::ValueType> TrilinearSampler<TreeOrAccT>::gradient(const Vec3T<RealT> &uvw, const ValueT (&v)[2][2][2])
+{
+    static_assert(util::is_floating_point<ValueT>::value, "TrilinearSampler::gradient requires a floating-point type");
+#if 0
+    auto lerp = [](ValueT a, ValueT b, ValueT w){ return fma(w, b-a, a); };// = w*(b-a) + a
+    //auto lerp = [](ValueT a, ValueT b, ValueT w){ return fma(w, b, fma(-w, a, a));};// = (1-w)*a + w*b
+#else
+    auto lerp = [](ValueT a, ValueT b, RealT w) { return a + ValueT(w) * (b - a); };
+#endif
+
+    ValueT D[4] = {v[0][0][1] - v[0][0][0], v[0][1][1] - v[0][1][0], v[1][0][1] - v[1][0][0], v[1][1][1] - v[1][1][0]};
+
+    // Z component
+    Vec3T<ValueT> grad(0, 0, lerp(lerp(D[0], D[1], uvw[1]), lerp(D[2], D[3], uvw[1]), uvw[0]));
+
+    const ValueT w = ValueT(uvw[2]);
+    D[0] = v[0][0][0] + D[0] * w;
+    D[1] = v[0][1][0] + D[1] * w;
+    D[2] = v[1][0][0] + D[2] * w;
+    D[3] = v[1][1][0] + D[3] * w;
+
+    // X component
+    grad[0] = lerp(D[2], D[3], uvw[1]) - lerp(D[0], D[1], uvw[1]);
+
+    // Y component
+    grad[1] = lerp(D[1] - D[0], D[3] - D[2], uvw[0]);
+
+    return grad;
+}
+
+template<typename TreeOrAccT>
+__hostdev__ bool TrilinearSampler<TreeOrAccT>::zeroCrossing(const ValueT (&v)[2][2][2])
+{
+    static_assert(util::is_floating_point<ValueT>::value, "TrilinearSampler::zeroCrossing requires a floating-point type");
+    const bool less = v[0][0][0] < ValueT(0);
+    return (less ^ (v[0][0][1] < ValueT(0))) ||
+           (less ^ (v[0][1][1] < ValueT(0))) ||
+           (less ^ (v[0][1][0] < ValueT(0))) ||
+           (less ^ (v[1][0][0] < ValueT(0))) ||
+           (less ^ (v[1][0][1] < ValueT(0))) ||
+           (less ^ (v[1][1][1] < ValueT(0))) ||
+           (less ^ (v[1][1][0] < ValueT(0)));
+}
+
+/// @brief Template specialization that does not use caching of stencil points
+template<typename TreeOrAccT>
+class SampleFromVoxels<TreeOrAccT, 1, false> : public TrilinearSampler<TreeOrAccT>
+{
+    using BaseT = TrilinearSampler<TreeOrAccT>;
+    using ValueT = typename TreeOrAccT::ValueType;
+    using CoordT = typename TreeOrAccT::CoordType;
+
+public:
+
+    /// @brief Construction from a Tree or ReadAccessor
+    __hostdev__ SampleFromVoxels(const TreeOrAccT& acc) : BaseT(acc) {}
+
+    /// @note xyz is in index space
+    template<typename RealT, template<typename> class Vec3T>
+    inline __hostdev__ ValueT operator()(Vec3T<RealT> xyz) const;
+
+    /// @note ijk is in index space
+    __hostdev__ ValueT operator()(const CoordT &ijk) const { return BaseT::mAcc.getValue(ijk); }
+
+    /// @brief Return the gradient in index space.
+    ///
+    /// @warning Will only compile with floating point value types
+    template<typename RealT, template<typename> class Vec3T>
+    inline __hostdev__ Vec3T<ValueT> gradient(Vec3T<RealT> xyz) const;
+
+    /// @brief Return true if the tri-linear stencil has a zero crossing at the specified index position.
+    ///
+    /// @warning Will only compile with floating point value types
+    template<typename RealT, template<typename> class Vec3T>
+    inline __hostdev__ bool zeroCrossing(Vec3T<RealT> xyz) const;
+
+}; // SampleFromVoxels<TreeOrAccT, 1, false>
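// [editorial note] The boolean template parameter selects between the stateless
// specialization above and the caching one that follows; a sketch, assuming an
// accessor type `AccT` and accessor `acc`:
//
//     auto cached    = nanovdb::math::createSampler<1, AccT, true>(acc);  // keeps an 8-value stencil
//     auto stateless = nanovdb::math::createSampler<1, AccT, false>(acc); // no mutable members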
+/// @brief Template specialization with caching of stencil values
+template<typename TreeOrAccT>
+class SampleFromVoxels<TreeOrAccT, 1, true> : public TrilinearSampler<TreeOrAccT>
+{
+    using BaseT = TrilinearSampler<TreeOrAccT>;
+    using ValueT = typename TreeOrAccT::ValueType;
+    using CoordT = typename TreeOrAccT::CoordType;
+
+    mutable CoordT mPos;
+    mutable ValueT mVal[2][2][2];
+
+    template<typename RealT, template<typename> class Vec3T>
+    __hostdev__ void cache(Vec3T<RealT>& xyz) const;
+public:
+
+    /// @brief Construction from a Tree or ReadAccessor
+    __hostdev__ SampleFromVoxels(const TreeOrAccT& acc) : BaseT(acc), mPos(CoordT::max()) {}
+
+    /// @note xyz is in index space
+    template<typename RealT, template<typename> class Vec3T>
+    inline __hostdev__ ValueT operator()(Vec3T<RealT> xyz) const;
+
+    /// @note ijk is in index space
+    __hostdev__ ValueT operator()(const CoordT &ijk) const;
+
+    /// @brief Return the gradient in index space.
+    ///
+    /// @warning Will only compile with floating point value types
+    template<typename RealT, template<typename> class Vec3T>
+    inline __hostdev__ Vec3T<ValueT> gradient(Vec3T<RealT> xyz) const;
+
+    /// @brief Return true if the tri-linear stencil has a zero crossing at the specified index position.
+    ///
+    /// @warning Will only compile with floating point value types
+    template<typename RealT, template<typename> class Vec3T>
+    inline __hostdev__ bool zeroCrossing(Vec3T<RealT> xyz) const;
+
+    /// @brief Return true if the cached tri-linear stencil has a zero crossing.
+    ///
+    /// @warning Will only compile with floating point value types
+    __hostdev__ bool zeroCrossing() const { return BaseT::zeroCrossing(mVal); }
+
+}; // SampleFromVoxels<TreeOrAccT, 1, true>
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename> class Vec3T>
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, true>::operator()(Vec3T<RealT> xyz) const
+{
+    this->cache(xyz);
+    return BaseT::sample(xyz, mVal);
+}
+
+template<typename TreeOrAccT>
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, true>::operator()(const CoordT &ijk) const
+{
+    return ijk == mPos ? mVal[0][0][0] : BaseT::mAcc.getValue(ijk);
+}
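// [editorial note] Because the caching specialization above only refetches its stencil
// when Floor(xyz) changes, coherent access patterns amortize the tree traversals; a
// hedged sketch (assumes `smp = nanovdb::math::createSampler<1>(acc)` over a float grid):
//
//     float sum = 0.0f;
//     for (int i = 0; i < 10; ++i) // all 10 lookups land in the same voxel -> one stencil fetch
//         sum += smp(nanovdb::math::Vec3<float>(7.0f + 0.1f * i, 3.5f, 2.5f));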
+template<typename TreeOrAccT>
+template<typename RealT, template<typename> class Vec3T>
+__hostdev__ Vec3T<typename TreeOrAccT::ValueType> SampleFromVoxels<TreeOrAccT, 1, true>::gradient(Vec3T<RealT> xyz) const
+{
+    this->cache(xyz);
+    return BaseT::gradient(xyz, mVal);
+}
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename> class Vec3T>
+__hostdev__ bool SampleFromVoxels<TreeOrAccT, 1, true>::zeroCrossing(Vec3T<RealT> xyz) const
+{
+    this->cache(xyz);
+    return BaseT::zeroCrossing(mVal);
+}
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename> class Vec3T>
+__hostdev__ void SampleFromVoxels<TreeOrAccT, 1, true>::cache(Vec3T<RealT>& xyz) const
+{
+    CoordT ijk = Floor<CoordT>(xyz);
+    if (ijk != mPos) {
+        mPos = ijk;
+        BaseT::stencil(ijk, mVal);
+    }
+}
+
+#if 0
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename> class Vec3T>
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, false>::operator()(Vec3T<RealT> xyz) const
+{
+    ValueT val[2][2][2];
+    CoordT ijk = Floor<CoordT>(xyz);
+    BaseT::stencil(ijk, val);
+    return BaseT::sample(xyz, val);
+}
+
+#else
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename> class Vec3T>
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, false>::operator()(Vec3T<RealT> xyz) const
+{
+    auto lerp = [](ValueT a, ValueT b, RealT w) { return a + ValueT(w) * (b - a); };
+
+    CoordT coord = Floor<CoordT>(xyz);
+
+    ValueT vx, vx1, vy, vy1, vz, vz1;
+
+    vz = BaseT::mAcc.getValue(coord);
+    coord[2] += 1;
+    vz1 = BaseT::mAcc.getValue(coord);
+    vy = lerp(vz, vz1, xyz[2]);
+
+    coord[1] += 1;
+
+    vz1 = BaseT::mAcc.getValue(coord);
+    coord[2] -= 1;
+    vz = BaseT::mAcc.getValue(coord);
+    vy1 = lerp(vz, vz1, xyz[2]);
+
+    vx = lerp(vy, vy1, xyz[1]);
+
+    coord[0] += 1;
+
+    vz = BaseT::mAcc.getValue(coord);
+    coord[2] += 1;
+    vz1 = BaseT::mAcc.getValue(coord);
+    vy1 = lerp(vz, vz1, xyz[2]);
+
+    coord[1] -= 1;
+
+    vz1 = BaseT::mAcc.getValue(coord);
+    coord[2] -= 1;
+    vz = BaseT::mAcc.getValue(coord);
+    vy = lerp(vz, vz1, xyz[2]);
+
+    vx1 = lerp(vy, vy1, xyz[1]);
+
+    return lerp(vx, vx1, xyz[0]);
+}
+#endif
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename> class Vec3T>
+__hostdev__ inline Vec3T<typename TreeOrAccT::ValueType> SampleFromVoxels<TreeOrAccT, 1, false>::gradient(Vec3T<RealT> xyz) const
+{
+    ValueT val[2][2][2];
+    CoordT ijk = Floor<CoordT>(xyz);
+    BaseT::stencil(ijk, val);
+    return BaseT::gradient(xyz, val);
+}
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename> class Vec3T>
+__hostdev__ bool SampleFromVoxels<TreeOrAccT, 1, false>::zeroCrossing(Vec3T<RealT> xyz) const
+{
+    ValueT val[2][2][2];
+    CoordT ijk = Floor<CoordT>(xyz);
+    BaseT::stencil(ijk, val);
+    return BaseT::zeroCrossing(val);
+}
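// [editorial note] For level sets the typical consumers of the methods above are a
// zero-crossing test followed by a gradient-based normal; illustrative only (`smp` is
// assumed to be a SampleFromVoxels<AccT, 1, false> over a float grid, `xyz` an
// index-space hit point):
//
//     if (smp.zeroCrossing(xyz)) {    // sign change within the 8-value stencil
//         auto n = smp.gradient(xyz); // un-normalized, index-space gradient
//         n.normalize();              // world-space still requires the grid's map
//     }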
+// ------------------------------> TriquadraticSampler <--------------------------------------
+
+/// @brief Tri-quadratic sampler, i.e. second order, interpolator
+template<typename TreeOrAccT>
+class TriquadraticSampler
+{
+protected:
+    const TreeOrAccT& mAcc;
+
+public:
+    using ValueT = typename TreeOrAccT::ValueType;
+    using CoordT = typename TreeOrAccT::CoordType;
+    static const int ORDER = 2;
+
+    /// @brief Construction from a Tree or ReadAccessor
+    __hostdev__ TriquadraticSampler(const TreeOrAccT& acc) : mAcc(acc) {}
+
+    __hostdev__ const TreeOrAccT& accessor() const { return mAcc; }
+
+    /// @brief Extract the stencil of 27 values
+    inline __hostdev__ void stencil(const CoordT &ijk, ValueT (&v)[3][3][3]) const;
+
+    template<typename RealT, template<typename> class Vec3T>
+    static inline __hostdev__ ValueT sample(const Vec3T<RealT> &uvw, const ValueT (&v)[3][3][3]);
+
+    static inline __hostdev__ bool zeroCrossing(const ValueT (&v)[3][3][3]);
+}; // TriquadraticSampler
+
+template<typename TreeOrAccT>
+__hostdev__ void TriquadraticSampler<TreeOrAccT>::stencil(const CoordT &ijk, ValueT (&v)[3][3][3]) const
+{
+    CoordT p(ijk[0] - 1, 0, 0);
+    for (int dx = 0; dx < 3; ++dx, ++p[0]) {
+        p[1] = ijk[1] - 1;
+        for (int dy = 0; dy < 3; ++dy, ++p[1]) {
+            p[2] = ijk[2] - 1;
+            for (int dz = 0; dz < 3; ++dz, ++p[2]) {
+                v[dx][dy][dz] = mAcc.getValue(p); // extract the stencil of 27 values
+            }
+        }
+    }
+}
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename> class Vec3T>
+__hostdev__ typename TreeOrAccT::ValueType TriquadraticSampler<TreeOrAccT>::sample(const Vec3T<RealT> &uvw, const ValueT (&v)[3][3][3])
+{
+    auto kernel = [](const ValueT* value, double weight) -> ValueT {
+        return weight * (weight * (0.5f * (value[0] + value[2]) - value[1]) +
+               0.5f * (value[2] - value[0])) + value[1];
+    };
+
+    ValueT vx[3];
+    for (int dx = 0; dx < 3; ++dx) {
+        ValueT vy[3];
+        for (int dy = 0; dy < 3; ++dy) {
+            vy[dy] = kernel(&v[dx][dy][0], uvw[2]);
+        } // loop over y
+        vx[dx] = kernel(vy, uvw[1]);
+    } // loop over x
+    return kernel(vx, uvw[0]);
+}
+
+template<typename TreeOrAccT>
+__hostdev__ bool TriquadraticSampler<TreeOrAccT>::zeroCrossing(const ValueT (&v)[3][3][3])
+{
+    static_assert(util::is_floating_point<ValueT>::value, "TriquadraticSampler::zeroCrossing requires a floating-point type");
+    const bool less = v[0][0][0] < ValueT(0);
+    for (int dx = 0; dx < 3; ++dx) {
+        for (int dy = 0; dy < 3; ++dy) {
+            for (int dz = 0; dz < 3; ++dz) {
+                if (less ^ (v[dx][dy][dz] < ValueT(0))) return true;
+            }
+        }
+    }
+    return false;
+}
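// [editorial note] The 1D kernel used by sample() above is the unique parabola through
// the three stencil values at offsets -1, 0 and +1; a quick hedged sanity check is
// that it reproduces them exactly:
//
//     // kernel(v, -1.0) == v[0],  kernel(v, 0.0) == v[1],  kernel(v, 1.0) == v[2]
//
// so evaluating at w in [0, 1) blends v[1] and v[2] while v[0] only contributes curvature.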
+/// @brief Template specialization that does not use caching of stencil points
+template<typename TreeOrAccT>
+class SampleFromVoxels<TreeOrAccT, 2, false> : public TriquadraticSampler<TreeOrAccT>
+{
+    using BaseT = TriquadraticSampler<TreeOrAccT>;
+    using ValueT = typename TreeOrAccT::ValueType;
+    using CoordT = typename TreeOrAccT::CoordType;
+public:
+
+    /// @brief Construction from a Tree or ReadAccessor
+    __hostdev__ SampleFromVoxels(const TreeOrAccT& acc) : BaseT(acc) {}
+
+    /// @note xyz is in index space
+    template<typename RealT, template<typename> class Vec3T>
+    inline __hostdev__ ValueT operator()(Vec3T<RealT> xyz) const;
+
+    __hostdev__ ValueT operator()(const CoordT &ijk) const { return BaseT::mAcc.getValue(ijk); }
+
+    /// @brief Return true if the tri-quadratic stencil has a zero crossing at the specified index position.
+    ///
+    /// @warning Will only compile with floating point value types
+    template<typename RealT, template<typename> class Vec3T>
+    inline __hostdev__ bool zeroCrossing(Vec3T<RealT> xyz) const;
+
+}; // SampleFromVoxels<TreeOrAccT, 2, false>
+
+/// @brief Template specialization with caching of stencil values
+template<typename TreeOrAccT>
+class SampleFromVoxels<TreeOrAccT, 2, true> : public TriquadraticSampler<TreeOrAccT>
+{
+    using BaseT = TriquadraticSampler<TreeOrAccT>;
+    using ValueT = typename TreeOrAccT::ValueType;
+    using CoordT = typename TreeOrAccT::CoordType;
+
+    mutable CoordT mPos;
+    mutable ValueT mVal[3][3][3];
+
+    template<typename RealT, template<typename> class Vec3T>
+    __hostdev__ void cache(Vec3T<RealT>& xyz) const;
+public:
+
+    /// @brief Construction from a Tree or ReadAccessor
+    __hostdev__ SampleFromVoxels(const TreeOrAccT& acc) : BaseT(acc), mPos(CoordT::max()) {}
+
+    /// @note xyz is in index space
+    template<typename RealT, template<typename> class Vec3T>
+    inline __hostdev__ ValueT operator()(Vec3T<RealT> xyz) const;
+
+    inline __hostdev__ ValueT operator()(const CoordT &ijk) const;
+
+    /// @brief Return true if the tri-quadratic stencil has a zero crossing at the specified index position.
+    ///
+    /// @warning Will only compile with floating point value types
+    template<typename RealT, template<typename> class Vec3T>
+    inline __hostdev__ bool zeroCrossing(Vec3T<RealT> xyz) const;
+
+    /// @brief Return true if the cached tri-quadratic stencil has a zero crossing.
+    ///
+    /// @warning Will only compile with floating point value types
+    __hostdev__ bool zeroCrossing() const { return BaseT::zeroCrossing(mVal); }
+
+}; // SampleFromVoxels<TreeOrAccT, 2, true>
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename> class Vec3T>
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, true>::operator()(Vec3T<RealT> xyz) const
+{
+    this->cache(xyz);
+    return BaseT::sample(xyz, mVal);
+}
+
+template<typename TreeOrAccT>
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, true>::operator()(const CoordT &ijk) const
+{
+    return ijk == mPos ? mVal[1][1][1] : BaseT::mAcc.getValue(ijk);
+}
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename> class Vec3T>
+__hostdev__ bool SampleFromVoxels<TreeOrAccT, 2, true>::zeroCrossing(Vec3T<RealT> xyz) const
+{
+    this->cache(xyz);
+    return BaseT::zeroCrossing(mVal);
+}
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename> class Vec3T>
+__hostdev__ void SampleFromVoxels<TreeOrAccT, 2, true>::cache(Vec3T<RealT>& xyz) const
+{
+    CoordT ijk = Floor<CoordT>(xyz);
+    if (ijk != mPos) {
+        mPos = ijk;
+        BaseT::stencil(ijk, mVal);
+    }
+}
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename> class Vec3T>
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, false>::operator()(Vec3T<RealT> xyz) const
+{
+    ValueT val[3][3][3];
+    CoordT ijk = Floor<CoordT>(xyz);
+    BaseT::stencil(ijk, val);
+    return BaseT::sample(xyz, val);
+}
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename> class Vec3T>
+__hostdev__ bool SampleFromVoxels<TreeOrAccT, 2, false>::zeroCrossing(Vec3T<RealT> xyz) const
+{
+    ValueT val[3][3][3];
+    CoordT ijk = Floor<CoordT>(xyz);
+    BaseT::stencil(ijk, val);
+    return BaseT::zeroCrossing(val);
+}
+// ------------------------------> TricubicSampler <--------------------------------------
+
+/// @brief Tri-cubic sampler, i.e. third order, interpolator.
+///
+/// @details See the following paper for implementation details:
+/// Lekien, F. and Marsden, J.: Tricubic interpolation in three dimensions.
+///                         In: International Journal for Numerical Methods
+///                         in Engineering (2005), No. 63, p. 455-471
+
+template<typename TreeOrAccT>
+class TricubicSampler
+{
+protected:
+    using ValueT = typename TreeOrAccT::ValueType;
+    using CoordT = typename TreeOrAccT::CoordType;
+
+    const TreeOrAccT& mAcc;
+
+public:
+    /// @brief Construction from a Tree or ReadAccessor
+    __hostdev__ TricubicSampler(const TreeOrAccT& acc)
+        : mAcc(acc)
+    {
+    }
+
+    __hostdev__ const TreeOrAccT& accessor() const { return mAcc; }
+
+    /// @brief Extract the stencil of 64 coefficients
+    inline __hostdev__ void stencil(const CoordT& ijk, ValueT (&c)[64]) const;
+
+    template<typename RealT, template<typename> class Vec3T>
+    static inline __hostdev__ ValueT sample(const Vec3T<RealT> &uvw, const ValueT (&c)[64]);
+}; // TricubicSampler
+
+template<typename TreeOrAccT>
+__hostdev__ void TricubicSampler<TreeOrAccT>::stencil(const CoordT& ijk, ValueT (&C)[64]) const
+{
+    auto fetch = [&](int i, int j, int k) -> ValueT& { return C[((i + 1) << 4) + ((j + 1) << 2) + k + 1]; };
+
+    // fetch 64 point stencil values
+    for (int i = -1; i < 3; ++i) {
+        for (int j = -1; j < 3; ++j) {
+            fetch(i, j, -1) = mAcc.getValue(ijk + CoordT(i, j, -1));
+            fetch(i, j,  0) = mAcc.getValue(ijk + CoordT(i, j,  0));
+            fetch(i, j,  1) = mAcc.getValue(ijk + CoordT(i, j,  1));
+            fetch(i, j,  2) = mAcc.getValue(ijk + CoordT(i, j,  2));
+        }
+    }
+    const ValueT half(0.5), quarter(0.25), eighth(0.125);
+    const ValueT X[64] = {// values of f(x,y,z) at the 8 corners (each from 1 stencil value).
+                          fetch(0, 0, 0),
+                          fetch(1, 0, 0),
+                          fetch(0, 1, 0),
+                          fetch(1, 1, 0),
+                          fetch(0, 0, 1),
+                          fetch(1, 0, 1),
+                          fetch(0, 1, 1),
+                          fetch(1, 1, 1),
+                          // values of df/dx at the 8 corners (each from 2 stencil values).
+                          half * (fetch(1, 0, 0) - fetch(-1, 0, 0)),
+                          half * (fetch(2, 0, 0) - fetch(0, 0, 0)),
+                          half * (fetch(1, 1, 0) - fetch(-1, 1, 0)),
+                          half * (fetch(2, 1, 0) - fetch(0, 1, 0)),
+                          half * (fetch(1, 0, 1) - fetch(-1, 0, 1)),
+                          half * (fetch(2, 0, 1) - fetch(0, 0, 1)),
+                          half * (fetch(1, 1, 1) - fetch(-1, 1, 1)),
+                          half * (fetch(2, 1, 1) - fetch(0, 1, 1)),
+                          // values of df/dy at the 8 corners (each from 2 stencil values).
+                          half * (fetch(0, 1, 0) - fetch(0, -1, 0)),
+                          half * (fetch(1, 1, 0) - fetch(1, -1, 0)),
+                          half * (fetch(0, 2, 0) - fetch(0, 0, 0)),
+                          half * (fetch(1, 2, 0) - fetch(1, 0, 0)),
+                          half * (fetch(0, 1, 1) - fetch(0, -1, 1)),
+                          half * (fetch(1, 1, 1) - fetch(1, -1, 1)),
+                          half * (fetch(0, 2, 1) - fetch(0, 0, 1)),
+                          half * (fetch(1, 2, 1) - fetch(1, 0, 1)),
+                          // values of df/dz at the 8 corners (each from 2 stencil values).
+                          half * (fetch(0, 0, 1) - fetch(0, 0, -1)),
+                          half * (fetch(1, 0, 1) - fetch(1, 0, -1)),
+                          half * (fetch(0, 1, 1) - fetch(0, 1, -1)),
+                          half * (fetch(1, 1, 1) - fetch(1, 1, -1)),
+                          half * (fetch(0, 0, 2) - fetch(0, 0, 0)),
+                          half * (fetch(1, 0, 2) - fetch(1, 0, 0)),
+                          half * (fetch(0, 1, 2) - fetch(0, 1, 0)),
+                          half * (fetch(1, 1, 2) - fetch(1, 1, 0)),
+                          // values of d2f/dxdy at the 8 corners (each from 4 stencil values).
+ quarter * (fetch(1, 1, 0) - fetch(-1, 1, 0) - fetch(1, -1, 0) + fetch(-1, -1, 0)), + quarter * (fetch(2, 1, 0) - fetch(0, 1, 0) - fetch(2, -1, 0) + fetch(0, -1, 0)), + quarter * (fetch(1, 2, 0) - fetch(-1, 2, 0) - fetch(1, 0, 0) + fetch(-1, 0, 0)), + quarter * (fetch(2, 2, 0) - fetch(0, 2, 0) - fetch(2, 0, 0) + fetch(0, 0, 0)), + quarter * (fetch(1, 1, 1) - fetch(-1, 1, 1) - fetch(1, -1, 1) + fetch(-1, -1, 1)), + quarter * (fetch(2, 1, 1) - fetch(0, 1, 1) - fetch(2, -1, 1) + fetch(0, -1, 1)), + quarter * (fetch(1, 2, 1) - fetch(-1, 2, 1) - fetch(1, 0, 1) + fetch(-1, 0, 1)), + quarter * (fetch(2, 2, 1) - fetch(0, 2, 1) - fetch(2, 0, 1) + fetch(0, 0, 1)), + // values of d2f/dxdz at the 8 corners (each from 4 stencil values). + quarter * (fetch(1, 0, 1) - fetch(-1, 0, 1) - fetch(1, 0, -1) + fetch(-1, 0, -1)), + quarter * (fetch(2, 0, 1) - fetch(0, 0, 1) - fetch(2, 0, -1) + fetch(0, 0, -1)), + quarter * (fetch(1, 1, 1) - fetch(-1, 1, 1) - fetch(1, 1, -1) + fetch(-1, 1, -1)), + quarter * (fetch(2, 1, 1) - fetch(0, 1, 1) - fetch(2, 1, -1) + fetch(0, 1, -1)), + quarter * (fetch(1, 0, 2) - fetch(-1, 0, 2) - fetch(1, 0, 0) + fetch(-1, 0, 0)), + quarter * (fetch(2, 0, 2) - fetch(0, 0, 2) - fetch(2, 0, 0) + fetch(0, 0, 0)), + quarter * (fetch(1, 1, 2) - fetch(-1, 1, 2) - fetch(1, 1, 0) + fetch(-1, 1, 0)), + quarter * (fetch(2, 1, 2) - fetch(0, 1, 2) - fetch(2, 1, 0) + fetch(0, 1, 0)), + // values of d2f/dydz at the 8 corners (each from 4 stencil values). + quarter * (fetch(0, 1, 1) - fetch(0, -1, 1) - fetch(0, 1, -1) + fetch(0, -1, -1)), + quarter * (fetch(1, 1, 1) - fetch(1, -1, 1) - fetch(1, 1, -1) + fetch(1, -1, -1)), + quarter * (fetch(0, 2, 1) - fetch(0, 0, 1) - fetch(0, 2, -1) + fetch(0, 0, -1)), + quarter * (fetch(1, 2, 1) - fetch(1, 0, 1) - fetch(1, 2, -1) + fetch(1, 0, -1)), + quarter * (fetch(0, 1, 2) - fetch(0, -1, 2) - fetch(0, 1, 0) + fetch(0, -1, 0)), + quarter * (fetch(1, 1, 2) - fetch(1, -1, 2) - fetch(1, 1, 0) + fetch(1, -1, 0)), + quarter * (fetch(0, 2, 2) - fetch(0, 0, 2) - fetch(0, 2, 0) + fetch(0, 0, 0)), + quarter * (fetch(1, 2, 2) - fetch(1, 0, 2) - fetch(1, 2, 0) + fetch(1, 0, 0)), + // values of d3f/dxdydz at the 8 corners (each from 8 stencil values). 
+ eighth * (fetch(1, 1, 1) - fetch(-1, 1, 1) - fetch(1, -1, 1) + fetch(-1, -1, 1) - fetch(1, 1, -1) + fetch(-1, 1, -1) + fetch(1, -1, -1) - fetch(-1, -1, -1)), + eighth * (fetch(2, 1, 1) - fetch(0, 1, 1) - fetch(2, -1, 1) + fetch(0, -1, 1) - fetch(2, 1, -1) + fetch(0, 1, -1) + fetch(2, -1, -1) - fetch(0, -1, -1)), + eighth * (fetch(1, 2, 1) - fetch(-1, 2, 1) - fetch(1, 0, 1) + fetch(-1, 0, 1) - fetch(1, 2, -1) + fetch(-1, 2, -1) + fetch(1, 0, -1) - fetch(-1, 0, -1)), + eighth * (fetch(2, 2, 1) - fetch(0, 2, 1) - fetch(2, 0, 1) + fetch(0, 0, 1) - fetch(2, 2, -1) + fetch(0, 2, -1) + fetch(2, 0, -1) - fetch(0, 0, -1)), + eighth * (fetch(1, 1, 2) - fetch(-1, 1, 2) - fetch(1, -1, 2) + fetch(-1, -1, 2) - fetch(1, 1, 0) + fetch(-1, 1, 0) + fetch(1, -1, 0) - fetch(-1, -1, 0)), + eighth * (fetch(2, 1, 2) - fetch(0, 1, 2) - fetch(2, -1, 2) + fetch(0, -1, 2) - fetch(2, 1, 0) + fetch(0, 1, 0) + fetch(2, -1, 0) - fetch(0, -1, 0)), + eighth * (fetch(1, 2, 2) - fetch(-1, 2, 2) - fetch(1, 0, 2) + fetch(-1, 0, 2) - fetch(1, 2, 0) + fetch(-1, 2, 0) + fetch(1, 0, 0) - fetch(-1, 0, 0)), + eighth * (fetch(2, 2, 2) - fetch(0, 2, 2) - fetch(2, 0, 2) + fetch(0, 0, 2) - fetch(2, 2, 0) + fetch(0, 2, 0) + fetch(2, 0, 0) - fetch(0, 0, 0))}; + + // 4Kb of static table (int8_t has a range of -127 -> 127 which suffices) + static const int8_t A[64][64] = { + {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {-3, 3, 0, 0, 0, 0, 0, 0, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {2, -2, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {-3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {9, -9, -9, 9, 0, 0, 0, 0, 6, 3, -6, -3, 0, 0, 0, 0, 6, -6, 3, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {-6, 6, 6, -6, 0, 0, 0, 0, 
-3, -3, 3, 3, 0, 0, 0, 0, -4, 4, -2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -2, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {-6, 6, 6, -6, 0, 0, 0, 0, -4, -2, 4, 2, 0, 0, 0, 0, -3, 3, -3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -1, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {4, -4, -4, 4, 0, 0, 0, 0, 2, 2, -2, -2, 0, 0, 0, 0, 2, -2, 2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, 0, 0, 0, 0, -2, -1, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, -9, -9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 3, -6, -3, 0, 0, 0, 0, 6, -6, 3, -3, 0, 0, 0, 0, 4, 2, 2, 1, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -6, 6, 6, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, -3, 3, 3, 0, 0, 0, 0, -4, 4, -2, 2, 0, 0, 0, 0, -2, -2, -1, -1, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -6, 6, 6, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -2, 4, 2, 0, 0, 0, 0, -3, 3, -3, 3, 0, 0, 0, 0, -2, -1, -2, -1, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4, -4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, -2, -2, 0, 0, 0, 0, 2, -2, 2, -2, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0}, + {-3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {9, -9, 0, 0, -9, 9, 0, 0, 6, 3, 0, 0, -6, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, -6, 0, 0, 3, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 2, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {-6, 6, 0, 0, 6, -6, 0, 0, -3, -3, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, 4, 0, 0, -2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -2, 0, 0, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, 0, 0, -1, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, -9, 0, 0, -9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 3, 0, 0, -6, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, -6, 0, 0, 3, -3, 0, 0, 4, 2, 0, 0, 2, 1, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -6, 6, 0, 0, 6, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, -3, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, 4, 0, 0, -2, 2, 0, 0, -2, -2, 0, 0, -1, -1, 0, 0}, + {9, 0, -9, 0, -9, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 3, 0, -6, 0, -3, 0, 6, 0, -6, 0, 3, 0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 2, 0, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 9, 0, -9, 0, -9, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 3, 0, -6, 0, -3, 0, 6, 0, -6, 0, 3, 0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 2, 0, 2, 0, 1, 0}, + {-27, 27, 27, -27, 27, -27, -27, 27, -18, -9, 18, 9, 18, 9, -18, -9, -18, 18, -9, 9, 18, -18, 9, -9, -18, 18, 18, -18, -9, 9, 9, -9, -12, -6, -6, -3, 12, 6, 6, 3, -12, -6, 12, 6, -6, -3, 6, 3, -12, 12, -6, 6, -6, 6, -3, 3, -8, -4, -4, -2, -4, -2, -2, -1}, + {18, -18, -18, 18, -18, 18, 18, -18, 9, 9, -9, -9, -9, -9, 9, 9, 12, -12, 6, -6, -12, 12, -6, 6, 12, -12, -12, 12, 6, -6, -6, 6, 6, 6, 3, 3, -6, -6, -3, -3, 6, 6, -6, -6, 3, 3, -3, -3, 8, -8, 4, -4, 4, -4, 2, -2, 4, 4, 2, 2, 2, 2, 1, 1}, + {-6, 0, 6, 0, 6, 0, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, -3, 0, 3, 0, 3, 0, -4, 0, 4, 0, -2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -2, 0, -1, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, -6, 0, 6, 0, 6, 0, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, -3, 0, 3, 0, 3, 0, -4, 0, 4, 0, -2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -2, 0, -1, 0, -1, 0}, + {18, -18, -18, 
18, -18, 18, 18, -18, 12, 6, -12, -6, -12, -6, 12, 6, 9, -9, 9, -9, -9, 9, -9, 9, 12, -12, -12, 12, 6, -6, -6, 6, 6, 3, 6, 3, -6, -3, -6, -3, 8, 4, -8, -4, 4, 2, -4, -2, 6, -6, 6, -6, 3, -3, 3, -3, 4, 2, 4, 2, 2, 1, 2, 1}, + {-12, 12, 12, -12, 12, -12, -12, 12, -6, -6, 6, 6, 6, 6, -6, -6, -6, 6, -6, 6, 6, -6, 6, -6, -8, 8, 8, -8, -4, 4, 4, -4, -3, -3, -3, -3, 3, 3, 3, 3, -4, -4, 4, 4, -2, -2, 2, 2, -4, 4, -4, 4, -2, 2, -2, 2, -2, -2, -2, -2, -1, -1, -1, -1}, + {2, 0, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {-6, 6, 0, 0, 6, -6, 0, 0, -4, -2, 0, 0, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, -3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -1, 0, 0, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {4, -4, 0, 0, -4, 4, 0, 0, 2, 2, 0, 0, -2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 0, 0, 2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -6, 6, 0, 0, 6, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -2, 0, 0, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, -3, 3, 0, 0, -2, -1, 0, 0, -2, -1, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4, 0, 0, -4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, -2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 0, 0, 2, -2, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0}, + {-6, 0, 6, 0, 6, 0, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, 0, -2, 0, 4, 0, 2, 0, -3, 0, 3, 0, -3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, -2, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, -6, 0, 6, 0, 6, 0, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, 0, -2, 0, 4, 0, 2, 0, -3, 0, 3, 0, -3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, -2, 0, -1, 0}, + {18, -18, -18, 18, -18, 18, 18, -18, 12, 6, -12, -6, -12, -6, 12, 6, 12, -12, 6, -6, -12, 12, -6, 6, 9, -9, -9, 9, 9, -9, -9, 9, 8, 4, 4, 2, -8, -4, -4, -2, 6, 3, -6, -3, 6, 3, -6, -3, 6, -6, 3, -3, 6, -6, 3, -3, 4, 2, 2, 1, 4, 2, 2, 1}, + {-12, 12, 12, -12, 12, -12, -12, 12, -6, -6, 6, 6, 6, 6, -6, -6, -8, 8, -4, 4, 8, -8, 4, -4, -6, 6, 6, -6, -6, 6, 6, -6, -4, -4, -2, -2, 4, 4, 2, 2, -3, -3, 3, 3, -3, -3, 3, 3, -4, 4, -2, 2, -4, 4, -2, 2, -2, -2, -1, -1, -2, -2, -1, -1}, + {4, 0, -4, 0, -4, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, -2, 0, -2, 0, 2, 0, -2, 0, 2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 4, 0, -4, 0, -4, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, -2, 0, -2, 0, 2, 0, -2, 0, 2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0}, + {-12, 12, 12, -12, 12, -12, -12, 12, -8, -4, 8, 4, 8, 4, -8, -4, -6, 6, -6, 6, 6, -6, 6, -6, -6, 6, 6, -6, -6, 6, 6, -6, -4, -2, -4, -2, 4, 2, 4, 2, -4, -2, 4, 2, -4, -2, 4, 2, -3, 3, -3, 
3, -3, 3, -3, 3, -2, -1, -2, -1, -2, -1, -2, -1},
+        {8, -8, -8, 8, -8, 8, 8, -8, 4, 4, -4, -4, -4, -4, 4, 4, 4, -4, 4, -4, -4, 4, -4, 4, 4, -4, -4, 4, 4, -4, -4, 4, 2, 2, 2, 2, -2, -2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2, 2, -2, 2, -2, 2, -2, 2, -2, 1, 1, 1, 1, 1, 1, 1, 1}};
+
+    for (int i = 0; i < 64; ++i) { // C = A * X
+        C[i] = ValueT(0);
+#if 0
+        for (int j = 0; j < 64; j += 4) {
+            C[i] = fma(A[i][j], X[j], fma(A[i][j+1], X[j+1], fma(A[i][j+2], X[j+2], fma(A[i][j+3], X[j+3], C[i]))));
+        }
+#else
+        for (int j = 0; j < 64; j += 4) {
+            C[i] += A[i][j] * X[j] + A[i][j + 1] * X[j + 1] + A[i][j + 2] * X[j + 2] + A[i][j + 3] * X[j + 3];
+        }
+#endif
+    }
+}
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename> class Vec3T>
+__hostdev__ typename TreeOrAccT::ValueType TricubicSampler<TreeOrAccT>::sample(const Vec3T<RealT> &xyz, const ValueT (&C)[64])
+{
+    ValueT zPow(1), sum(0);
+    for (int k = 0, n = 0; k < 4; ++k) {
+        ValueT yPow(1);
+        for (int j = 0; j < 4; ++j, n += 4) {
+#if 0
+            sum = fma(yPow, zPow * fma(xyz[0], fma(xyz[0], fma(xyz[0], C[n + 3], C[n + 2]), C[n + 1]), C[n]), sum);
+#else
+            sum += yPow * zPow * (C[n] + xyz[0] * (C[n + 1] + xyz[0] * (C[n + 2] + xyz[0] * C[n + 3])));
+#endif
+            yPow *= xyz[1];
+        }
+        zPow *= xyz[2];
+    }
+    return sum;
+}
+
+template<typename TreeOrAccT>
+class SampleFromVoxels<TreeOrAccT, 3, true> : public TricubicSampler<TreeOrAccT>
+{
+    using BaseT = TricubicSampler<TreeOrAccT>;
+    using ValueT = typename TreeOrAccT::ValueType;
+    using CoordT = typename TreeOrAccT::CoordType;
+
+    mutable CoordT mPos;
+    mutable ValueT mC[64];
+
+    template<typename RealT, template<typename> class Vec3T>
+    __hostdev__ void cache(Vec3T<RealT>& xyz) const;
+
+public:
+    /// @brief Construction from a Tree or ReadAccessor
+    __hostdev__ SampleFromVoxels(const TreeOrAccT& acc)
+        : BaseT(acc)
+        , mPos(CoordT::max()) // invalid coord, so the first lookup populates the cache
+    {
+    }
+
+    /// @note xyz is in index space
+    template<typename RealT, template<typename> class Vec3T>
+    inline __hostdev__ ValueT operator()(Vec3T<RealT> xyz) const;
+
+    /// @brief Return value at the coordinate @a ijk in index space
+    __hostdev__ ValueT operator()(const CoordT &ijk) const { return BaseT::mAcc.getValue(ijk); }
+
+}; // SampleFromVoxels<TreeOrAccT, 3, true>
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename> class Vec3T>
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 3, true>::operator()(Vec3T<RealT> xyz) const
+{
+    this->cache(xyz);
+    return BaseT::sample(xyz, mC);
+}
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename> class Vec3T>
+__hostdev__ void SampleFromVoxels<TreeOrAccT, 3, true>::cache(Vec3T<RealT>& xyz) const
+{
+    CoordT ijk = Floor<CoordT>(xyz);
+    if (ijk != mPos) {
+        mPos = ijk;
+        BaseT::stencil(ijk, mC);
+    }
+}
+
+template<typename TreeOrAccT>
+class SampleFromVoxels<TreeOrAccT, 3, false> : public TricubicSampler<TreeOrAccT>
+{
+    using BaseT = TricubicSampler<TreeOrAccT>;
+    using ValueT = typename TreeOrAccT::ValueType;
+    using CoordT = typename TreeOrAccT::CoordType;
+
+public:
+    /// @brief Construction from a Tree or ReadAccessor
+    __hostdev__ SampleFromVoxels(const TreeOrAccT& acc)
+        : BaseT(acc)
+    {
+    }
+
+    /// @note xyz is in index space
+    template<typename RealT, template<typename> class Vec3T>
+    inline __hostdev__ ValueT operator()(Vec3T<RealT> xyz) const;
+
+    __hostdev__ ValueT operator()(const CoordT &ijk) const { return BaseT::mAcc.getValue(ijk); }
+
+}; // SampleFromVoxels<TreeOrAccT, 3, false>
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename> class Vec3T>
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 3, false>::operator()(Vec3T<RealT> xyz) const
+{
+    ValueT C[64];
+    CoordT ijk = Floor<CoordT>(xyz);
+    BaseT::stencil(ijk, C);
+    return BaseT::sample(xyz, C);
+}
+
+} // namespace math
+
+template<int Order, typename TreeOrAccT, bool UseCache = true>
+[[deprecated("Use nanovdb::math::createSampler instead")]]
+__hostdev__ math::SampleFromVoxels<TreeOrAccT, Order, UseCache> createSampler(const TreeOrAccT& acc)
+{
+    return math::SampleFromVoxels<TreeOrAccT, Order, UseCache>(acc);
+}
+
+} // namespace nanovdb
+
+#endif // NANOVDB_SAMPLE_FROM_VOXELS_H_HAS_BEEN_INCLUDED
diff --git a/nanovdb/nanovdb/math/Stencils.h b/nanovdb/nanovdb/math/Stencils.h
new file mode 100644
index 0000000000..cc935bf827
--- /dev/null
+++ b/nanovdb/nanovdb/math/Stencils.h
@@ -0,0 +1,1032 @@
+// Copyright Contributors to the OpenVDB Project
+// SPDX-License-Identifier: MPL-2.0
+//
+/// @author Ken Museth
+///
+/// @date April 9, 2021
+///
+/// @file Stencils.h
+///
+/// @brief Defines various finite-difference stencils that allow for the
+///        computation of gradients of order 1 to 5, mean curvatures,
+///        gaussian curvatures, principal curvatures, tri-linear interpolation,
+///        zero-crossing, laplacian, and closest point transform.
+
+#ifndef NANOVDB_MATH_STENCILS_HAS_BEEN_INCLUDED
+#define NANOVDB_MATH_STENCILS_HAS_BEEN_INCLUDED
+
+#include <nanovdb/math/Math.h> // for __hostdev__, Vec3, Min, Max, Pow2, Pow3, Pow4
+
+namespace nanovdb {
+
+namespace math {
+
+// ---------------------------- WENO5 ----------------------------
+
+/// @brief Implementation of nominally fifth-order finite-difference WENO
+/// @details This function returns the numerical flux. See "High Order Finite Difference and
+/// Finite Volume WENO Schemes and Discontinuous Galerkin Methods for CFD" - Chi-Wang Shu
+/// ICASE Report No 2001-11 (page 6). Also see ICASE No 97-65 for a more complete reference
+/// (Shu, 1997).
+/// Given v1 = f(x-2dx), v2 = f(x-dx), v3 = f(x), v4 = f(x+dx) and v5 = f(x+2dx),
+/// return an interpolated value f(x+dx/2) with the special property that
+/// ( f(x+dx/2) - f(x-dx/2) ) / dx = df/dx (x) + error,
+/// where the error is fifth-order in smooth regions: O(dx) <= error <= O(dx^5)
+template<typename ValueType, typename RealT = ValueType>
+__hostdev__ inline ValueType
+WENO5(const ValueType& v1,
+      const ValueType& v2,
+      const ValueType& v3,
+      const ValueType& v4,
+      const ValueType& v5,
+      RealT scale2 = 1.0) // openvdb uses scale2 = 0.01
+{
+    static const RealT C = 13.0 / 12.0;
+    // WENO is formulated for non-dimensional equations, here the optional scale2
+    // is a reference value (squared) for the function being interpolated. For
+    // example if 'v' is of order 1000, then scale2 = 10^6 is ok. But in practice
+    // leave scale2 = 1.
+    const RealT eps = RealT(1.0e-6) * scale2;
+    // {\tilde \omega_k} = \gamma_k / ( \beta_k + \epsilon)^2 in Shu's ICASE report)
+    const RealT A1 = RealT(0.1)/Pow2(C*Pow2(v1-2*v2+v3)+RealT(0.25)*Pow2(v1-4*v2+3*v3)+eps),
+                A2 = RealT(0.6)/Pow2(C*Pow2(v2-2*v3+v4)+RealT(0.25)*Pow2(v2-v4)+eps),
+                A3 = RealT(0.3)/Pow2(C*Pow2(v3-2*v4+v5)+RealT(0.25)*Pow2(3*v3-4*v4+v5)+eps);
+
+    return static_cast<ValueType>((A1*(2*v1 - 7*v2 + 11*v3) +
+                                   A2*(5*v3 - v2 + 2*v4) +
+                                   A3*(2*v3 + 5*v4 - v5))/(6*(A1+A2+A3)));
+}
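// [editorial note] A hedged numerical example of the flux property documented above:
// for the linear function f(x) = x sampled at x = -2,-1,0,1,2 (dx = 1), every candidate
// stencil interpolates exactly, so the result is exact regardless of the weights:
//
//     float flux = nanovdb::math::WENO5<float>(-2.f, -1.f, 0.f, 1.f, 2.f); // == f(0.5) == 0.5f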
+
+// ---------------------------- GodunovsNormSqrd ----------------------------
+
+template<typename RealT>
+__hostdev__ inline RealT
+GodunovsNormSqrd(bool isOutside,
+                 RealT dP_xm, RealT dP_xp,
+                 RealT dP_ym, RealT dP_yp,
+                 RealT dP_zm, RealT dP_zp)
+{
+    RealT dPLen2;
+    if (isOutside) { // outside
+        dPLen2  = Max(Pow2(Max(dP_xm, RealT(0))), Pow2(Min(dP_xp, RealT(0)))); // (dP/dx)2
+        dPLen2 += Max(Pow2(Max(dP_ym, RealT(0))), Pow2(Min(dP_yp, RealT(0)))); // (dP/dy)2
+        dPLen2 += Max(Pow2(Max(dP_zm, RealT(0))), Pow2(Min(dP_zp, RealT(0)))); // (dP/dz)2
+    } else { // inside
+        dPLen2  = Max(Pow2(Min(dP_xm, RealT(0))), Pow2(Max(dP_xp, RealT(0)))); // (dP/dx)2
+        dPLen2 += Max(Pow2(Min(dP_ym, RealT(0))), Pow2(Max(dP_yp, RealT(0)))); // (dP/dy)2
+        dPLen2 += Max(Pow2(Min(dP_zm, RealT(0))), Pow2(Max(dP_zp, RealT(0)))); // (dP/dz)2
+    }
+    return dPLen2; // |\nabla\phi|^2
+}
+
+template<typename RealT>
+__hostdev__ inline RealT
+GodunovsNormSqrd(bool isOutside,
+                 const Vec3<RealT>& gradient_m,
+                 const Vec3<RealT>& gradient_p)
+{
+    return GodunovsNormSqrd<RealT>(isOutside,
+                                   gradient_m[0], gradient_p[0],
+                                   gradient_m[1], gradient_p[1],
+                                   gradient_m[2], gradient_p[2]);
+}
+
+// ---------------------------- BaseStencil ----------------------------
+
+// BaseStencil uses curiously recurring template pattern (CRTP)
+template<typename DerivedType, int SIZE, typename GridT>
+class BaseStencil
+{
+public:
+    using ValueType    = typename GridT::ValueType;
+    using GridType     = GridT;
+    using TreeType     = typename GridT::TreeType;
+    using AccessorType = typename GridT::AccessorType; // ReadAccessor
+
+    /// @brief Initialize the stencil buffer with the values of voxel (i, j, k)
+    ///        and its neighbors.
+    /// @param ijk Index coordinates of stencil center
+    __hostdev__ inline void moveTo(const Coord& ijk)
+    {
+        mCenter = ijk;
+        mValues[0] = mAcc.getValue(ijk);
+        static_cast<DerivedType&>(*this).init(mCenter);
+    }
+
+    /// @brief Initialize the stencil buffer with the values of voxel (i, j, k)
+    ///        and its neighbors. The method also takes a value of the center
+    ///        element of the stencil, assuming it is already known.
+    /// @param ijk Index coordinates of stencil center
+    /// @param centerValue Value of the center element of the stencil
+    __hostdev__ inline void moveTo(const Coord& ijk, const ValueType& centerValue)
+    {
+        mCenter = ijk;
+        mValues[0] = centerValue;
+        static_cast<DerivedType&>(*this).init(mCenter);
+    }
+
+    /// @brief Initialize the stencil buffer with the values of voxel
+    ///        (x, y, z) and its neighbors.
+    ///
+    /// @note This version is slightly faster than the one above, since
+    ///       the center voxel's value is read directly from the iterator.
+    template<typename IterType>
+    __hostdev__ inline void moveTo(const IterType& iter)
+    {
+        mCenter = iter.getCoord();
+        mValues[0] = *iter;
+        static_cast<DerivedType&>(*this).init(mCenter);
+    }
+    /// @brief Initialize the stencil buffer with the values of voxel (x, y, z)
+    ///        and its neighbors.
+    /// @param xyz Floating point voxel coordinates of stencil center
+    /// @details This method will check to see if it is necessary to
+    ///          update the stencil based on the cached index coordinates of
+    ///          the center point.
+    template<typename RealType>
+    __hostdev__ inline void moveTo(const Vec3<RealType>& xyz)
+    {
+        Coord ijk = RoundDown(xyz);
+        if (ijk != mCenter) this->moveTo(ijk);
+    }
+
+    /// @brief Return the value from the stencil buffer with linear
+    ///        offset pos.
+    ///
+    /// @note The default (@a pos = 0) corresponds to the first element
+    ///       which is typically the center point of the stencil.
+    __hostdev__ inline const ValueType& getValue(unsigned int pos = 0) const
+    {
+        NANOVDB_ASSERT(pos < SIZE);
+        return mValues[pos];
+    }
+
+    /// @brief Return the value at the specified location relative to the center of the stencil
+    template<int i, int j, int k>
+    __hostdev__ inline const ValueType& getValue() const
+    {
+        return mValues[static_cast<const DerivedType&>(*this).template pos<i, j, k>()];
+    }
+
+    /// @brief Set the value at the specified location relative to the center of the stencil
+    template<int i, int j, int k>
+    __hostdev__ inline void setValue(const ValueType& value)
+    {
+        mValues[static_cast<const DerivedType&>(*this).template pos<i, j, k>()] = value;
+    }
+
+    /// @brief Return the size of the stencil buffer.
+    __hostdev__ static int size() { return SIZE; }
+
+    /// @brief Return the mean value of the current stencil.
+    __hostdev__ inline ValueType mean() const
+    {
+        ValueType sum = 0.0;
+        for (int i = 0; i < SIZE; ++i) sum += mValues[i];
+        return sum / ValueType(SIZE);
+    }
+
+    /// @brief Return the smallest value in the stencil buffer.
+    __hostdev__ inline ValueType min() const
+    {
+        ValueType v = mValues[0];
+        for (int i = 1; i < SIZE; ++i) {
+            if (mValues[i] < v) v = mValues[i];
+        }
+        return v;
+    }
+
+    /// @brief Return the largest value in the stencil buffer.
+    __hostdev__ inline ValueType max() const
+    {
+        ValueType v = mValues[0];
+        for (int i = 1; i < SIZE; ++i) {
+            if (mValues[i] > v) v = mValues[i];
+        }
+        return v;
+    }
+
+    /// @brief Return the coordinates of the center point of the stencil.
+    __hostdev__ inline const Coord& getCenterCoord() const { return mCenter; }
+
+    /// @brief Return the value at the center of the stencil
+    __hostdev__ inline const ValueType& getCenterValue() const { return mValues[0]; }
+
+    /// @brief Return true if the center of the stencil intersects the
+    ///        iso-contour specified by the isoValue
+    __hostdev__ inline bool intersects(const ValueType &isoValue = ValueType(0)) const
+    {
+        const bool less = this->getValue< 0, 0, 0>() < isoValue;
+        return (less ^ (this->getValue<-1, 0, 0>() < isoValue)) ||
+               (less ^ (this->getValue< 1, 0, 0>() < isoValue)) ||
+               (less ^ (this->getValue< 0,-1, 0>() < isoValue)) ||
+               (less ^ (this->getValue< 0, 1, 0>() < isoValue)) ||
+               (less ^ (this->getValue< 0, 0,-1>() < isoValue)) ||
+               (less ^ (this->getValue< 0, 0, 1>() < isoValue));
+    }
+
+    struct Mask {
+        uint8_t bits;
+        __hostdev__ Mask() : bits(0u) {}
+        __hostdev__ void set(int i) { bits |= (1 << i); }
+        __hostdev__ bool test(int i) const { return bits & (1 << i); }
+        __hostdev__ bool any() const { return bits > 0u; }
+        __hostdev__ bool all() const { return bits == 255u; }
+        __hostdev__ bool none() const { return bits == 0u; }
+        __hostdev__ int count() const { return util::countOn(bits); }
+    }; // Mask
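    // [editorial note] A sketch of how the Mask above is typically consumed together
    // with intersectionMask() below (illustrative only; `stencil` is assumed to be a
    // moved-to stencil over a float grid):
    //
    //     auto mask = stencil.intersectionMask(0.0f);
    //     if (mask.none()) return;             // no crossing in any of the 6 directions
    //     if (mask.test(2)) { /* crossing towards -y; see the bit ordering below */ }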
+    /// @brief Return a bit-mask where the 6 lower bits indicate whether the
+    ///        center of the stencil intersects the iso-contour specified by the isoValue.
+    ///
+    /// @note There are 2^6 = 64 different possible cases, including no intersections!
+    ///
+    /// @details The ordering of the bit mask is ( -x, +x, -y, +y, -z, +z ), so to
+    ///          check if there is an intersection in -y use (mask & (1u<<2)) where mask is
+    ///          the return value from this function. To check if there are any
+    ///          intersections use mask!=0u, and for no intersections use mask==0u.
+    ///          To count the number of intersections use __builtin_popcount(mask).
+    __hostdev__ inline Mask intersectionMask(ValueType isoValue = ValueType(0)) const
+    {
+        Mask mask;
+        const bool less = this->getValue< 0, 0, 0>() < isoValue;
+        if (less ^ (this->getValue<-1, 0, 0>() < isoValue)) mask.set(0); // |= 1u;
+        if (less ^ (this->getValue< 1, 0, 0>() < isoValue)) mask.set(1); // |= 2u;
+        if (less ^ (this->getValue< 0,-1, 0>() < isoValue)) mask.set(2); // |= 4u;
+        if (less ^ (this->getValue< 0, 1, 0>() < isoValue)) mask.set(3); // |= 8u;
+        if (less ^ (this->getValue< 0, 0,-1>() < isoValue)) mask.set(4); // |= 16u;
+        if (less ^ (this->getValue< 0, 0, 1>() < isoValue)) mask.set(5); // |= 32u;
+        return mask;
+    }
+
+    /// @brief Return a const reference to the grid from which this
+    ///        stencil was constructed.
+    __hostdev__ inline const GridType& grid() const { return *mGrid; }
+
+    /// @brief Return a const reference to the ValueAccessor
+    ///        associated with this Stencil.
+    __hostdev__ inline const AccessorType& accessor() const { return mAcc; }
+
+protected:
+    // Constructor is protected to prevent direct instantiation.
+    __hostdev__ BaseStencil(const GridType& grid)
+        : mGrid(&grid)
+        , mAcc(grid)
+        , mCenter(Coord::max())
+    {
+    }
+
+    const GridType* mGrid;
+    AccessorType    mAcc;
+    ValueType       mValues[SIZE];
+    Coord           mCenter;
+
+}; // BaseStencil class
+
+// ---------------------------- BoxStencil ----------------------------
+
+namespace { // anonymous namespace for stencil-layout map
+
+// the eight point box stencil
+template<int i, int j, int k> struct BoxPt {};
+template<> struct BoxPt< 0, 0, 0> { enum { idx = 0 }; };
+template<> struct BoxPt< 0, 0, 1> { enum { idx = 1 }; };
+template<> struct BoxPt< 0, 1, 1> { enum { idx = 2 }; };
+template<> struct BoxPt< 0, 1, 0> { enum { idx = 3 }; };
+template<> struct BoxPt< 1, 0, 0> { enum { idx = 4 }; };
+template<> struct BoxPt< 1, 0, 1> { enum { idx = 5 }; };
+template<> struct BoxPt< 1, 1, 1> { enum { idx = 6 }; };
+template<> struct BoxPt< 1, 1, 0> { enum { idx = 7 }; };
+
+}
+
+template<typename GridT>
+class BoxStencil : public BaseStencil<BoxStencil<GridT>, 8, GridT>
+{
+    using SelfT    = BoxStencil<GridT>;
+    using BaseType = BaseStencil<SelfT, 8, GridT>;
+public:
+    using GridType  = GridT;
+    using TreeType  = typename GridT::TreeType;
+    using ValueType = typename GridT::ValueType;
+
+    static constexpr int SIZE = 8;
+
+    __hostdev__ BoxStencil(const GridType& grid) : BaseType(grid) {}
+
+    /// Return linear offset for the specified stencil point relative to its center
+    template<int i, int j, int k>
+    __hostdev__ unsigned int pos() const { return BoxPt<i, j, k>::idx; }
+
+    /// @brief Return true if the center of the stencil intersects the
+    ///        iso-contour specified by the isoValue
+    __hostdev__ inline bool intersects(ValueType isoValue = ValueType(0)) const
+    {
+        const bool less = mValues[0] < isoValue;
+        return (less ^ (mValues[1] < isoValue)) ||
+               (less ^ (mValues[2] < isoValue)) ||
+               (less ^ (mValues[3] < isoValue)) ||
+               (less ^ (mValues[4] < isoValue)) ||
+               (less ^ (mValues[5] < isoValue)) ||
+               (less ^ (mValues[6] < isoValue)) ||
+               (less ^ (mValues[7] < isoValue));
+    }
+    /// @brief Return the trilinear interpolation at the normalized position.
+    /// @param xyz Floating point coordinate position. Index space and NOT world space.
+    /// @warning It is assumed that the stencil has already been moved
+    ///          to the relevant voxel position, e.g. using moveTo(xyz).
+    /// @note Trilinear interpolation kernel reads as:
+    ///       v000 (1-u)(1-v)(1-w) + v001 (1-u)(1-v)w + v010 (1-u)v(1-w) + v011 (1-u)vw
+    ///       + v100 u(1-v)(1-w) + v101 u(1-v)w + v110 uv(1-w) + v111 uvw
+    __hostdev__ inline ValueType interpolation(const Vec3<ValueType>& xyz) const
+    {
+        const ValueType u = xyz[0] - mCenter[0];
+        const ValueType v = xyz[1] - mCenter[1];
+        const ValueType w = xyz[2] - mCenter[2];
+
+        NANOVDB_ASSERT(u>=0 && u<=1);
+        NANOVDB_ASSERT(v>=0 && v<=1);
+        NANOVDB_ASSERT(w>=0 && w<=1);
+
+        ValueType V = BaseType::template getValue<0,0,0>();
+        ValueType A = V + (BaseType::template getValue<0,0,1>() - V) * w;
+        V = BaseType::template getValue<0,1,0>();
+        ValueType B = V + (BaseType::template getValue<0,1,1>() - V) * w;
+        ValueType C = A + (B - A) * v;
+
+        V = BaseType::template getValue<1,0,0>();
+        A = V + (BaseType::template getValue<1,0,1>() - V) * w;
+        V = BaseType::template getValue<1,1,0>();
+        B = V + (BaseType::template getValue<1,1,1>() - V) * w;
+        ValueType D = A + (B - A) * v;
+
+        return C + (D - C) * u;
+    }
+
+    /// @brief Return the gradient in world space of the trilinear interpolation kernel.
+    /// @param xyz Floating point coordinate position.
+    /// @warning It is assumed that the stencil has already been moved
+    ///          to the relevant voxel position, e.g. using moveTo(xyz).
+    /// @note Computed as partial derivatives of the trilinear interpolation kernel:
+    ///       v000 (1-u)(1-v)(1-w) + v001 (1-u)(1-v)w + v010 (1-u)v(1-w) + v011 (1-u)vw
+    ///       + v100 u(1-v)(1-w) + v101 u(1-v)w + v110 uv(1-w) + v111 uvw
+    __hostdev__ inline Vec3<ValueType> gradient(const Vec3<ValueType>& xyz) const
+    {
+        const ValueType u = xyz[0] - mCenter[0];
+        const ValueType v = xyz[1] - mCenter[1];
+        const ValueType w = xyz[2] - mCenter[2];
+
+        NANOVDB_ASSERT(u>=0 && u<=1);
+        NANOVDB_ASSERT(v>=0 && v<=1);
+        NANOVDB_ASSERT(w>=0 && w<=1);
+
+        ValueType D[4]={BaseType::template getValue<0,0,1>()-BaseType::template getValue<0,0,0>(),
+                        BaseType::template getValue<0,1,1>()-BaseType::template getValue<0,1,0>(),
+                        BaseType::template getValue<1,0,1>()-BaseType::template getValue<1,0,0>(),
+                        BaseType::template getValue<1,1,1>()-BaseType::template getValue<1,1,0>()};
+
+        // Z component
+        ValueType A = D[0] + (D[1]- D[0]) * v;
+        ValueType B = D[2] + (D[3]- D[2]) * v;
+        Vec3<ValueType> grad(0, 0, A + (B - A) * u);
+
+        D[0] = BaseType::template getValue<0,0,0>() + D[0] * w;
+        D[1] = BaseType::template getValue<0,1,0>() + D[1] * w;
+        D[2] = BaseType::template getValue<1,0,0>() + D[2] * w;
+        D[3] = BaseType::template getValue<1,1,0>() + D[3] * w;
+
+        // X component
+        A = D[0] + (D[1] - D[0]) * v;
+        B = D[2] + (D[3] - D[2]) * v;
+
+        grad[0] = B - A;
+
+        // Y component
+        A = D[1] - D[0];
+        B = D[3] - D[2];
+
+        grad[1] = A + (B - A) * u;
+
+        return BaseType::mGrid->map().applyIJT(grad);
+    }
+
+private:
+    __hostdev__ inline void init(const Coord& ijk)
+    {
+        mValues[1] = mAcc.getValue(ijk.offsetBy( 0, 0, 1));
+        mValues[2] = mAcc.getValue(ijk.offsetBy( 0, 1, 1));
+        mValues[3] = mAcc.getValue(ijk.offsetBy( 0, 1, 0));
+        mValues[4] = mAcc.getValue(ijk.offsetBy( 1, 0, 0));
+        mValues[5] = mAcc.getValue(ijk.offsetBy( 1, 0, 1));
+        mValues[6] = mAcc.getValue(ijk.offsetBy( 1, 1, 1));
+        mValues[7] = mAcc.getValue(ijk.offsetBy( 1, 1, 0));
+    }
+
+    template<typename, int, typename> friend class BaseStencil; // allow base class to call init()
+    using BaseType::mAcc;
+    using BaseType::mValues;
+    using BaseType::mCenter;
+}; // BoxStencil class
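// [editorial note] A typical BoxStencil round trip, sketched under the assumption of a
// nanovdb::FloatGrid `grid` and an index-space position `xyz`:
//
//     nanovdb::math::BoxStencil<nanovdb::FloatGrid> stencil(grid);
//     stencil.moveTo(xyz);                  // rounds down and caches the 8 corners
//     float v = stencil.interpolation(xyz); // tri-linear value at xyz
//     auto  g = stencil.gradient(xyz);      // gradient mapped to world space via applyIJT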
+// ---------------------------- GradStencil ----------------------------
+
+namespace { // anonymous namespace for stencil-layout map
+
+template<int i, int j, int k> struct GradPt {};
+template<> struct GradPt< 0, 0, 0> { enum { idx = 0 }; };
+template<> struct GradPt< 1, 0, 0> { enum { idx = 2 }; };
+template<> struct GradPt< 0, 1, 0> { enum { idx = 4 }; };
+template<> struct GradPt< 0, 0, 1> { enum { idx = 6 }; };
+template<> struct GradPt<-1, 0, 0> { enum { idx = 1 }; };
+template<> struct GradPt< 0,-1, 0> { enum { idx = 3 }; };
+template<> struct GradPt< 0, 0,-1> { enum { idx = 5 }; };
+
+}
+
+/// This is a simple 7-point nearest neighbor stencil that supports
+/// gradient by second-order central differencing, first-order upwinding,
+/// Laplacian, closest-point transform and zero-crossing test.
+///
+/// @note For optimal random access performance this class
+///       includes its own grid accessor.
+template<typename GridT>
+class GradStencil : public BaseStencil<GradStencil<GridT>, 7, GridT>
+{
+    using SelfT    = GradStencil<GridT>;
+    using BaseType = BaseStencil<SelfT, 7, GridT>;
+public:
+    using GridType  = GridT;
+    using TreeType  = typename GridT::TreeType;
+    using ValueType = typename GridT::ValueType;
+
+    static constexpr int SIZE = 7;
+
+    __hostdev__ GradStencil(const GridType& grid)
+        : BaseType(grid)
+        , mInv2Dx(ValueType(0.5 / grid.voxelSize()[0]))
+        , mInvDx2(ValueType(4.0 * mInv2Dx * mInv2Dx))
+    {
+    }
+
+    __hostdev__ GradStencil(const GridType& grid, double dx)
+        : BaseType(grid)
+        , mInv2Dx(ValueType(0.5 / dx))
+        , mInvDx2(ValueType(4.0 * mInv2Dx * mInv2Dx))
+    {
+    }
+
+    /// @brief Return the norm square of the single-sided upwind gradient
+    ///        (computed via Godunov's scheme) at the previously buffered location.
+    ///
+    /// @note This method should not be called until the stencil
+    ///       buffer has been populated via a call to moveTo(ijk).
+    __hostdev__ inline ValueType normSqGrad() const
+    {
+        return mInvDx2 * GodunovsNormSqrd(mValues[0] > ValueType(0),
+                                          mValues[0] - mValues[1],
+                                          mValues[2] - mValues[0],
+                                          mValues[0] - mValues[3],
+                                          mValues[4] - mValues[0],
+                                          mValues[0] - mValues[5],
+                                          mValues[6] - mValues[0]);
+    }
+
+    /// @brief Return the gradient computed at the previously buffered
+    ///        location by second order central differencing.
+    ///
+    /// @note This method should not be called until the stencil
+    ///       buffer has been populated via a call to moveTo(ijk).
+    __hostdev__ inline Vec3<ValueType> gradient() const
+    {
+        return Vec3<ValueType>(mValues[2] - mValues[1],
+                               mValues[4] - mValues[3],
+                               mValues[6] - mValues[5]) * mInv2Dx;
+    }
+
+    /// @brief Return the first-order upwind gradient corresponding to the direction V.
+    ///
+    /// @note This method should not be called until the stencil
+    ///       buffer has been populated via a call to moveTo(ijk).
+    __hostdev__ inline Vec3<ValueType> gradient(const Vec3<ValueType>& V) const
+    {
+        return Vec3<ValueType>(
+            V[0]>0 ? mValues[0] - mValues[1] : mValues[2] - mValues[0],
+            V[1]>0 ? mValues[0] - mValues[3] : mValues[4] - mValues[0],
+            V[2]>0 ? mValues[0] - mValues[5] : mValues[6] - mValues[0]) * 2 * mInv2Dx;
+    }
+
+    /// Return the Laplacian computed at the previously buffered
+    /// location by second-order central differencing.
+    __hostdev__ inline ValueType laplacian() const
+    {
+        return mInvDx2 * (mValues[1] + mValues[2] +
+                          mValues[3] + mValues[4] +
+                          mValues[5] + mValues[6] - 6 * mValues[0]);
+    }
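    // [editorial note] A hedged sketch of how normSqGrad() above is typically used for
    // level-set re-normalization diagnostics (`grid` is assumed to be a
    // nanovdb::FloatGrid level set and `ijk` an index-space coordinate):
    //
    //     nanovdb::math::GradStencil<nanovdb::FloatGrid> stencil(grid);
    //     stencil.moveTo(ijk);
    //     float err = stencil.normSqGrad() - 1.0f; // |grad(phi)|^2 should be ~1 for an SDF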
(mValues[1]<0 || mValues[2]<0 || mValues[3]<0 || mValues[4]<0 || mValues[5]<0 || mValues[6]<0)
+                              : (mValues[1]>0 || mValues[2]>0 || mValues[3]>0 || mValues[4]>0 || mValues[5]>0 || mValues[6]>0));
+    }
+
+    /// @brief Compute the closest-point transform to a level set.
+    /// @return the closest point in index space to the surface
+    ///         from which the level set was derived.
+    ///
+    /// @note This method assumes that the grid represents a level set
+    ///       with distances in world units and a simple affine transform
+    ///       with uniform scaling.
+    __hostdev__ inline Vec3<ValueType> cpt()
+    {
+        const Coord& ijk = BaseType::getCenterCoord();
+        const ValueType d = ValueType(mValues[0] * 0.5 * mInvDx2); // distance in voxels / (2dx^2)
+        const auto value = Vec3<ValueType>(ijk[0] - d*(mValues[2] - mValues[1]),
+                                           ijk[1] - d*(mValues[4] - mValues[3]),
+                                           ijk[2] - d*(mValues[6] - mValues[5]));
+        return value;
+    }
+
+    /// Return linear offset for the specified stencil point relative to its center
+    template<int i, int j, int k>
+    __hostdev__ unsigned int pos() const { return GradPt<i,j,k>::idx; }
+
+private:
+
+    __hostdev__ inline void init(const Coord& ijk)
+    {
+        mValues[ 1] = mAcc.getValue(ijk.offsetBy(-1, 0, 0));
+        mValues[ 2] = mAcc.getValue(ijk.offsetBy( 1, 0, 0));
+
+        mValues[ 3] = mAcc.getValue(ijk.offsetBy( 0,-1, 0));
+        mValues[ 4] = mAcc.getValue(ijk.offsetBy( 0, 1, 0));
+
+        mValues[ 5] = mAcc.getValue(ijk.offsetBy( 0, 0,-1));
+        mValues[ 6] = mAcc.getValue(ijk.offsetBy( 0, 0, 1));
+    }
+
+    template<typename, int, typename> friend class BaseStencil; // allow base class to call init()
+    using BaseType::mAcc;
+    using BaseType::mValues;
+    const ValueType mInv2Dx, mInvDx2;
+}; // GradStencil class
+
+
+// ---------------------------- WenoStencil ----------------------------
+
+namespace { // anonymous namespace for stencil-layout map
+
+    template<int i, int j, int k> struct WenoPt {};
+    template<> struct WenoPt< 0, 0, 0> { enum { idx = 0 }; };
+
+    template<> struct WenoPt<-3, 0, 0> { enum { idx = 1 }; };
+    template<> struct WenoPt<-2, 0, 0> { enum { idx = 2 }; };
+    template<> struct WenoPt<-1, 0, 0> { enum { idx = 3 }; };
+    template<> struct WenoPt< 1, 0, 0> { enum { idx = 4 }; };
+    template<> struct WenoPt< 2, 0, 0> { enum { idx = 5 }; };
+    template<> struct WenoPt< 3, 0, 0> { enum { idx = 6 }; };
+
+    template<> struct WenoPt< 0,-3, 0> { enum { idx = 7 }; };
+    template<> struct WenoPt< 0,-2, 0> { enum { idx = 8 }; };
+    template<> struct WenoPt< 0,-1, 0> { enum { idx = 9 }; };
+    template<> struct WenoPt< 0, 1, 0> { enum { idx =10 }; };
+    template<> struct WenoPt< 0, 2, 0> { enum { idx =11 }; };
+    template<> struct WenoPt< 0, 3, 0> { enum { idx =12 }; };
+
+    template<> struct WenoPt< 0, 0,-3> { enum { idx =13 }; };
+    template<> struct WenoPt< 0, 0,-2> { enum { idx =14 }; };
+    template<> struct WenoPt< 0, 0,-1> { enum { idx =15 }; };
+    template<> struct WenoPt< 0, 0, 1> { enum { idx =16 }; };
+    template<> struct WenoPt< 0, 0, 2> { enum { idx =17 }; };
+    template<> struct WenoPt< 0, 0, 3> { enum { idx =18 }; };
+
+}
+
+/// @brief This is a special 19-point stencil that supports optimal fifth-order WENO
+///        upwinding, second-order central differencing, Laplacian, and zero-crossing test.
+///
+/// @note For optimal random access performance this class
+///       includes its own grid accessor.
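+///
+/// @par Usage sketch (illustrative only; `grid` and `ijk` are hypothetical placeholders):
+/// @code
+/// nanovdb::math::WenoStencil<nanovdb::FloatGrid> stencil(grid);
+/// stencil.moveTo(ijk);// populate the 19-point stencil buffer first
+/// const float nsg = stencil.normSqGrad();// norm-square of the WENO upwind gradient
+/// @endcode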
+template +class WenoStencil: public BaseStencil, 19, GridT> +{ + using SelfT = WenoStencil; + using BaseType = BaseStencil; +public: + using GridType = GridT; + using TreeType = typename GridT::TreeType; + using ValueType = typename GridT::ValueType; + + static constexpr int SIZE = 19; + + __hostdev__ WenoStencil(const GridType& grid) + : BaseType(grid) + , mDx2(ValueType(Pow2(grid.voxelSize()[0]))) + , mInv2Dx(ValueType(0.5 / grid.voxelSize()[0])) + , mInvDx2(ValueType(1.0 / mDx2)) + { + } + + __hostdev__ WenoStencil(const GridType& grid, double dx) + : BaseType(grid) + , mDx2(ValueType(dx * dx)) + , mInv2Dx(ValueType(0.5 / dx)) + , mInvDx2(ValueType(1.0 / mDx2)) + { + } + + /// @brief Return the norm-square of the WENO upwind gradient (computed via + /// WENO upwinding and Godunov's scheme) at the previously buffered location. + /// + /// @note This method should not be called until the stencil + /// buffer has been populated via a call to moveTo(ijk). + __hostdev__ inline ValueType normSqGrad(ValueType isoValue = ValueType(0)) const + { + const ValueType* v = mValues; + const RealT + dP_xm = WENO5(v[ 2]-v[ 1],v[ 3]-v[ 2],v[ 0]-v[ 3],v[ 4]-v[ 0],v[ 5]-v[ 4],mDx2), + dP_xp = WENO5(v[ 6]-v[ 5],v[ 5]-v[ 4],v[ 4]-v[ 0],v[ 0]-v[ 3],v[ 3]-v[ 2],mDx2), + dP_ym = WENO5(v[ 8]-v[ 7],v[ 9]-v[ 8],v[ 0]-v[ 9],v[10]-v[ 0],v[11]-v[10],mDx2), + dP_yp = WENO5(v[12]-v[11],v[11]-v[10],v[10]-v[ 0],v[ 0]-v[ 9],v[ 9]-v[ 8],mDx2), + dP_zm = WENO5(v[14]-v[13],v[15]-v[14],v[ 0]-v[15],v[16]-v[ 0],v[17]-v[16],mDx2), + dP_zp = WENO5(v[18]-v[17],v[17]-v[16],v[16]-v[ 0],v[ 0]-v[15],v[15]-v[14],mDx2); + return mInvDx2*static_cast( + GodunovsNormSqrd(v[0]>isoValue, dP_xm, dP_xp, dP_ym, dP_yp, dP_zm, dP_zp)); + } + + /// Return the optimal fifth-order upwind gradient corresponding to the + /// direction V. + /// + /// @note This method should not be called until the stencil + /// buffer has been populated via a call to moveTo(ijk). + __hostdev__ inline Vec3 gradient(const Vec3& V) const + { + const ValueType* v = mValues; + return 2*mInv2Dx * Vec3( + V[0]>0 ? WENO5(v[ 2]-v[ 1],v[ 3]-v[ 2],v[ 0]-v[ 3], v[ 4]-v[ 0],v[ 5]-v[ 4],mDx2) + : WENO5(v[ 6]-v[ 5],v[ 5]-v[ 4],v[ 4]-v[ 0], v[ 0]-v[ 3],v[ 3]-v[ 2],mDx2), + V[1]>0 ? WENO5(v[ 8]-v[ 7],v[ 9]-v[ 8],v[ 0]-v[ 9], v[10]-v[ 0],v[11]-v[10],mDx2) + : WENO5(v[12]-v[11],v[11]-v[10],v[10]-v[ 0], v[ 0]-v[ 9],v[ 9]-v[ 8],mDx2), + V[2]>0 ? WENO5(v[14]-v[13],v[15]-v[14],v[ 0]-v[15], v[16]-v[ 0],v[17]-v[16],mDx2) + : WENO5(v[18]-v[17],v[17]-v[16],v[16]-v[ 0], v[ 0]-v[15],v[15]-v[14],mDx2)); + } + /// Return the gradient computed at the previously buffered + /// location by second-order central differencing. + /// + /// @note This method should not be called until the stencil + /// buffer has been populated via a call to moveTo(ijk). + __hostdev__ inline Vec3 gradient() const + { + return mInv2Dx * Vec3(mValues[ 4] - mValues[ 3], + mValues[10] - mValues[ 9], + mValues[16] - mValues[15]); + } + + /// Return the Laplacian computed at the previously buffered + /// location by second-order central differencing. + /// + /// @note This method should not be called until the stencil + /// buffer has been populated via a call to moveTo(ijk). 
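+    /// @note In index space this is the standard 7-point discrete Laplacian,
+    ///       (v(x-1,y,z) + v(x+1,y,z) + v(x,y-1,z) + v(x,y+1,z) + v(x,y,z-1) + v(x,y,z+1) - 6 v(x,y,z)) / dx^2,
+    ///       evaluated from the six face neighbors of the 19-point stencil.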
+ __hostdev__ inline ValueType laplacian() const + { + return mInvDx2 * ( + mValues[ 3] + mValues[ 4] + + mValues[ 9] + mValues[10] + + mValues[15] + mValues[16] - 6*mValues[0]); + } + + /// Return @c true if the sign of the value at the center point of the stencil + /// differs from the sign of any of its six nearest neighbors + __hostdev__ inline bool zeroCrossing() const + { + const ValueType* v = mValues; + return (v[ 0]>0 ? (v[ 3]<0 || v[ 4]<0 || v[ 9]<0 || v[10]<0 || v[15]<0 || v[16]<0) + : (v[ 3]>0 || v[ 4]>0 || v[ 9]>0 || v[10]>0 || v[15]>0 || v[16]>0)); + } + + /// Return linear offset for the specified stencil point relative to its center + template + __hostdev__ unsigned int pos() const { return WenoPt::idx; } + +private: + __hostdev__ inline void init(const Coord& ijk) + { + mValues[ 1] = mAcc.getValue(ijk.offsetBy(-3, 0, 0)); + mValues[ 2] = mAcc.getValue(ijk.offsetBy(-2, 0, 0)); + mValues[ 3] = mAcc.getValue(ijk.offsetBy(-1, 0, 0)); + mValues[ 4] = mAcc.getValue(ijk.offsetBy( 1, 0, 0)); + mValues[ 5] = mAcc.getValue(ijk.offsetBy( 2, 0, 0)); + mValues[ 6] = mAcc.getValue(ijk.offsetBy( 3, 0, 0)); + + mValues[ 7] = mAcc.getValue(ijk.offsetBy( 0, -3, 0)); + mValues[ 8] = mAcc.getValue(ijk.offsetBy( 0, -2, 0)); + mValues[ 9] = mAcc.getValue(ijk.offsetBy( 0, -1, 0)); + mValues[10] = mAcc.getValue(ijk.offsetBy( 0, 1, 0)); + mValues[11] = mAcc.getValue(ijk.offsetBy( 0, 2, 0)); + mValues[12] = mAcc.getValue(ijk.offsetBy( 0, 3, 0)); + + mValues[13] = mAcc.getValue(ijk.offsetBy( 0, 0, -3)); + mValues[14] = mAcc.getValue(ijk.offsetBy( 0, 0, -2)); + mValues[15] = mAcc.getValue(ijk.offsetBy( 0, 0, -1)); + mValues[16] = mAcc.getValue(ijk.offsetBy( 0, 0, 1)); + mValues[17] = mAcc.getValue(ijk.offsetBy( 0, 0, 2)); + mValues[18] = mAcc.getValue(ijk.offsetBy( 0, 0, 3)); + } + + template friend class BaseStencil; // allow base class to call init() + using BaseType::mAcc; + using BaseType::mValues; + const ValueType mDx2, mInv2Dx, mInvDx2; +}; // WenoStencil class + + +// ---------------------------- CurvatureStencil ---------------------------- + +namespace { // anonymous namespace for stencil-layout map + + template struct CurvPt {}; + template<> struct CurvPt< 0, 0, 0> { enum { idx = 0 }; }; + + template<> struct CurvPt<-1, 0, 0> { enum { idx = 1 }; }; + template<> struct CurvPt< 1, 0, 0> { enum { idx = 2 }; }; + + template<> struct CurvPt< 0,-1, 0> { enum { idx = 3 }; }; + template<> struct CurvPt< 0, 1, 0> { enum { idx = 4 }; }; + + template<> struct CurvPt< 0, 0,-1> { enum { idx = 5 }; }; + template<> struct CurvPt< 0, 0, 1> { enum { idx = 6 }; }; + + template<> struct CurvPt<-1,-1, 0> { enum { idx = 7 }; }; + template<> struct CurvPt< 1,-1, 0> { enum { idx = 8 }; }; + template<> struct CurvPt<-1, 1, 0> { enum { idx = 9 }; }; + template<> struct CurvPt< 1, 1, 0> { enum { idx =10 }; }; + + template<> struct CurvPt<-1, 0,-1> { enum { idx =11 }; }; + template<> struct CurvPt< 1, 0,-1> { enum { idx =12 }; }; + template<> struct CurvPt<-1, 0, 1> { enum { idx =13 }; }; + template<> struct CurvPt< 1, 0, 1> { enum { idx =14 }; }; + + template<> struct CurvPt< 0,-1,-1> { enum { idx =15 }; }; + template<> struct CurvPt< 0, 1,-1> { enum { idx =16 }; }; + template<> struct CurvPt< 0,-1, 1> { enum { idx =17 }; }; + template<> struct CurvPt< 0, 1, 1> { enum { idx =18 }; }; + +} + +template +class CurvatureStencil: public BaseStencil, 19, GridT> +{ + using SelfT = CurvatureStencil; + using BaseType = BaseStencil; +public: + using GridType = GridT; + using TreeType = typename GridT::TreeType; + using 
ValueType = typename GridT::ValueType; + + static constexpr int SIZE = 19; + + __hostdev__ CurvatureStencil(const GridType& grid) + : BaseType(grid) + , mInv2Dx(ValueType(0.5 / grid.voxelSize()[0])) + , mInvDx2(ValueType(4.0 * mInv2Dx * mInv2Dx)) + { + } + + __hostdev__ CurvatureStencil(const GridType& grid, double dx) + : BaseType(grid) + , mInv2Dx(ValueType(0.5 / dx)) + , mInvDx2(ValueType(4.0 * mInv2Dx * mInv2Dx)) + { + } + + /// @brief Return the mean curvature at the previously buffered location. + /// + /// @note This method should not be called until the stencil + /// buffer has been populated via a call to moveTo(ijk). + __hostdev__ inline ValueType meanCurvature() const + { + RealT alpha, normGrad; + return this->meanCurvature(alpha, normGrad) ? + ValueType(alpha*mInv2Dx/Pow3(normGrad)) : 0; + } + + /// @brief Return the Gaussian curvature at the previously buffered location. + /// + /// @note This method should not be called until the stencil + /// buffer has been populated via a call to moveTo(ijk). + __hostdev__ inline ValueType gaussianCurvature() const + { + RealT alpha, normGrad; + return this->gaussianCurvature(alpha, normGrad) ? + ValueType(alpha*mInvDx2/Pow4(normGrad)) : 0; + } + + /// @brief Return both the mean and the Gaussian curvature at the + /// previously buffered location. + /// + /// @note This method should not be called until the stencil + /// buffer has been populated via a call to moveTo(ijk). + __hostdev__ inline void curvatures(ValueType &mean, ValueType& gauss) const + { + RealT alphaM, alphaG, normGrad; + if (this->curvatures(alphaM, alphaG, normGrad)) { + mean = ValueType(alphaM*mInv2Dx/Pow3(normGrad)); + gauss = ValueType(alphaG*mInvDx2/Pow4(normGrad)); + } else { + mean = gauss = 0; + } + } + + /// Return the mean curvature multiplied by the norm of the + /// central-difference gradient. This method is very useful for + /// mean-curvature flow of level sets! + /// + /// @note This method should not be called until the stencil + /// buffer has been populated via a call to moveTo(ijk). + __hostdev__ inline ValueType meanCurvatureNormGrad() const + { + RealT alpha, normGrad; + return this->meanCurvature(alpha, normGrad) ? + ValueType(alpha*mInvDx2/(2*Pow2(normGrad))) : 0; + } + + /// Return the mean Gaussian multiplied by the norm of the + /// central-difference gradient. + /// + /// @note This method should not be called until the stencil + /// buffer has been populated via a call to moveTo(ijk). + __hostdev__ inline ValueType gaussianCurvatureNormGrad() const + { + RealT alpha, normGrad; + return this->gaussianCurvature(alpha, normGrad) ? + ValueType(2*alpha*mInv2Dx*mInvDx2/Pow3(normGrad)) : 0; + } + + /// @brief Return both the mean and the Gaussian curvature at the + /// previously buffered location. + /// + /// @note This method should not be called until the stencil + /// buffer has been populated via a call to moveTo(ijk). + __hostdev__ inline void curvaturesNormGrad(ValueType &mean, ValueType& gauss) const + { + RealT alphaM, alphaG, normGrad; + if (this->curvatures(alphaM, alphaG, normGrad)) { + mean = ValueType(alphaM*mInvDx2/(2*Pow2(normGrad))); + gauss = ValueType(2*alphaG*mInv2Dx*mInvDx2/Pow3(normGrad)); + } else { + mean = gauss = 0; + } + } + + /// @brief Computes the minimum and maximum principal curvature at the + /// previously buffered location. + /// + /// @note This method should not be called until the stencil + /// buffer has been populated via a call to moveTo(ijk). 
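+    /// @note The principal curvatures are derived from the mean curvature H and
+    ///       the Gaussian curvature K as H +/- sqrt(H*H - K), which is how the
+    ///       quantities alphaM and alphaG are combined in the implementation below.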
+ __hostdev__ inline void principalCurvatures(ValueType &min, ValueType &max) const + { + min = max = 0; + RealT alphaM, alphaG, normGrad; + if (this->curvatures(alphaM, alphaG, normGrad)) { + const RealT mean = alphaM*mInv2Dx/Pow3(normGrad); + const RealT tmp = Sqrt(mean*mean - alphaG*mInvDx2/Pow4(normGrad)); + min = ValueType(mean - tmp); + max = ValueType(mean + tmp); + } + } + + /// Return the Laplacian computed at the previously buffered + /// location by second-order central differencing. + /// + /// @note This method should not be called until the stencil + /// buffer has been populated via a call to moveTo(ijk). + __hostdev__ inline ValueType laplacian() const + { + return mInvDx2 * ( + mValues[1] + mValues[2] + + mValues[3] + mValues[4] + + mValues[5] + mValues[6] - 6*mValues[0]); + } + + /// Return the gradient computed at the previously buffered + /// location by second-order central differencing. + /// + /// @note This method should not be called until the stencil + /// buffer has been populated via a call to moveTo(ijk). + __hostdev__ inline Vec3 gradient() const + { + return Vec3( + mValues[2] - mValues[1], + mValues[4] - mValues[3], + mValues[6] - mValues[5])*mInv2Dx; + } + + /// Return linear offset for the specified stencil point relative to its center + template + __hostdev__ unsigned int pos() const { return CurvPt::idx; } + +private: + __hostdev__ inline void init(const Coord &ijk) + { + mValues[ 1] = mAcc.getValue(ijk.offsetBy(-1, 0, 0)); + mValues[ 2] = mAcc.getValue(ijk.offsetBy( 1, 0, 0)); + + mValues[ 3] = mAcc.getValue(ijk.offsetBy( 0, -1, 0)); + mValues[ 4] = mAcc.getValue(ijk.offsetBy( 0, 1, 0)); + + mValues[ 5] = mAcc.getValue(ijk.offsetBy( 0, 0, -1)); + mValues[ 6] = mAcc.getValue(ijk.offsetBy( 0, 0, 1)); + + mValues[ 7] = mAcc.getValue(ijk.offsetBy(-1, -1, 0)); + mValues[ 8] = mAcc.getValue(ijk.offsetBy( 1, -1, 0)); + mValues[ 9] = mAcc.getValue(ijk.offsetBy(-1, 1, 0)); + mValues[10] = mAcc.getValue(ijk.offsetBy( 1, 1, 0)); + + mValues[11] = mAcc.getValue(ijk.offsetBy(-1, 0, -1)); + mValues[12] = mAcc.getValue(ijk.offsetBy( 1, 0, -1)); + mValues[13] = mAcc.getValue(ijk.offsetBy(-1, 0, 1)); + mValues[14] = mAcc.getValue(ijk.offsetBy( 1, 0, 1)); + + mValues[15] = mAcc.getValue(ijk.offsetBy( 0, -1, -1)); + mValues[16] = mAcc.getValue(ijk.offsetBy( 0, 1, -1)); + mValues[17] = mAcc.getValue(ijk.offsetBy( 0, -1, 1)); + mValues[18] = mAcc.getValue(ijk.offsetBy( 0, 1, 1)); + } + + __hostdev__ inline RealT Dx() const { return 0.5*(mValues[2] - mValues[1]); }// * 1/dx + __hostdev__ inline RealT Dy() const { return 0.5*(mValues[4] - mValues[3]); }// * 1/dx + __hostdev__ inline RealT Dz() const { return 0.5*(mValues[6] - mValues[5]); }// * 1/dx + __hostdev__ inline RealT Dxx() const { return mValues[2] - 2 * mValues[0] + mValues[1]; }// * 1/dx2 + __hostdev__ inline RealT Dyy() const { return mValues[4] - 2 * mValues[0] + mValues[3]; }// * 1/dx2} + __hostdev__ inline RealT Dzz() const { return mValues[6] - 2 * mValues[0] + mValues[5]; }// * 1/dx2 + __hostdev__ inline RealT Dxy() const { return 0.25 * (mValues[10] - mValues[ 8] + mValues[ 7] - mValues[ 9]); }// * 1/dx2 + __hostdev__ inline RealT Dxz() const { return 0.25 * (mValues[14] - mValues[12] + mValues[11] - mValues[13]); }// * 1/dx2 + __hostdev__ inline RealT Dyz() const { return 0.25 * (mValues[18] - mValues[16] + mValues[15] - mValues[17]); }// * 1/dx2 + + __hostdev__ inline bool meanCurvature(RealT& alpha, RealT& normGrad) const + { + // For performance all finite differences are unscaled wrt dx + const RealT Dx 
= this->Dx(), Dy = this->Dy(), Dz = this->Dz(), + Dx2 = Dx*Dx, Dy2 = Dy*Dy, Dz2 = Dz*Dz, normGrad2 = Dx2 + Dy2 + Dz2; + if (normGrad2 <= Tolerance::value()) { + alpha = normGrad = 0; + return false; + } + const RealT Dxx = this->Dxx(), Dyy = this->Dyy(), Dzz = this->Dzz(); + alpha = Dx2*(Dyy + Dzz) + Dy2*(Dxx + Dzz) + Dz2*(Dxx + Dyy) - + 2*(Dx*(Dy*this->Dxy() + Dz*this->Dxz()) + Dy*Dz*this->Dyz());// * 1/dx^4 + normGrad = Sqrt(normGrad2); // * 1/dx + return true; + } + + __hostdev__ inline bool gaussianCurvature(RealT& alpha, RealT& normGrad) const + { + // For performance all finite differences are unscaled wrt dx + const RealT Dx = this->Dx(), Dy = this->Dy(), Dz = this->Dz(), + Dx2 = Dx*Dx, Dy2 = Dy*Dy, Dz2 = Dz*Dz, normGrad2 = Dx2 + Dy2 + Dz2; + if (normGrad2 <= Tolerance::value()) { + alpha = normGrad = 0; + return false; + } + const RealT Dxx = this->Dxx(), Dyy = this->Dyy(), Dzz = this->Dzz(), + Dxy = this->Dxy(), Dxz = this->Dxz(), Dyz = this->Dyz(); + alpha = Dx2*(Dyy*Dzz - Dyz*Dyz) + Dy2*(Dxx*Dzz - Dxz*Dxz) + Dz2*(Dxx*Dyy - Dxy*Dxy) + + 2*( Dy*Dz*(Dxy*Dxz - Dyz*Dxx) + Dx*Dz*(Dxy*Dyz - Dxz*Dyy) + Dx*Dy*(Dxz*Dyz - Dxy*Dzz) );// * 1/dx^6 + normGrad = Sqrt(normGrad2); // * 1/dx + return true; + } + + __hostdev__ inline bool curvatures(RealT& alphaM, RealT& alphaG, RealT& normGrad) const + { + // For performance all finite differences are unscaled wrt dx + const RealT Dx = this->Dx(), Dy = this->Dy(), Dz = this->Dz(), + Dx2 = Dx*Dx, Dy2 = Dy*Dy, Dz2 = Dz*Dz, normGrad2 = Dx2 + Dy2 + Dz2; + if (normGrad2 <= Tolerance::value()) { + alphaM = alphaG =normGrad = 0; + return false; + } + const RealT Dxx = this->Dxx(), Dyy = this->Dyy(), Dzz = this->Dzz(), + Dxy = this->Dxy(), Dxz = this->Dxz(), Dyz = this->Dyz(); + alphaM = Dx2*(Dyy + Dzz) + Dy2*(Dxx + Dzz) + Dz2*(Dxx + Dyy) - + 2*(Dx*(Dy*Dxy + Dz*Dxz) + Dy*Dz*Dyz);// *1/dx^4 + alphaG = Dx2*(Dyy*Dzz - Dyz*Dyz) + Dy2*(Dxx*Dzz - Dxz*Dxz) + Dz2*(Dxx*Dyy - Dxy*Dxy) + + 2*( Dy*Dz*(Dxy*Dxz - Dyz*Dxx) + Dx*Dz*(Dxy*Dyz - Dxz*Dyy) + Dx*Dy*(Dxz*Dyz - Dxy*Dzz) );// *1/dx^6 + normGrad = Sqrt(normGrad2); // * 1/dx + return true; + } + + template friend class BaseStencil; // allow base class to call init() + using BaseType::mAcc; + using BaseType::mValues; + const ValueType mInv2Dx, mInvDx2; +}; // CurvatureStencil class + +}// namespace math + +} // end nanovdb namespace + +#endif // NANOVDB_MATH_STENCILS_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/tools/CreateNanoGrid.h b/nanovdb/nanovdb/tools/CreateNanoGrid.h new file mode 100644 index 0000000000..0615cb6e22 --- /dev/null +++ b/nanovdb/nanovdb/tools/CreateNanoGrid.h @@ -0,0 +1,2073 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/*! + \file nanovdb/tools/CreateNanoGrid.h + + \author Ken Museth + + \date June 26, 2020 + + \note In the examples below we assume that @c srcGrid is a exiting grid of type + SrcGridT = @c openvdb::FloatGrid, @c openvdb::FloatGrid or @c nanovdb::tools::build::FloatGrid. + + \brief Convert any grid to a nanovdb grid of the same type, e.g. float->float + \code + auto handle = nanovdb::tools::createNanoGrid(srcGrid); + auto *dstGrid = handle.grid(); + \endcode + + \brief Convert a grid to a nanovdb grid of a different type, e.g. 
float->half
+  \code
+  auto handle = nanovdb::tools::createNanoGrid<SrcGridT, nanovdb::Fp16>(srcGrid);
+  auto *dstGrid = handle.grid<nanovdb::Fp16>();
+  \endcode
+
+  \brief Convert a grid to a nanovdb grid of the same type but using a CUDA buffer
+  \code
+  auto handle = nanovdb::tools::createNanoGrid<SrcGridT, float, nanovdb::cuda::DeviceBuffer>(srcGrid);
+  auto *dstGrid = handle.grid<float>();
+  \endcode
+
+  \brief Create a nanovdb grid that indexes values in an existing source grid of any type.
+         If DstBuildT = nanovdb::ValueIndex both active and inactive values are indexed
+         and if DstBuildT = nanovdb::ValueOnIndex only active values are indexed.
+  \code
+  using DstBuildT = nanovdb::ValueIndex;// index both active and inactive values
+  auto handle = nanovdb::tools::createNanoGrid<SrcGridT,DstBuildT>(srcGrid,0,false,false);//no blind data, tile values or stats
+  auto *dstGrid = handle.grid<DstBuildT>();
+  \endcode
+
+  \brief Create a NanoVDB grid from scratch
+  \code
+#if defined(NANOVDB_USE_OPENVDB) && !defined(__CUDACC__)
+  using SrcGridT = openvdb::FloatGrid;
+#else
+  using SrcGridT = nanovdb::tools::build::FloatGrid;
+#endif
+  SrcGridT srcGrid(0.0f);// create an empty source grid
+  auto srcAcc = srcGrid.getAccessor();// create an accessor
+  srcAcc.setValue(nanovdb::Coord(1,2,3), 1.0f);// set a voxel value
+
+  auto handle = nanovdb::tools::createNanoGrid(srcGrid);// convert source grid to a grid handle
+  auto dstGrid = handle.grid<float>();// get a pointer to the destination grid
+  \endcode
+
+  \brief Convert a base-pointer to an openvdb grid, denoted srcGrid, to a nanovdb
+         grid of the same type, e.g. float -> float or openvdb::Vec3f -> nanovdb::Vec3f
+  \code
+  auto handle = nanovdb::openToNanoVDB(*srcGrid);// convert source grid to a grid handle
+  auto dstGrid = handle.grid<float>();// get a pointer to the destination grid
+  \endcode
+
+  \brief Converts any existing grid to a NanoVDB grid, for example:
+         nanovdb::tools::build::Grid<float> -> nanovdb::Grid<float>
+         nanovdb::Grid<float> -> nanovdb::Grid<nanovdb::Fp16>
+         nanovdb::Grid<float> -> nanovdb::Grid<nanovdb::ValueIndex>
+         openvdb::Grid<float> -> nanovdb::Grid<float>
+         openvdb::Grid<float> -> nanovdb::Grid<nanovdb::Fp16>
+         openvdb::Grid<float> -> nanovdb::Grid<nanovdb::ValueIndex>
+         openvdb::Grid<openvdb::Vec3f> -> nanovdb::Grid<nanovdb::Vec3f>
+
+  \note This file replaces GridBuilder.h, IndexGridBuilder.h and OpenToNanoVDB.h
+*/
+
+#ifndef NANOVDB_TOOLS_CREATENANOGRID_H_HAS_BEEN_INCLUDED
+#define NANOVDB_TOOLS_CREATENANOGRID_H_HAS_BEEN_INCLUDED
+
+#if defined(NANOVDB_USE_OPENVDB) && !defined(__CUDACC__)
+#include <openvdb/openvdb.h>
+#include <openvdb/points/PointDataGrid.h>
+#include <openvdb/util/CpuTimer.h>
+#endif
+
+#include <nanovdb/NanoVDB.h>
+#include <nanovdb/GridHandle.h>
+#include <nanovdb/NodeManager.h>
+#include <nanovdb/tools/GridChecksum.h>
+#include <nanovdb/tools/GridStats.h>
+#include <nanovdb/tools/GridBuilder.h>
+#include <nanovdb/util/ForEach.h>
+#include <nanovdb/util/Invoke.h>
+#include <nanovdb/util/PrefixSum.h>
+#include <nanovdb/util/Range.h>
+#include <nanovdb/math/DitherLUT.h>// for nanovdb::math::DitherLUT
+
+#include <limits>
+#include <vector>
+#include <set>
+#include <cstring> // for memcpy
+#include <type_traits>
+
+namespace nanovdb {// ============================================================================
+
+namespace tools {// ==============================================================================
+
+// Forward declarations (defined below)
+template <typename> class CreateNanoGrid;
+class AbsDiff;
+template <typename> struct MapToNano;
+
+//================================================================================================
+
+#if defined(NANOVDB_USE_OPENVDB) && !defined(__CUDACC__)
+/// @brief Forward declaration of free-standing function that converts an OpenVDB GridBase into a NanoVDB GridHandle
+/// @tparam BufferT Type of the buffer used to allocate the destination grid
+/// @param base Shared pointer to a base openvdb grid to be converted
+/// @param sMode Mode for computing statistics of the destination grid
+/// @param cMode Mode for computing checksums of the destination grid
+/// @param verbose Mode of verbosity
+/// @return Handle to the destination
NanoGrid
+template<typename BufferT = HostBuffer>
+GridHandle<BufferT>
+openToNanoVDB(const openvdb::GridBase::Ptr& base,
+              StatsMode sMode = StatsMode::Default,
+              CheckMode cMode = CheckMode::Default,
+              int verbose = 0);
+#endif
+
+//================================================================================================
+
+/// @brief Freestanding function that creates a NanoGrid from any source grid
+/// @tparam SrcGridT Type of the input (source) grid, e.g. openvdb::Grid or nanovdb::Grid
+/// @tparam DstBuildT Type of values in the output (destination) nanovdb Grid, e.g. float or nanovdb::Fp16
+/// @tparam BufferT Type of the buffer used to allocate the destination grid
+/// @param srcGrid Input (source) grid to be converted
+/// @param sMode Mode for computing statistics of the destination grid
+/// @param cMode Mode for computing checksums of the destination grid
+/// @param verbose Mode of verbosity
+/// @param buffer Instance of a buffer used for allocation
+/// @return Handle to the destination NanoGrid
+template<typename SrcGridT,
+         typename DstBuildT = typename MapToNano<typename SrcGridT::BuildType>::type,
+         typename BufferT = HostBuffer>
+typename util::disable_if<BuildTraits<DstBuildT>::is_index || BuildTraits<DstBuildT>::is_Fp, GridHandle<BufferT>>::type
+createNanoGrid(const SrcGridT &srcGrid,
+               StatsMode sMode = StatsMode::Default,
+               CheckMode cMode = CheckMode::Default,
+               int verbose = 0,
+               const BufferT &buffer = BufferT());
+
+//================================================================================================
+
+/// @brief Freestanding function that creates a NanoGrid<ValueIndex> or NanoGrid<ValueOnIndex> from any source grid
+/// @tparam SrcGridT Type of the input (source) grid, e.g. openvdb::Grid or nanovdb::Grid
+/// @tparam DstBuildT If ValueIndex all (active and inactive) values are indexed and if
+///                   it is ValueOnIndex only active values are indexed.
+/// @tparam BufferT Type of the buffer used to allocate the destination grid
+/// @param channels If non-zero the values (active or all) in @c srcGrid are encoded as blind
+///                 data in the output index grid. @c channels indicates the number of copies
+///                 of these blind data
+/// @param includeStats If true all tree nodes will include indices for stats, i.e. min/max/avg/std-dev
+/// @param includeTiles If false only values in leaf nodes are indexed
+/// @param verbose Mode of verbosity
+/// @param buffer Instance of a buffer used for allocation
+/// @return Handle to the destination NanoGrid<T> where T = ValueIndex or ValueOnIndex
+template<typename SrcGridT,
+         typename DstBuildT = typename MapToNano<typename SrcGridT::BuildType>::type,
+         typename BufferT = HostBuffer>
+typename util::enable_if<BuildTraits<DstBuildT>::is_index, GridHandle<BufferT>>::type
+createNanoGrid(const SrcGridT &srcGrid,
+               uint32_t channels = 0u,
+               bool includeStats = true,
+               bool includeTiles = true,
+               int verbose = 0,
+               const BufferT &buffer = BufferT());
+
+//================================================================================================
+
+/// @brief Freestanding function to create a NanoGrid<FpN> from any source grid
+/// @tparam SrcGridT Type of the input (source) grid, e.g. openvdb::Grid or nanovdb::Grid
+/// @tparam DstBuildT = FpN, i.e. variable bit-width of the output grid
+/// @tparam OracleT Type of the oracle used to determine the local bit-width, i.e.
N in FpN
+/// @tparam BufferT Type of the buffer used to allocate the destination grid
+/// @param srcGrid Input (source) grid to be converted
+/// @param ditherOn switch to enable or disable dithering of quantization error
+/// @param sMode Mode for computing statistics of the destination grid
+/// @param cMode Mode for computing checksums of the destination grid
+/// @param verbose Mode of verbosity
+/// @param oracle Instance of an oracle used to determine the local bit-width, i.e. N in FpN
+/// @param buffer Instance of a buffer used for allocation
+/// @return Handle to the destination NanoGrid
+template<typename SrcGridT,
+         typename DstBuildT = typename MapToNano<typename SrcGridT::BuildType>::type,
+         typename OracleT = AbsDiff,
+         typename BufferT = HostBuffer>
+typename util::enable_if<util::is_same<DstBuildT, FpN>::value, GridHandle<BufferT>>::type
+createNanoGrid(const SrcGridT &srcGrid,
+               StatsMode sMode = StatsMode::Default,
+               CheckMode cMode = CheckMode::Default,
+               bool ditherOn = false,
+               int verbose = 0,
+               const OracleT &oracle = OracleT(),
+               const BufferT &buffer = BufferT());
+
+//================================================================================================
+
+/// @brief Freestanding function to create a NanoGrid<FpX> from any source grid, X=4,8,16
+/// @tparam SrcGridT Type of the input (source) grid, e.g. openvdb::Grid or nanovdb::Grid
+/// @tparam DstBuildT = Fp4, Fp8 or Fp16, i.e. quantization bit-width of the output grid
+/// @tparam BufferT Type of the buffer used to allocate the destination grid
+/// @param srcGrid Input (source) grid to be converted
+/// @param ditherOn switch to enable or disable dithering of quantization error
+/// @param sMode Mode for computing statistics of the destination grid
+/// @param cMode Mode for computing checksums of the destination grid
+/// @param verbose Mode of verbosity
+/// @param buffer Instance of a buffer used for allocation
+/// @return Handle to the destination NanoGrid
+template<typename SrcGridT,
+         typename DstBuildT = typename MapToNano<typename SrcGridT::BuildType>::type,
+         typename BufferT = HostBuffer>
+typename util::enable_if<BuildTraits<DstBuildT>::is_FpX, GridHandle<BufferT>>::type
+createNanoGrid(const SrcGridT &srcGrid,
+               StatsMode sMode = StatsMode::Default,
+               CheckMode cMode = CheckMode::Default,
+               bool ditherOn = false,
+               int verbose = 0,
+               const BufferT &buffer = BufferT());
+
+//================================================================================================
+
+/// @brief Compression oracle based on absolute difference
+class AbsDiff
+{
+    float mTolerance;// absolute error tolerance
+public:
+    /// @note The default value of -1 means it's un-initialized!
+    AbsDiff(float tolerance = -1.0f) : mTolerance(tolerance) {}
+    AbsDiff(const AbsDiff&) = default;
+    ~AbsDiff() = default;
+    operator bool() const {return mTolerance>=0.0f;}
+    void init(nanovdb::GridClass gClass, float background) {
+        if (gClass == GridClass::LevelSet) {
+            static const float halfWidth = 3.0f;
+            mTolerance = 0.1f * background / halfWidth;// range of ls: [-3dx; 3dx]
+        } else if (gClass == GridClass::FogVolume) {
+            mTolerance = 0.01f;// range of FOG volumes: [0;1]
+        } else {
+            mTolerance = 0.0f;
+        }
+    }
+    void setTolerance(float tolerance) { mTolerance = tolerance; }
+    float getTolerance() const { return mTolerance; }
+    /// @brief Return true if the approximate value is within the accepted
+    ///        absolute error bounds of the exact value.
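+    ///
+    /// @par Example (illustrative): with AbsDiff oracle(0.01f), the call
+    ///      oracle(1.000f, 1.004f) returns true (error 0.004 <= 0.01) whereas
+    ///      oracle(1.000f, 1.020f) returns false (error 0.020 > 0.01).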
+    ///
+    /// @details Required member method
+    bool operator()(float exact, float approx) const
+    {
+        return math::Abs(exact - approx) <= mTolerance;
+    }
+};// AbsDiff
+
+inline std::ostream& operator<<(std::ostream& os, const AbsDiff& diff)
+{
+    os << "Absolute tolerance: " << diff.getTolerance();
+    return os;
+}
+
+//================================================================================================
+
+/// @brief Compression oracle based on relative difference
+class RelDiff
+{
+    float mTolerance;// relative error tolerance
+public:
+    /// @note The default value of -1 means it's un-initialized!
+    RelDiff(float tolerance = -1.0f) : mTolerance(tolerance) {}
+    RelDiff(const RelDiff&) = default;
+    ~RelDiff() = default;
+    operator bool() const {return mTolerance>=0.0f;}
+    void setTolerance(float tolerance) { mTolerance = tolerance; }
+    float getTolerance() const { return mTolerance; }
+    /// @brief Return true if the approximate value is within the accepted
+    ///        relative error bounds of the exact value.
+    ///
+    /// @details Required member method
+    bool operator()(float exact, float approx) const
+    {
+        return math::Abs(exact - approx)/math::Max(math::Abs(exact), math::Abs(approx)) <= mTolerance;
+    }
+};// RelDiff
+
+inline std::ostream& operator<<(std::ostream& os, const RelDiff& diff)
+{
+    os << "Relative tolerance: " << diff.getTolerance();
+    return os;
+}
+
+//================================================================================================
+
+/// @brief The NodeAccessor provides a uniform API for accessing nodes of NanoVDB, OpenVDB and build Grids
+///
+/// @note General implementation that works with nanovdb::tools::build::Grid
+template <typename GridT>
+class NodeAccessor
+{
+public:
+    static constexpr bool IS_OPENVDB = false;
+    static constexpr bool IS_NANOVDB = false;
+    using BuildType = typename GridT::BuildType;
+    using ValueType = typename GridT::ValueType;
+    using GridType  = GridT;
+    using TreeType  = typename GridT::TreeType;
+    using RootType  = typename TreeType::RootNodeType;
+    template <int LEVEL>
+    using NodeType = typename NodeTrait<const TreeType, LEVEL>::type;
+    NodeAccessor(const GridT &grid) : mMgr(const_cast<GridT&>(grid)) {}
+    const GridType& grid() const {return mMgr.grid();}
+    const TreeType& tree() const {return mMgr.tree();}
+    const RootType& root() const {return mMgr.root();}
+    uint64_t nodeCount(int level) const { return mMgr.nodeCount(level); }
+    template <int LEVEL>
+    const NodeType<LEVEL>& node(uint32_t i) const {return mMgr.template node<LEVEL>(i); }
+    const std::string& getName() const {return this->grid().getName();};
+    bool hasLongGridName() const {return this->grid().getName().length() >= GridData::MaxNameSize;}
+    const nanovdb::Map& map() const {return this->grid().map();}
+    GridClass gridClass() const {return this->grid().gridClass();}
+private:
+    build::NodeManager<GridT> mMgr;
+};// NodeAccessor
+
+//================================================================================================
+
+/// @brief Template specialization for nanovdb::Grid which is special since its NodeManager
+///        uses a handle in order to support node access on the GPU!
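+///
+/// @par Sketch of the uniform NodeAccessor API (illustrative only; `grid`
+///      denotes a hypothetical nanovdb::FloatGrid instance):
+/// @code
+/// NodeAccessor<nanovdb::FloatGrid> acc(grid);
+/// const uint64_t leafCount = acc.nodeCount(0);// number of leaf nodes
+/// const auto &firstLeaf = acc.node<0>(0);// access to the first leaf node
+/// @endcode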
+template +class NodeAccessor< NanoGrid > +{ +public: + static constexpr bool IS_OPENVDB = false; + static constexpr bool IS_NANOVDB = true; + using BuildType = BuildT; + using BufferType = HostBuffer; + using GridType = NanoGrid; + using ValueType = typename GridType::ValueType; + using TreeType = typename GridType::TreeType; + using RootType = typename TreeType::RootType; + template + using NodeType = typename NodeTrait::type; + NodeAccessor(const GridType &grid) + : mHandle(createNodeManager(grid)) + , mMgr(*(mHandle.template mgr())) {} + const GridType& grid() const {return mMgr.grid();} + const TreeType& tree() const {return mMgr.tree();} + const RootType& root() const {return mMgr.root();} + uint64_t nodeCount(int level) const { return mMgr.nodeCount(level); } + template + const NodeType& node(uint32_t i) const {return mMgr.template node(i); } + std::string getName() const {return std::string(this->grid().gridName());}; + bool hasLongGridName() const {return this->grid().hasLongGridName();} + const nanovdb::Map& map() const {return this->grid().map();} + GridClass gridClass() const {return this->grid().gridClass();} +private: + NodeManagerHandle mHandle; + const NodeManager &mMgr; +};// NodeAccessor + +//================================================================================================ + +/// @brief Trait that maps any type to the corresponding nanovdb type +/// @tparam T Type to be mapped +template +struct MapToNano { using type = T; }; + +#if defined(NANOVDB_USE_OPENVDB) && !defined(__CUDACC__) + +template<> +struct MapToNano {using type = nanovdb::ValueMask;}; +template +struct MapToNano>{using type = nanovdb::math::Vec3;}; +template +struct MapToNano>{using type = nanovdb::math::Vec4;}; +template<> +struct MapToNano {using type = uint32_t;}; +template<> +struct MapToNano {using type = uint32_t;}; + +/// Templated Grid with default 32->16->8 configuration +template +using OpenLeaf = openvdb::tree::LeafNode; +template +using OpenLower = openvdb::tree::InternalNode,4>; +template +using OpenUpper = openvdb::tree::InternalNode,5>; +template +using OpenRoot = openvdb::tree::RootNode>; +template +using OpenTree = openvdb::tree::Tree>; +template +using OpenGrid = openvdb::Grid>; + +//================================================================================================ + +/// @brief Template specialization for openvdb::Grid +template +class NodeAccessor> +{ +public: + static constexpr bool IS_OPENVDB = true; + static constexpr bool IS_NANOVDB = false; + using BuildType = BuildT; + using GridType = OpenGrid; + using ValueType = typename GridType::ValueType; + using TreeType = OpenTree; + using RootType = OpenRoot; + template + using NodeType = typename NodeTrait::type; + NodeAccessor(const GridType &grid) : mMgr(const_cast(grid)) { + const auto mat4 = this->grid().transform().baseMap()->getAffineMap()->getMat4(); + mMap.set(mat4, mat4.inverse()); + } + const GridType& grid() const {return mMgr.grid();} + const TreeType& tree() const {return mMgr.tree();} + const RootType& root() const {return mMgr.root();} + uint64_t nodeCount(int level) const { return mMgr.nodeCount(level); } + template + const NodeType& node(uint32_t i) const {return mMgr.template node(i); } + std::string getName() const { return this->grid().getName(); }; + bool hasLongGridName() const {return this->grid().getName().length() >= GridData::MaxNameSize;} + const nanovdb::Map& map() const {return mMap;} + GridClass gridClass() const { + switch (this->grid().getGridClass()) { + case 
openvdb::GRID_LEVEL_SET: + if (!util::is_floating_point::value) OPENVDB_THROW(openvdb::ValueError, "processGrid: Level sets are expected to be floating point types"); + return GridClass::LevelSet; + case openvdb::GRID_FOG_VOLUME: + return GridClass::FogVolume; + case openvdb::GRID_STAGGERED: + return GridClass::Staggered; + default: + return GridClass::Unknown; + } + } +private: + build::NodeManager mMgr; + nanovdb::Map mMap; +};// NodeAccessor> + +//================================================================================================ + +/// @brief Template specialization for openvdb::tools::PointIndexGrid +template <> +class NodeAccessor +{ +public: + static constexpr bool IS_OPENVDB = true; + static constexpr bool IS_NANOVDB = false; + using BuildType = openvdb::PointIndex32; + using GridType = openvdb::tools::PointIndexGrid; + using TreeType = openvdb::tools::PointIndexTree; + using RootType = typename TreeType::RootNodeType; + using ValueType = typename GridType::ValueType; + template + using NodeType = typename NodeTrait::type; + NodeAccessor(const GridType &grid) : mMgr(const_cast(grid)) { + const auto mat4 = this->grid().transform().baseMap()->getAffineMap()->getMat4(); + mMap.set(mat4, mat4.inverse()); + } + const GridType& grid() const {return mMgr.grid();} + const TreeType& tree() const {return mMgr.tree();} + const RootType& root() const {return mMgr.root();} + uint64_t nodeCount(int level) const { return mMgr.nodeCount(level); } + template + const NodeType& node(uint32_t i) const {return mMgr.template node(i); } + std::string getName() const { return this->grid().getName(); }; + bool hasLongGridName() const {return this->grid().getName().length() >= GridData::MaxNameSize;} + const nanovdb::Map& map() const {return mMap;} + GridClass gridClass() const {return GridClass::PointIndex;} +private: + build::NodeManager mMgr; + nanovdb::Map mMap; +};// NodeAccessor + +//================================================================================================ + +// @brief Template specialization for openvdb::points::PointDataGrid +template <> +class NodeAccessor +{ +public: + static constexpr bool IS_OPENVDB = true; + static constexpr bool IS_NANOVDB = false; + using BuildType = openvdb::PointDataIndex32; + using GridType = openvdb::points::PointDataGrid; + using TreeType = openvdb::points::PointDataTree; + using RootType = typename TreeType::RootNodeType; + using ValueType = typename GridType::ValueType; + template + using NodeType = typename NodeTrait::type; + NodeAccessor(const GridType &grid) : mMgr(const_cast(grid)) { + const auto mat4 = this->grid().transform().baseMap()->getAffineMap()->getMat4(); + mMap.set(mat4, mat4.inverse()); + } + const GridType& grid() const {return mMgr.grid();} + const TreeType& tree() const {return mMgr.tree();} + const RootType& root() const {return mMgr.root();} + uint64_t nodeCount(int level) const { return mMgr.nodeCount(level); } + template + const NodeType& node(uint32_t i) const {return mMgr.template node(i); } + std::string getName() const { return this->grid().getName(); }; + bool hasLongGridName() const {return this->grid().getName().length() >= GridData::MaxNameSize;} + const nanovdb::Map& map() const {return mMap;} + GridClass gridClass() const {return GridClass::PointData;} +private: + build::NodeManager mMgr; + nanovdb::Map mMap; +};// NodeAccessor + +#endif + +//================================================================================================ + +/// @brief Creates any nanovdb Grid from any source grid 
(certain combinations are obviously not allowed) +template +class CreateNanoGrid +{ +public: + // SrcGridT can be either openvdb::Grid, nanovdb::Grid or nanovdb::tools::build::Grid + using SrcNodeAccT = NodeAccessor; + using SrcBuildT = typename SrcNodeAccT::BuildType; + using SrcValueT = typename SrcNodeAccT::ValueType; + using SrcTreeT = typename SrcNodeAccT::TreeType; + using SrcRootT = typename SrcNodeAccT::RootType; + template + using SrcNodeT = typename NodeTrait::type; + + /// @brief Constructor from a source grid + /// @param srcGrid Source grid of type SrcGridT + CreateNanoGrid(const SrcGridT &srcGrid); + + /// @brief Constructor from a source node accessor (defined above) + /// @param srcNodeAcc Source node accessor of type SrcNodeAccT + CreateNanoGrid(const SrcNodeAccT &srcNodeAcc); + + /// @brief Set the level of verbosity + /// @param mode level of verbosity, mode=0 means quiet + void setVerbose(int mode = 1) { mVerbose = mode; } + + /// @brief Enable or disable dithering, i.e. randomization of the quantization error. + /// @param on enable or disable dithering + /// @warning Dithering only has an affect when DstBuildT = {Fp4, Fp8, Fp16, FpN} + void enableDithering(bool on = true) { mDitherOn = on; } + + /// @brief Set the mode used for computing statistics of the destination grid + /// @param mode specify the mode of statistics + void setStats(StatsMode mode = StatsMode::Default) { mStats = mode; } + + /// @brief Set the mode used for computing checksums of the destination grid + /// @param mode specify the mode of checksum + void setChecksum(CheckMode mode = CheckMode::Default) { mChecksum = mode; } + + /// @brief Converts the source grid into a nanovdb grid with the specified destination build type + /// @tparam DstBuildT build type of the destination, output, grid + /// @tparam BufferT Type of the buffer used for allocating the destination grid + /// @param buffer instance of the buffer use for allocation + /// @return Return an instance of a GridHandle (invoking move semantics) + /// @note This version is when DstBuildT != {FpN, ValueIndex, ValueOnIndex} + template::type, typename BufferT = HostBuffer> + typename util::disable_if::value || + BuildTraits::is_index, GridHandle>::type + getHandle(const BufferT &buffer = BufferT()); + + /// @brief Converts the source grid into a nanovdb grid with variable bit quantization + /// @tparam DstBuildT FpN, i.e. the destination grid uses variable bit quantization + /// @tparam OracleT Type of oracle used to determine the N in FpN + /// @tparam BufferT Type of the buffer used for allocating the destination grid + /// @param oracle Instance of the oracle used to determine the N in FpN + /// @param buffer instance of the buffer use for allocation + /// @return Return an instance of a GridHandle (invoking move semantics) + /// @note This version assumes DstBuildT == FpN + template::type, typename OracleT = AbsDiff, typename BufferT = HostBuffer> + typename util::enable_if::value, GridHandle>::type + getHandle(const OracleT &oracle = OracleT(), + const BufferT &buffer = BufferT()); + + /// @brief Converts the source grid into a nanovdb grid with indices to external arrays of values + /// @tparam DstBuildT ValueIndex or ValueOnIndex, i.e. 
index all or just active values
+    /// @tparam BufferT Type of the buffer used for allocating the destination grid
+    /// @param channels Number of copies of values encoded as blind data in the destination grid
+    /// @param includeStats Specify if statistics should be indexed
+    /// @param includeTiles Specify if tile values, i.e. non-leaf-node-values, should be indexed
+    /// @param buffer instance of the buffer used for allocation
+    /// @return Return an instance of a GridHandle (invoking move semantics)
+    template<typename DstBuildT = typename MapToNano<SrcBuildT>::type, typename BufferT = HostBuffer>
+    typename util::enable_if<BuildTraits<DstBuildT>::is_index, GridHandle<BufferT>>::type
+    getHandle(uint32_t channels = 0u,
+              bool includeStats = true,
+              bool includeTiles = true,
+              const BufferT &buffer = BufferT());
+
+    /// @brief Add blind data to the destination grid
+    /// @param name String name of the blind data
+    /// @param dataSemantic Semantics of the blind data
+    /// @param dataClass Class of the blind data
+    /// @param dataType Type of the blind data
+    /// @param count Element count of the blind data
+    /// @param size Size of each element of the blind data
+    /// @return Return the index used to access the blind data
+    uint64_t addBlindData(const std::string& name,
+                          GridBlindDataSemantic dataSemantic,
+                          GridBlindDataClass dataClass,
+                          GridType dataType,
+                          size_t count, size_t size)
+    {
+        const size_t order = mBlindMetaData.size();
+        mBlindMetaData.emplace(name, dataSemantic, dataClass, dataType, order, count, size);
+        return order;
+    }
+
+    /// @brief This method only has an effect when getHandle was called with DstBuildT = ValueIndex or ValueOnIndex
+    /// @return Return the number of indexed values. If called before getHandle was called with
+    ///         DstBuildT = ValueIndex or ValueOnIndex the return value is zero. Else it is a value larger than zero.
+    uint64_t valueCount() const {return mValIdx[0].empty() ? 0u : mValIdx[0].back();}
+
+    /// @brief Copy values from the source grid into a provided buffer
+    /// @tparam DstBuildT Must be ValueIndex or ValueOnIndex, i.e.
a index grid + /// @param buffer point in which to write values + template + typename util::enable_if::is_index>::type + copyValues(SrcValueT *buffer); + +private: + + // ========================================================= + + template + typename util::enable_if::value&&LEVEL==0), typename NodeTrait, LEVEL>::type*>::type + dstNode(uint64_t i) const { + static_assert(LEVEL==0 || LEVEL==1 || LEVEL==2, "Expected LEVEL== {0,1,2}"); + using NodeT = typename NodeTrait, LEVEL>::type; + return util::PtrAdd(mBufferPtr, mOffset[5-LEVEL]) + i; + } + template + typename util::enable_if::value && LEVEL==0, NanoLeaf*>::type + dstNode(uint64_t i) const {return util::PtrAdd>(mBufferPtr, mCodec[i].offset);} + + template NanoRoot* dstRoot() const {return util::PtrAdd>(mBufferPtr, mOffset.root);} + template NanoTree* dstTree() const {return util::PtrAdd>(mBufferPtr, mOffset.tree);} + template NanoGrid* dstGrid() const {return util::PtrAdd>(mBufferPtr, mOffset.grid);} + GridBlindMetaData* dstMeta(uint32_t i) const { return util::PtrAdd(mBufferPtr, mOffset.meta) + i;}; + + // ========================================================= + + template + typename util::disable_if::value || BuildTraits::is_index>::type + preProcess(); + + template + typename util::enable_if::is_index>::type + preProcess(uint32_t channels); + + template + typename util::enable_if::value>::type + preProcess(OracleT oracle); + + // ========================================================= + + // Below are private methods use to serialize nodes into NanoVDB + template + GridHandle initHandle(const BufferT& buffer); + + // ========================================================= + + template + inline typename util::enable_if::is_index>::type + postProcess(uint32_t channels); + + template + inline typename util::disable_if::is_index>::type + postProcess(); + + // ======================================================== + + template + typename util::disable_if::is_special>::type + processLeafs(); + + template + typename util::enable_if::is_index>::type + processLeafs(); + + template + typename util::enable_if::is_FpX>::type + processLeafs(); + + template + typename util::enable_if::value>::type + processLeafs(); + + template + typename util::enable_if::value>::type + processLeafs(); + + template + typename util::enable_if::value>::type + processLeafs(); + + // ========================================================= + + template + typename util::enable_if::is_index>::type + processInternalNodes(); + + template + typename util::enable_if::is_index>::type + processInternalNodes(); + + // ========================================================= + + template + typename util::enable_if::is_index>::type + processRoot(); + + template + typename util::enable_if::is_index>::type + processRoot(); + + // ========================================================= + + template + void processTree(); + + template + void processGrid(); + + template + typename util::enable_if::is_index, uint64_t>::type + countTileValues(uint64_t valueCount); + + template + typename util::enable_if::is_index, uint64_t>::type + countValues(); + +#if defined(NANOVDB_USE_OPENVDB) && !defined(__CUDACC__) + template + typename util::disable_if::value || + util::is_same::value, uint64_t>::type + countPoints() const; + + template + typename util::enable_if::value || + util::is_same::value, uint64_t>::type + countPoints() const; + + template + typename util::enable_if::value>::type + copyPointAttribute(size_t attIdx, AttT *attPtr); +#else + uint64_t countPoints() const 
{return 0u;} +#endif + + void* mBufferPtr;// pointer to the beginning of the destination nanovdb grid buffer + struct BufferOffsets { + uint64_t grid, tree, root, upper, lower, leaf, meta, blind, size; + uint64_t operator[](int i) const { return *(reinterpret_cast(this)+i); } + } mOffset; + int mVerbose; + uint64_t mLeafNodeSize;// non-trivial when DstBuiltT = FpN + + std::unique_ptr mSrcNodeAccPtr;// placeholder for potential local instance + const SrcNodeAccT &mSrcNodeAcc; + struct BlindMetaData; // forward declaration + std::set mBlindMetaData; // sorted according to BlindMetaData.order + struct Codec { float min, max; uint64_t offset; uint8_t log2; };// used for adaptive bit-rate quantization + std::unique_ptr mCodec;// defines a codec per leaf node when DstBuildT = FpN + StatsMode mStats; + CheckMode mChecksum; + bool mDitherOn, mIncludeStats, mIncludeTiles; + std::vector mValIdx[3];// store id of first value in node +}; // CreateNanoGrid + +//================================================================================================ + +template +CreateNanoGrid::CreateNanoGrid(const SrcGridT &srcGrid) + : mVerbose(0) + , mSrcNodeAccPtr(new SrcNodeAccT(srcGrid)) + , mSrcNodeAcc(*mSrcNodeAccPtr) + , mStats(StatsMode::Default) + , mChecksum(CheckMode::Default) + , mDitherOn(false) + , mIncludeStats(true) + , mIncludeTiles(true) +{ +} + +//================================================================================================ + +template +CreateNanoGrid::CreateNanoGrid(const SrcNodeAccT &srcNodeAcc) + : mVerbose(0) + , mSrcNodeAccPtr(nullptr) + , mSrcNodeAcc(srcNodeAcc) + , mStats(StatsMode::Default) + , mChecksum(CheckMode::Default) + , mDitherOn(false) + , mIncludeStats(true) + , mIncludeTiles(true) +{ +} + +//================================================================================================ + +template +struct CreateNanoGrid::BlindMetaData +{ + BlindMetaData(const std::string& name,// name + used to derive GridBlindDataSemantic + const std::string& type,// used to derive GridType of blind data + GridBlindDataClass dataClass, + size_t i, size_t valueCount, size_t valueSize) + : metaData(reinterpret_cast(new char[sizeof(GridBlindMetaData)])) + , order(i)// sorted id of meta data + , size(math::AlignUp(valueCount * valueSize)) + { + util::memzero(metaData, sizeof(GridBlindMetaData));// zero out all meta data + if (name.length()>=GridData::MaxNameSize) throw std::runtime_error("blind data name exceeds limit"); + std::memcpy(metaData->mName, name.c_str(), name.length() + 1); + metaData->mValueCount = valueCount; + metaData->mSemantic = BlindMetaData::mapToSemantics(name); + metaData->mDataClass = dataClass; + metaData->mDataType = BlindMetaData::mapToType(type); + metaData->mValueSize = valueSize; + NANOVDB_ASSERT(metaData->isValid()); + } + BlindMetaData(const std::string& name,// only name + GridBlindDataSemantic dataSemantic, + GridBlindDataClass dataClass, + GridType dataType, + size_t i, size_t valueCount, size_t valueSize) + : metaData(reinterpret_cast(new char[sizeof(GridBlindMetaData)])) + , order(i)// sorted id of meta data + , size(math::AlignUp(valueCount * valueSize)) + { + std::memset(metaData, 0, sizeof(GridBlindMetaData));// zero out all meta data + if (name.length()>=GridData::MaxNameSize) throw std::runtime_error("blind data name exceeds character limit"); + std::memcpy(metaData->mName, name.c_str(), name.length() + 1); + metaData->mValueCount = valueCount; + metaData->mSemantic = dataSemantic; + metaData->mDataClass = dataClass; + 
metaData->mDataType = dataType; + metaData->mValueSize = valueSize; + NANOVDB_ASSERT(metaData->isValid()); + } + ~BlindMetaData(){ delete [] reinterpret_cast(metaData); } + bool operator<(const BlindMetaData& other) const { return order < other.order; } // required by std::set + static GridType mapToType(const std::string& name) + { + GridType type = GridType::Unknown; + if ("uint32_t" == name) { + type = GridType::UInt32; + } else if ("float" == name) { + type = GridType::Float; + } else if ("vec3s"== name) { + type = GridType::Vec3f; + } else if ("int32" == name) { + type = GridType::Int32; + } else if ("int64" == name) { + type = GridType::Int64; + } + return type; + } + static GridBlindDataSemantic mapToSemantics(const std::string& name) + { + GridBlindDataSemantic semantic = GridBlindDataSemantic::Unknown; + if ("P" == name) { + semantic = GridBlindDataSemantic::PointPosition; + } else if ("V" == name) { + semantic = GridBlindDataSemantic::PointVelocity; + } else if ("Cd" == name) { + semantic = GridBlindDataSemantic::PointColor; + } else if ("N" == name) { + semantic = GridBlindDataSemantic::PointNormal; + } else if ("id" == name) { + semantic = GridBlindDataSemantic::PointId; + } + return semantic; + } + GridBlindMetaData *metaData; + const size_t order, size; +}; // CreateNanoGrid::BlindMetaData + +//================================================================================================ + +template +template +typename util::disable_if::value || + BuildTraits::is_index, GridHandle>::type +CreateNanoGrid::getHandle(const BufferT& pool) +{ + this->template preProcess(); + auto handle = this->template initHandle(pool); + this->template postProcess(); + return handle; +} // CreateNanoGrid::getHandle + +//================================================================================================ + +template +template +typename util::enable_if::value, GridHandle>::type +CreateNanoGrid::getHandle(const OracleT& oracle, const BufferT& pool) +{ + this->template preProcess(oracle); + auto handle = this->template initHandle(pool); + this->template postProcess(); + return handle; +} // CreateNanoGrid::getHandle + +//================================================================================================ + +template +template +typename util::enable_if::is_index, GridHandle>::type +CreateNanoGrid::getHandle(uint32_t channels, + bool includeStats, + bool includeTiles, + const BufferT &pool) +{ + mIncludeStats = includeStats; + mIncludeTiles = includeTiles; + this->template preProcess(channels); + auto handle = this->template initHandle(pool); + this->template postProcess(channels); + return handle; +}// CreateNanoGrid::getHandle + +//================================================================================================ + +template +template +GridHandle CreateNanoGrid::initHandle(const BufferT& pool) +{ + mOffset.grid = 0;// grid is always stored at the start of the buffer! 
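+    // The single destination buffer is laid out as contiguous sections, i.e.
+    //
+    //   [Grid][Tree][Root][Upper nodes][Lower nodes][Leaf nodes][Blind meta data][Blind data]
+    //
+    // and each entry of mOffset below records the byte offset at which the
+    // corresponding section begins.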
+ mOffset.tree = NanoGrid::memUsage(); // grid ends and tree begins + mOffset.root = mOffset.tree + NanoTree::memUsage(); // tree ends and root node begins + mOffset.upper = mOffset.root + NanoRoot::memUsage(mSrcNodeAcc.root().getTableSize()); // root node ends and upper internal nodes begin + mOffset.lower = mOffset.upper + NanoUpper::memUsage()*mSrcNodeAcc.nodeCount(2); // upper internal nodes ends and lower internal nodes begin + mOffset.leaf = mOffset.lower + NanoLower::memUsage()*mSrcNodeAcc.nodeCount(1); // lower internal nodes ends and leaf nodes begin + mOffset.meta = mOffset.leaf + mLeafNodeSize;// leaf nodes end and blind meta data begins + mOffset.blind = mOffset.meta + sizeof(GridBlindMetaData)*mBlindMetaData.size(); // meta data ends and blind data begins + mOffset.size = mOffset.blind;// end of buffer + for (const auto& b : mBlindMetaData) mOffset.size += b.size; // accumulate all the blind data + + auto buffer = BufferT::create(mOffset.size, &pool); + mBufferPtr = buffer.data(); + + // Concurrent processing of all tree levels! + util::invoke( [&](){this->template processLeafs();}, + [&](){this->template processInternalNodes();}, + [&](){this->template processInternalNodes();}, + [&](){this->template processRoot();}, + [&](){this->template processTree();}, + [&](){this->template processGrid();} ); + + return GridHandle(std::move(buffer)); +} // CreateNanoGrid::initHandle + +//================================================================================================ + +template +template +inline typename util::disable_if::value || BuildTraits::is_index>::type +CreateNanoGrid::preProcess() +{ + if (const uint64_t pointCount = this->countPoints()) { +#if defined(NANOVDB_USE_OPENVDB) && !defined(__CUDACC__) + if constexpr(util::is_same::value) { + if (!mBlindMetaData.empty()) throw std::runtime_error("expected no blind meta data"); + this->addBlindData("index", + GridBlindDataSemantic::PointId, + GridBlindDataClass::IndexArray, + GridType::UInt32, + pointCount, + sizeof(uint32_t)); + } else if constexpr(util::is_same::value) { + if (!mBlindMetaData.empty()) throw std::runtime_error("expected no blind meta data"); + auto &srcLeaf = mSrcNodeAcc.template node<0>(0); + const auto& attributeSet = srcLeaf.attributeSet(); + const auto& descriptor = attributeSet.descriptor(); + const auto& nameMap = descriptor.map(); + for (auto it = nameMap.begin(); it != nameMap.end(); ++it) { + const size_t index = it->second; + auto& attArray = srcLeaf.constAttributeArray(index); + mBlindMetaData.emplace(it->first, // name used to derive semantics + descriptor.valueType(index), // type + it->first == "id" ? 
GridBlindDataClass::IndexArray : GridBlindDataClass::AttributeArray, // class
+                                       index, // order
+                                       pointCount, // element count
+                                       attArray.valueTypeSize()); // element size
+            }
+        }
+#endif
+    }
+    if (mSrcNodeAcc.hasLongGridName()) {
+        this->addBlindData("grid name",
+                           GridBlindDataSemantic::Unknown,
+                           GridBlindDataClass::GridName,
+                           GridType::Unknown,
+                           mSrcNodeAcc.getName().length() + 1, 1);
+    }
+    mLeafNodeSize = mSrcNodeAcc.nodeCount(0)*NanoLeaf<DstBuildT>::DataType::memUsage();
+}// CreateNanoGrid<SrcGridT>::preProcess
+
+//================================================================================================
+
+template <typename SrcGridT>
+template <typename DstBuildT, typename OracleT>
+inline typename util::enable_if<util::is_same<FpN, DstBuildT>::value>::type
+CreateNanoGrid<SrcGridT>::preProcess(OracleT oracle)
+{
+    static_assert(util::is_same<SrcValueT, float>::value, "preProcess: expected SrcValueT == float");
+
+    const size_t leafCount = mSrcNodeAcc.nodeCount(0);
+    if (leafCount==0) {
+        mLeafNodeSize = 0u;
+        return;
+    }
+    mCodec.reset(new Codec[leafCount]);
+
+    if constexpr(util::is_same<AbsDiff, OracleT>::value) {
+        if (!oracle) oracle.init(mSrcNodeAcc.gridClass(), mSrcNodeAcc.root().background());
+    }
+
+    math::DitherLUT lut(mDitherOn);
+    util::forEach(0, leafCount, 4, [&](const util::Range1D &r) {
+        for (auto i=r.begin(); i!=r.end(); ++i) {
+            const auto &srcLeaf = mSrcNodeAcc.template node<0>(i);
+            float &min = mCodec[i].min = std::numeric_limits<float>::max();
+            float &max = mCodec[i].max = -min;
+            for (int j=0; j<512; ++j) {
+                float v = srcLeaf.getValue(j);
+                if (v < min) min = v;
+                if (v > max) max = v;
+            }
+            const float range = max - min;
+            uint8_t &logBitWidth = mCodec[i].log2 = 0;// 0,1,2,3,4 => 1,2,4,8,16 bits
+            while (range > 0.0f && logBitWidth < 4u) {
+                const uint32_t mask = (uint32_t(1) << (uint32_t(1) << logBitWidth)) - 1u;
+                const float encode = mask/range;
+                const float decode = range/mask;
+                int j = 0;
+                do {
+                    const float exact = srcLeaf.getValue(j);// exact value
+                    const uint32_t code = uint32_t(encode*(exact - min) + lut(j));
+                    const float approx = code * decode + min;// approximate value
+                    j += oracle(exact, approx) ? 1 : 513;
+                } while(j < 512);
+                if (j == 512) break;
+                ++logBitWidth;
+            }
+        }
+    });
+
+    auto getOffset = [&](size_t i){
+        --i;
+        return mCodec[i].offset + NanoLeaf<DstBuildT>::DataType::memUsage(1u << mCodec[i].log2);
+    };
+    mCodec[0].offset = NanoGrid<DstBuildT>::memUsage() +
+                       NanoTree<DstBuildT>::memUsage() +
+                       NanoRoot<DstBuildT>::memUsage(mSrcNodeAcc.root().getTableSize()) +
+                       NanoUpper<DstBuildT>::memUsage()*mSrcNodeAcc.nodeCount(2) +
+                       NanoLower<DstBuildT>::memUsage()*mSrcNodeAcc.nodeCount(1);
+    for (size_t i=1; i<leafCount; ++i) mCodec[i].offset = getOffset(i);
+    mLeafNodeSize = getOffset(leafCount);
+
+    if (mSrcNodeAcc.hasLongGridName()) {
+        this->addBlindData("grid name",
+                           GridBlindDataSemantic::Unknown,
+                           GridBlindDataClass::GridName,
+                           GridType::Unknown,
+                           mSrcNodeAcc.getName().length() + 1, 1);
+    }
+}// CreateNanoGrid<SrcGridT>::preProcess
+
+//================================================================================================
+
+template <typename SrcGridT>
+template <typename DstBuildT, int LEVEL>
+inline typename util::enable_if<BuildTraits<DstBuildT>::is_index, uint64_t>::type
+CreateNanoGrid<SrcGridT>::countTileValues(uint64_t valueCount)
+{
+    const uint64_t stats = mIncludeStats ? 4u : 0u;// minimum, maximum, average, and deviation
+    mValIdx[LEVEL].clear();
+    mValIdx[LEVEL].resize(mSrcNodeAcc.nodeCount(LEVEL) + 1, stats);// minimum 1 entry
+    util::forEach(1, mValIdx[LEVEL].size(), 8, [&](const util::Range1D& r){
+        for (auto i = r.begin(); i!=r.end(); ++i) {
+            auto &srcNode = mSrcNodeAcc.template node<LEVEL>(i-1);
+            if constexpr(BuildTraits<DstBuildT>::is_onindex) {// resolved at compile time
+                mValIdx[LEVEL][i] += srcNode.getValueMask().countOn();
+            } else {
+                static const uint64_t maxTileCount = uint64_t(1u) << 3*srcNode.LOG2DIM;
+                mValIdx[LEVEL][i] += maxTileCount - srcNode.getChildMask().countOn();
+            }
+        }
+    });
+    mValIdx[LEVEL][0] = valueCount;
+    for (size_t i=1; i<mValIdx[LEVEL].size(); ++i) mValIdx[LEVEL][i] += mValIdx[LEVEL][i-1];// inclusive prefix sum
+    return mValIdx[LEVEL].back();
+}// CreateNanoGrid<SrcGridT>::countTileValues
+
+//================================================================================================
+
+template <typename SrcGridT>
+template <typename DstBuildT>
+inline typename util::enable_if<BuildTraits<DstBuildT>::is_index, uint64_t>::type
+CreateNanoGrid<SrcGridT>::countValues()
+{
+    const uint64_t stats = mIncludeStats ? 4u : 0u;// minimum, maximum, average, and deviation
+    uint64_t valueCount = 1u;// offset 0 corresponds to the background value
+    if (mIncludeTiles) {
+        if constexpr(BuildTraits<DstBuildT>::is_onindex) {
+            for (auto it = mSrcNodeAcc.root().cbeginValueOn(); it; ++it) ++valueCount;
+        } else {
+            for (auto it = mSrcNodeAcc.root().cbeginValueAll(); it; ++it) ++valueCount;
+        }
+        valueCount += stats;// optionally append stats for the root node
+        valueCount = countTileValues<DstBuildT, 2>(valueCount);
+        valueCount = countTileValues<DstBuildT, 1>(valueCount);
+    }
+    mValIdx[0].clear();
+    mValIdx[0].resize(mSrcNodeAcc.nodeCount(0) + 1, 512u + stats);// minimum 1 entry
+    if constexpr(BuildTraits<DstBuildT>::is_onindex) {
+        util::forEach(1, mValIdx[0].size(), 8, [&](const util::Range1D& r) {
+            for (auto i = r.begin(); i != r.end(); ++i) {
+                mValIdx[0][i] = stats;
+                mValIdx[0][i] += mSrcNodeAcc.template node<0>(i-1).getValueMask().countOn();
+            }
+        });
+    }
+    mValIdx[0][0] = valueCount;
+    util::prefixSum(mValIdx[0], true);// inclusive prefix sum
+    return mValIdx[0].back();
+}// CreateNanoGrid<SrcGridT>::countValues
+
+//================================================================================================
+
+template <typename SrcGridT>
+template <typename DstBuildT>
+inline typename util::enable_if<BuildTraits<DstBuildT>::is_index>::type
+CreateNanoGrid<SrcGridT>::preProcess(uint32_t channels)
+{
+    const uint64_t valueCount = this->template countValues<DstBuildT>();
+    mLeafNodeSize = mSrcNodeAcc.nodeCount(0)*NanoLeaf<DstBuildT>::DataType::memUsage();
+
+    uint32_t order = mBlindMetaData.size();
+    char str[16];
+    for (uint32_t i=0; i<channels; ++i) {
+        mBlindMetaData.emplace("channel_" + std::to_string(i),// name
+                               toStr(str, toGridType<SrcValueT>()),// type
+                               GridBlindDataClass::AttributeArray,
+                               order++,
+                               valueCount,
+                               sizeof(SrcValueT));
+    }
+    if (mSrcNodeAcc.hasLongGridName()) {
+        this->addBlindData("grid name",
+                           GridBlindDataSemantic::Unknown,
+                           GridBlindDataClass::GridName,
+                           GridType::Unknown,
+                           mSrcNodeAcc.getName().length() + 1, 1);
+    }
+}// preProcess
+
+//================================================================================================
+
+template <typename SrcGridT>
+template <typename DstBuildT>
+inline typename util::disable_if<BuildTraits<DstBuildT>::is_special>::type
+CreateNanoGrid<SrcGridT>::processLeafs()
+{
+    using DstDataT = typename NanoLeaf<DstBuildT>::DataType;
+    using DstValueT = typename DstDataT::ValueType;
+    static_assert(DstDataT::FIXED_SIZE, "Expected destination LeafNode to have fixed size");
+    util::forEach(0, mSrcNodeAcc.nodeCount(0), 8, [&](const util::Range1D& r) {
+        auto *dstLeaf = this->template dstNode(r.begin());
+        for (auto i = r.begin(); i != r.end(); ++i, ++dstLeaf) {
+            auto &srcLeaf = mSrcNodeAcc.template node<0>(i);
+            if (DstDataT::padding()>0u) {
+                util::memzero(dstLeaf, DstDataT::memUsage());
+            } else {
+                dstLeaf->mBBoxDif[0] = dstLeaf->mBBoxDif[1] =
dstLeaf->mBBoxDif[2] = 0u; + dstLeaf->mFlags = 0u;// enable rendering, no bbox, no stats + dstLeaf->mMinimum = dstLeaf->mMaximum = typename DstDataT::ValueType(); + dstLeaf->mAverage = dstLeaf->mStdDevi = 0; + } + dstLeaf->mBBoxMin = srcLeaf.origin(); // copy origin of node + dstLeaf->mValueMask = srcLeaf.getValueMask(); // copy value mask + DstValueT *dst = dstLeaf->mValues; + if constexpr(util::is_same::value && SrcNodeAccT::IS_OPENVDB) { + const SrcValueT *src = srcLeaf.buffer().data(); + for (auto *end = dst + 512u; dst != end; dst += 4, src += 4) { + dst[0] = src[0]; // copy *all* voxel values in sets of four, i.e. loop-unrolling + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = src[3]; + } + } else { + for (uint32_t j=0; j<512u; ++j) *dst++ = static_cast(srcLeaf.getValue(j)); + } + } + }); +} // CreateNanoGrid::processLeafs + +//================================================================================================ + +template +template +inline typename util::enable_if::is_index>::type +CreateNanoGrid::processLeafs() +{ + using DstDataT = typename NanoLeaf::DataType; + static_assert(DstDataT::FIXED_SIZE, "Expected destination LeafNode to have fixed size"); + static_assert(DstDataT::padding()==0u, "Expected leaf nodes to have no padding"); + + util::forEach(0, mSrcNodeAcc.nodeCount(0), 8, [&](const util::Range1D& r) { + const uint8_t flags = mIncludeStats ? 16u : 0u;// 4th bit indicates stats + DstDataT *dstLeaf = this->template dstNode(r.begin());// fixed size + for (auto i = r.begin(); i != r.end(); ++i, ++dstLeaf) { + auto &srcLeaf = mSrcNodeAcc.template node<0>(i); + dstLeaf->mBBoxMin = srcLeaf.origin(); // copy origin of node + dstLeaf->mBBoxDif[0] = dstLeaf->mBBoxDif[1] = dstLeaf->mBBoxDif[2] = 0u; + dstLeaf->mFlags = flags; + dstLeaf->mValueMask = srcLeaf.getValueMask(); // copy value mask + dstLeaf->mOffset = mValIdx[0][i]; + if constexpr(BuildTraits::is_onindex) { + const uint64_t *w = dstLeaf->mValueMask.words(); +#ifdef USE_OLD_VALUE_ON_INDEX + int32_t sum = CountOn(*w++); + uint8_t *p = reinterpret_cast(&dstLeaf->mPrefixSum), *q = p + 7; + for (int j=0; j<7; ++j) { + *p++ = sum & 255u; + *q |= (sum >> 8) << j; + sum += CountOn(*w++); + } +#else + uint64_t &prefixSum = dstLeaf->mPrefixSum, sum = util::countOn(*w++); + prefixSum = sum; + for (int n = 9; n < 55; n += 9) {// n=i*9 where i=1,2,..6 + sum += util::countOn(*w++); + prefixSum |= sum << n;// each pre-fixed sum is encoded in 9 bits + } +#endif + } else { + dstLeaf->mPrefixSum = 0u; + } + if constexpr(BuildTraits::is_indexmask) dstLeaf->mMask = dstLeaf->mValueMask; + } + }); +} // CreateNanoGrid::processLeafs + +//================================================================================================ + +template +template +inline typename util::enable_if::value>::type +CreateNanoGrid::processLeafs() +{ + using DstDataT = typename NanoLeaf::DataType; + static_assert(DstDataT::FIXED_SIZE, "Expected destination LeafNode to have fixed size"); + util::forEach(0, mSrcNodeAcc.nodeCount(0), 8, [&](const util::Range1D& r) { + auto *dstLeaf = this->template dstNode(r.begin()); + for (auto i = r.begin(); i != r.end(); ++i, ++dstLeaf) { + auto &srcLeaf = mSrcNodeAcc.template node<0>(i); + if (DstDataT::padding()>0u) { + util::memzero(dstLeaf, DstDataT::memUsage()); + } else { + dstLeaf->mBBoxDif[0] = dstLeaf->mBBoxDif[1] = dstLeaf->mBBoxDif[2] = 0u; + dstLeaf->mFlags = 0u;// enable rendering, no bbox, no stats + dstLeaf->mPadding[0] = dstLeaf->mPadding[1] = 0u; + } + dstLeaf->mBBoxMin = srcLeaf.origin(); // copy 
origin of node + dstLeaf->mValueMask = srcLeaf.getValueMask(); // copy value mask + } + }); +} // CreateNanoGrid::processLeafs + +//================================================================================================ + +template +template +inline typename util::enable_if::value>::type +CreateNanoGrid::processLeafs() +{ + using DstDataT = typename NanoLeaf::DataType; + static_assert(DstDataT::FIXED_SIZE, "Expected destination LeafNode to have fixed size"); + util::forEach(0, mSrcNodeAcc.nodeCount(0), 8, [&](const util::Range1D& r) { + auto *dstLeaf = this->template dstNode(r.begin()); + for (auto i = r.begin(); i != r.end(); ++i, ++dstLeaf) { + auto &srcLeaf = mSrcNodeAcc.template node<0>(i); + if (DstDataT::padding()>0u) { + util::memzero(dstLeaf, DstDataT::memUsage()); + } else { + dstLeaf->mBBoxDif[0] = dstLeaf->mBBoxDif[1] = dstLeaf->mBBoxDif[2] = 0u; + dstLeaf->mFlags = 0u;// enable rendering, no bbox, no stats + } + dstLeaf->mBBoxMin = srcLeaf.origin(); // copy origin of node + dstLeaf->mValueMask = srcLeaf.getValueMask(); // copy value mask + if constexpr(!util::is_same::value) { + for (int j=0; j<512; ++j) dstLeaf->mValues.set(j, static_cast(srcLeaf.getValue(j))); + } else if constexpr(SrcNodeAccT::IS_OPENVDB) { + dstLeaf->mValues = *reinterpret_cast*>(srcLeaf.buffer().data()); + } else if constexpr(SrcNodeAccT::IS_NANOVDB) { + dstLeaf->mValues = srcLeaf.data()->mValues; + } else {// tools::Leaf + dstLeaf->mValues = srcLeaf.mValues; // copy value mask + } + } + }); +} // CreateNanoGrid::processLeafs + +//================================================================================================ + +template +template +inline typename util::enable_if::is_FpX>::type +CreateNanoGrid::processLeafs() +{ + using DstDataT = typename NanoLeaf::DataType; + static_assert(DstDataT::FIXED_SIZE, "Expected destination LeafNode to have fixed size"); + using ArrayT = typename DstDataT::ArrayType; + static_assert(util::is_same::value, "Expected ValueT == float"); + using FloatT = typename std::conditional=16, double, float>::type;// 16 compression and higher requires double + static constexpr FloatT UNITS = FloatT((1 << DstDataT::bitWidth()) - 1);// # of unique non-zero values + math::DitherLUT lut(mDitherOn); + + util::forEach(0, mSrcNodeAcc.nodeCount(0), 8, [&](const util::Range1D& r) { + auto *dstLeaf = this->template dstNode(r.begin()); + for (auto i = r.begin(); i != r.end(); ++i, ++dstLeaf) { + auto &srcLeaf = mSrcNodeAcc.template node<0>(i); + if (DstDataT::padding()>0u) { + util::memzero(dstLeaf, DstDataT::memUsage()); + } else { + dstLeaf->mFlags = dstLeaf->mBBoxDif[2] = dstLeaf->mBBoxDif[1] = dstLeaf->mBBoxDif[0] = 0u; + dstLeaf->mDev = dstLeaf->mAvg = dstLeaf->mMax = dstLeaf->mMin = 0u; + } + dstLeaf->mBBoxMin = srcLeaf.origin(); // copy origin of node + dstLeaf->mValueMask = srcLeaf.getValueMask(); // copy value mask + // compute extrema values + float min = std::numeric_limits::max(), max = -min; + for (uint32_t j=0; j<512u; ++j) { + const float v = srcLeaf.getValue(j); + if (v < min) min = v; + if (v > max) max = v; + } + dstLeaf->init(min, max, DstDataT::bitWidth()); + // perform quantization relative to the values in the current leaf node + const FloatT encode = UNITS/(max-min); + uint32_t offset = 0; + auto quantize = [&]()->ArrayT{ + const ArrayT tmp = static_cast(encode * (srcLeaf.getValue(offset) - min) + lut(offset)); + ++offset; + return tmp; + }; + auto *code = reinterpret_cast(dstLeaf->mCode); + if (util::is_same::value) {// resolved at compile-time + for 
(uint32_t j=0; j<128u; ++j) { + auto tmp = quantize(); + *code++ = quantize() << 4 | tmp; + tmp = quantize(); + *code++ = quantize() << 4 | tmp; + } + } else { + for (uint32_t j=0; j<128u; ++j) { + *code++ = quantize(); + *code++ = quantize(); + *code++ = quantize(); + *code++ = quantize(); + } + } + } + }); +} // CreateNanoGrid::processLeafs + +//================================================================================================ + +template +template +inline typename util::enable_if::value>::type +CreateNanoGrid::processLeafs() +{ + static_assert(util::is_same::value, "Expected SrcValueT == float"); + math::DitherLUT lut(mDitherOn); + util::forEach(0, mSrcNodeAcc.nodeCount(0), 8, [&](const util::Range1D& r) { + for (auto i = r.begin(); i != r.end(); ++i) { + auto &srcLeaf = mSrcNodeAcc.template node<0>(i); + auto *dstLeaf = this->template dstNode(i); + dstLeaf->mBBoxMin = srcLeaf.origin(); // copy origin of node + dstLeaf->mBBoxDif[0] = dstLeaf->mBBoxDif[1] = dstLeaf->mBBoxDif[2] = 0u; + const uint8_t logBitWidth = mCodec[i].log2; + dstLeaf->mFlags = logBitWidth << 5;// pack logBitWidth into 3 MSB of mFlag + dstLeaf->mValueMask = srcLeaf.getValueMask(); // copy value mask + const float min = mCodec[i].min, max = mCodec[i].max; + dstLeaf->init(min, max, uint8_t(1) << logBitWidth); + // perform quantization relative to the values in the current leaf node + uint32_t offset = 0; + float encode = 0.0f; + auto quantize = [&]()->uint8_t{ + const uint8_t tmp = static_cast(encode * (srcLeaf.getValue(offset) - min) + lut(offset)); + ++offset; + return tmp; + }; + auto *dst = reinterpret_cast(dstLeaf+1); + switch (logBitWidth) { + case 0u: {// 1 bit + encode = 1.0f/(max - min); + for (int j=0; j<64; ++j) { + uint8_t a = 0; + for (int k=0; k<8; ++k) a |= quantize() << k; + *dst++ = a; + } + } + break; + case 1u: {// 2 bits + encode = 3.0f/(max - min); + for (int j=0; j<128; ++j) { + auto a = quantize(); + a |= quantize() << 2; + a |= quantize() << 4; + *dst++ = quantize() << 6 | a; + } + } + break; + case 2u: {// 4 bits + encode = 15.0f/(max - min); + for (int j=0; j<128; ++j) { + auto a = quantize(); + *dst++ = quantize() << 4 | a; + a = quantize(); + *dst++ = quantize() << 4 | a; + } + } + break; + case 3u: {// 8 bits + encode = 255.0f/(max - min); + for (int j=0; j<128; ++j) { + *dst++ = quantize(); + *dst++ = quantize(); + *dst++ = quantize(); + *dst++ = quantize(); + } + } + break; + default: {// 16 bits - special implementation using higher bit-precision + auto *dst = reinterpret_cast(dstLeaf+1); + const double encode = 65535.0/(max - min);// note that double is required! 
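+                    // Editor's note (a sketch of the round trip, not original source):
+                    // each voxel is stored as code = uint16_t(encode*(v - min) + dither)
+                    // and decoded as v' = min + code*(max - min)/65535; e.g. with min = 0
+                    // and max = 1 a voxel value of 0.5 maps to code 32767 or 32768
+                    // (depending on the dither) and decodes back to ~0.499992 or ~0.500008.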
+ for (int j=0; j<128; ++j) { + *dst++ = uint16_t(encode * (srcLeaf.getValue(offset) - min) + lut(offset)); ++offset; + *dst++ = uint16_t(encode * (srcLeaf.getValue(offset) - min) + lut(offset)); ++offset; + *dst++ = uint16_t(encode * (srcLeaf.getValue(offset) - min) + lut(offset)); ++offset; + *dst++ = uint16_t(encode * (srcLeaf.getValue(offset) - min) + lut(offset)); ++offset; + } + } + }// end switch + } + });// kernel +} // CreateNanoGrid::processLeafs + +//================================================================================================ + +template +template +inline typename util::enable_if::is_index>::type +CreateNanoGrid::processInternalNodes() +{ + using DstNodeT = typename NanoNode::type; + using DstValueT = typename DstNodeT::ValueType; + using DstChildT = typename NanoNode::type; + static_assert(LEVEL == 1 || LEVEL == 2, "Expected internal node"); + + const uint64_t nodeCount = mSrcNodeAcc.nodeCount(LEVEL); + if (nodeCount > 0) {// compute and temporarily encode IDs of child nodes + uint64_t childCount = 0; + auto *dstNode = this->template dstNode(0); + for (uint64_t i=0; i(static_cast(i)).getChildMask().countOn(); + } + } + + util::forEach(0, nodeCount, 4, [&](const util::Range1D& r) { + auto *dstNode = this->template dstNode(r.begin()); + for (auto i = r.begin(); i != r.end(); ++i, ++dstNode) { + auto &srcNode = mSrcNodeAcc.template node(i); + uint64_t childID = dstNode->mFlags; + if (DstNodeT::DataType::padding()>0u) { + util::memzero(dstNode, DstNodeT::memUsage()); + } else { + dstNode->mFlags = 0;// enable rendering, no bbox, no stats + dstNode->mMinimum = dstNode->mMaximum = typename DstNodeT::ValueType(); + dstNode->mAverage = dstNode->mStdDevi = 0; + } + dstNode->mBBox[0] = srcNode.origin(); // copy origin of node + dstNode->mValueMask = srcNode.getValueMask(); // copy value mask + dstNode->mChildMask = srcNode.getChildMask(); // copy child mask + for (auto it = srcNode.cbeginChildAll(); it; ++it) { + SrcValueT value{}; // default initialization + if (it.probeChild(value)) { + DstChildT *dstChild = this->template dstNode(childID++);// might be Leaf + dstNode->setChild(it.pos(), dstChild); + } else { + dstNode->setValue(it.pos(), static_cast(value)); + } + } + } + }); +} // CreateNanoGrid::processInternalNodes + +//================================================================================================ + +template +template +inline typename util::enable_if::is_index>::type +CreateNanoGrid::processInternalNodes() +{ + using DstNodeT = typename NanoNode::type; + using DstChildT = typename NanoNode::type; + static_assert(LEVEL == 1 || LEVEL == 2, "Expected internal node"); + static_assert(DstNodeT::DataType::padding()==0u, "Expected internal nodes to have no padding"); + + const uint64_t nodeCount = mSrcNodeAcc.nodeCount(LEVEL); + if (nodeCount > 0) {// compute and temporarily encode IDs of child nodes + uint64_t childCount = 0; + auto *dstNode = this->template dstNode(0); + for (uint64_t i=0; i(i).getChildMask().countOn(); + } + } + + util::forEach(0, nodeCount, 4, [&](const util::Range1D& r) { + auto *dstNode = this->template dstNode(r.begin()); + for (auto i = r.begin(); i != r.end(); ++i, ++dstNode) { + auto &srcNode = mSrcNodeAcc.template node(i); + uint64_t childID = dstNode->mFlags; + dstNode->mFlags = 0u; + dstNode->mBBox[0] = srcNode.origin(); // copy origin of node + dstNode->mValueMask = srcNode.getValueMask(); // copy value mask + dstNode->mChildMask = srcNode.getChildMask(); // copy child mask + uint64_t n = mIncludeTiles ? 
mValIdx[LEVEL][i] : 0u; + for (auto it = srcNode.cbeginChildAll(); it; ++it) { + SrcValueT value; + if (it.probeChild(value)) { + DstChildT *dstChild = this->template dstNode(childID++);// might be Leaf + dstNode->setChild(it.pos(), dstChild); + } else { + uint64_t m = 0u; + if (mIncludeTiles && !((BuildTraits::is_onindex) && dstNode->mValueMask.isOff(it.pos()))) m = n++; + dstNode->setValue(it.pos(), m); + } + } + if (mIncludeTiles && mIncludeStats) {// stats are always placed after the tile values + dstNode->mMinimum = n++; + dstNode->mMaximum = n++; + dstNode->mAverage = n++; + dstNode->mStdDevi = n++; + } else {// if not tiles or stats set stats to the background offset + dstNode->mMinimum = 0u; + dstNode->mMaximum = 0u; + dstNode->mAverage = 0u; + dstNode->mStdDevi = 0u; + } + } + }); +} // CreateNanoGrid::processInternalNodes + +//================================================================================================ + +template +template +inline typename util::enable_if::is_index>::type +CreateNanoGrid::processRoot() +{ + using DstRootT = NanoRoot; + using DstValueT = typename DstRootT::ValueType; + auto &srcRoot = mSrcNodeAcc.root(); + auto *dstRoot = this->template dstRoot(); + const uint32_t tableSize = srcRoot.getTableSize(); + if (DstRootT::DataType::padding()>0) util::memzero(dstRoot, DstRootT::memUsage(tableSize)); + dstRoot->mTableSize = tableSize; + dstRoot->mMinimum = dstRoot->mMaximum = dstRoot->mBackground = srcRoot.background(); + dstRoot->mBBox = CoordBBox(); // // set to an empty bounding box + if (tableSize==0) return; + auto *dstChild = this->template dstNode(0);// fixed size and linear in memory + auto *dstTile = dstRoot->tile(0);// fixed size and linear in memory + for (auto it = srcRoot.cbeginChildAll(); it; ++it, ++dstTile) { + SrcValueT value; + if (it.probeChild(value)) { + dstTile->setChild(it.getCoord(), dstChild++, dstRoot); + } else { + dstTile->setValue(it.getCoord(), it.isValueOn(), static_cast(value)); + } + } +} // CreateNanoGrid::processRoot + +//================================================================================================ + +template +template +inline typename util::enable_if::is_index>::type +CreateNanoGrid::processRoot() +{ + using DstRootT = NanoRoot; + auto &srcRoot = mSrcNodeAcc.root(); + auto *dstRoot = this->template dstRoot(); + const uint32_t tableSize = srcRoot.getTableSize(); + if (DstRootT::DataType::padding()>0) util::memzero(dstRoot, DstRootT::memUsage(tableSize)); + dstRoot->mTableSize = tableSize; + dstRoot->mBackground = 0u; + uint64_t valueCount = 0u;// the first entry is always the background value + dstRoot->mBBox = CoordBBox(); // set to an empty/invalid bounding box + + if (tableSize>0) { + auto *dstChild = this->template dstNode(0);// fixed size and linear in memory + auto *dstTile = dstRoot->tile(0);// fixed size and linear in memory + for (auto it = srcRoot.cbeginChildAll(); it; ++it, ++dstTile) { + SrcValueT tmp; + if (it.probeChild(tmp)) { + dstTile->setChild(it.getCoord(), dstChild++, dstRoot); + } else { + dstTile->setValue(it.getCoord(), it.isValueOn(), 0u); + if (mIncludeTiles && !((BuildTraits::is_onindex) && !dstTile->state)) dstTile->value = ++valueCount; + } + } + } + if (mIncludeTiles && mIncludeStats) {// stats are always placed after the tile values + dstRoot->mMinimum = ++valueCount; + dstRoot->mMaximum = ++valueCount; + dstRoot->mAverage = ++valueCount; + dstRoot->mStdDevi = ++valueCount; + } else if (dstRoot->padding()==0) { + dstRoot->mMinimum = 0u; + dstRoot->mMaximum = 0u; + 
dstRoot->mAverage = 0u; + dstRoot->mStdDevi = 0u; + } +} // CreateNanoGrid::processRoot + +//================================================================================================ + +template +template +void CreateNanoGrid::processTree() +{ + const uint64_t nodeCount[3] = {mSrcNodeAcc.nodeCount(0), mSrcNodeAcc.nodeCount(1), mSrcNodeAcc.nodeCount(2)}; + auto *dstTree = this->template dstTree(); + dstTree->setRoot( this->template dstRoot() ); + dstTree->setFirstNode(nodeCount[2] ? this->template dstNode(0) : nullptr); + dstTree->setFirstNode(nodeCount[1] ? this->template dstNode(0) : nullptr); + dstTree->setFirstNode(nodeCount[0] ? this->template dstNode(0) : nullptr); + + dstTree->mNodeCount[0] = static_cast(nodeCount[0]); + dstTree->mNodeCount[1] = static_cast(nodeCount[1]); + dstTree->mNodeCount[2] = static_cast(nodeCount[2]); + + // Count number of active leaf level tiles + dstTree->mTileCount[0] = util::reduce(util::Range1D(0,nodeCount[1]), uint32_t(0), [&](util::Range1D &r, uint32_t sum){ + for (auto i=r.begin(); i!=r.end(); ++i) sum += mSrcNodeAcc.template node<1>(i).getValueMask().countOn(); + return sum;}, std::plus()); + + // Count number of active lower internal node tiles + dstTree->mTileCount[1] = util::reduce(util::Range1D(0,nodeCount[2]), uint32_t(0), [&](util::Range1D &r, uint32_t sum){ + for (auto i=r.begin(); i!=r.end(); ++i) sum += mSrcNodeAcc.template node<2>(i).getValueMask().countOn(); + return sum;}, std::plus()); + + // Count number of active upper internal node tiles + dstTree->mTileCount[2] = 0; + for (auto it = mSrcNodeAcc.root().cbeginValueOn(); it; ++it) dstTree->mTileCount[2] += 1; + + // Count number of active voxels + dstTree->mVoxelCount = util::reduce(util::Range1D(0, nodeCount[0]), uint64_t(0), [&](util::Range1D &r, uint64_t sum){ + for (auto i=r.begin(); i!=r.end(); ++i) sum += mSrcNodeAcc.template node<0>(i).getValueMask().countOn(); + return sum;}, std::plus()); + + dstTree->mVoxelCount += uint64_t(dstTree->mTileCount[0]) << 9;// = 3 * 3 + dstTree->mVoxelCount += uint64_t(dstTree->mTileCount[1]) << 21;// = 3 * (3+4) + dstTree->mVoxelCount += uint64_t(dstTree->mTileCount[2]) << 36;// = 3 * (3+4+5) + +} // CreateNanoGrid::processTree + +//================================================================================================ + +template +template +void CreateNanoGrid::processGrid() +{ + auto* dstGrid = this->template dstGrid(); + dstGrid->init({GridFlags::IsBreadthFirst}, mOffset.size, mSrcNodeAcc.map(), + toGridType(), toGridClass(mSrcNodeAcc.gridClass())); + dstGrid->mBlindMetadataCount = static_cast(mBlindMetaData.size()); + dstGrid->mData1 = this->valueCount(); + +// if (!isValid(dstGrid->mGridType, dstGrid->mGridClass)) { +//#if 1 +// char str[30]; +// fprintf(stderr,"Warning: Strange combination of GridType(\"%s\") and GridClass(\"%s\"). Consider changing GridClass to \"Unknown\"\n", +// toStr(str, dstGrid->mGridType), toStr(str + 15, dstGrid->mGridClass)); +//#else +// throw std::runtime_error("Invalid combination of GridType("+std::to_string(int(dstGrid->mGridType))+ +// ") and GridClass("+std::to_string(int(dstGrid->mGridClass))+"). 
See NanoVDB.h for details!"); +//#endif +// } + util::memzero(dstGrid->mGridName, GridData::MaxNameSize);// initialize mGridName to zero + strncpy(dstGrid->mGridName, mSrcNodeAcc.getName().c_str(), GridData::MaxNameSize-1); + if (mSrcNodeAcc.hasLongGridName()) dstGrid->setLongGridNameOn();// grid name is long so store it as blind data + + // Partially process blind meta data - they will be complete in postProcess + if (mBlindMetaData.size()>0) { + auto *metaData = this->dstMeta(0); + dstGrid->mBlindMetadataOffset = util::PtrDiff(metaData, dstGrid); + dstGrid->mBlindMetadataCount = static_cast(mBlindMetaData.size()); + char *blindData = util::PtrAdd(mBufferPtr, mOffset.blind); + for (const auto &b : mBlindMetaData) { + std::memcpy(metaData, b.metaData, sizeof(GridBlindMetaData)); + metaData->setBlindData(blindData);// sets metaData.mOffset + if (metaData->mDataClass == GridBlindDataClass::GridName) strcpy(blindData, mSrcNodeAcc.getName().c_str()); + ++metaData; + blindData += b.size; + } + mBlindMetaData.clear(); + } +} // CreateNanoGrid::processGrid + +//================================================================================================ + +template +template +inline typename util::disable_if::is_index>::type +CreateNanoGrid::postProcess() +{ + if constexpr(util::is_same::value) mCodec.reset(); + auto *dstGrid = this->template dstGrid(); + updateGridStats(dstGrid, mStats); +#if defined(NANOVDB_USE_OPENVDB) && !defined(__CUDACC__) + auto *metaData = this->dstMeta(0); + if constexpr(util::is_same::value || + util::is_same::value) { + static_assert(util::is_same::value, "expected DstBuildT==uint32_t"); + auto *dstData0 = this->template dstNode(0)->data(); + dstData0->mMinimum = 0; // start of prefix sum + dstData0->mMaximum = dstData0->mValues[511u]; + for (uint64_t i=1, n=mSrcNodeAcc.nodeCount(0); imMinimum = dstData0->mMinimum + dstData0->mMaximum; + dstData1->mMaximum = dstData1->mValues[511u]; + dstData0 = dstData1; + } + for (size_t i = 0, n = dstGrid->blindDataCount(); i < n; ++i, ++metaData) { + if constexpr(util::is_same::value) { + if (metaData->mDataClass != GridBlindDataClass::IndexArray) continue; + if (metaData->mDataType == GridType::UInt32) { + uint32_t *blindData = const_cast(metaData->template getBlindData()); + util::forEach(0, mSrcNodeAcc.nodeCount(0), 16, [&](const auto& r) { + auto *dstLeaf = this->template dstNode(r.begin()); + for (auto j = r.begin(); j != r.end(); ++j, ++dstLeaf) { + uint32_t* p = blindData + dstLeaf->mMinimum; + for (uint32_t idx : mSrcNodeAcc.template node<0>(j).indices()) *p++ = idx; + } + }); + } + } else {// if constexpr(util::is_same::value) + if (metaData->mDataClass != GridBlindDataClass::AttributeArray) continue; + if (auto *blindData = dstGrid->template getBlindData(i)) { + this->template copyPointAttribute(i, blindData); + } else if (auto *blindData = dstGrid->template getBlindData(i)) { + this->template copyPointAttribute(i, reinterpret_cast(blindData)); + } else if (auto *blindData = dstGrid->template getBlindData(i)) { + this->template copyPointAttribute(i, blindData); + } else if (auto *blindData = dstGrid->template getBlindData(i)) { + this->template copyPointAttribute(i, blindData); + } else { + char str[16]; + std::cerr << "unsupported point attribute \"" << toStr(str, metaData->mDataType) << "\"\n"; + } + }// if + }// loop + } else { // if + (void)metaData; + } +#endif + updateChecksum(dstGrid, mChecksum); +}// CreateNanoGrid::postProcess + 
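+// Editor's sketch (not part of the original source): the loop above turns per-leaf
+// point counts into a running prefix sum, i.e. mMinimum = offset of the leaf's first
+// point in the blind-data array and mMaximum = its point count (mValues[511] is the
+// last entry of the leaf's inclusive prefix sum). A scalar analogue on plain arrays:
+//
+//   uint64_t offset[N], count[N];            // count[i] = number of points in leaf i
+//   offset[0] = 0;
+//   for (size_t i = 1; i < N; ++i) offset[i] = offset[i-1] + count[i-1];
+//   // points of leaf i occupy blindData[offset[i]] .. blindData[offset[i]+count[i]-1]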
+//================================================================================================
+
+template <typename SrcGridT>
+template <typename DstBuildT>
+inline typename util::enable_if<BuildTraits<DstBuildT>::is_index>::type
+CreateNanoGrid<SrcGridT>::postProcess(uint32_t channels)
+{
+    char str[16];
+    const std::string typeName = toStr(str, toGridType<SrcValueT>());
+    const uint64_t valueCount = this->valueCount();
+    auto *dstGrid = this->template dstGrid<DstBuildT>();
+    for (uint32_t i=0; i<channels; ++i) {
+        const std::string name = "channel_" + std::to_string(i);
+        int j = dstGrid->findBlindData(name.c_str());
+        if (j<0) throw std::runtime_error("missing " + name);
+        auto *metaData = this->dstMeta(j);// partially set in processGrid
+        metaData->mDataClass = GridBlindDataClass::ChannelArray;
+        metaData->mDataType = toGridType<SrcValueT>();
+        SrcValueT *blindData = const_cast<SrcValueT*>(metaData->template getBlindData<SrcValueT>());
+        if (i>0) {// concurrent copy from previous channel
+            util::forEach(0,valueCount,1024,[&](const util::Range1D &r){
+                SrcValueT *dst=blindData+r.begin(), *end=dst+r.size(), *src=dst-valueCount;
+                while(dst!=end) *dst++ = *src++;
+            });
+        } else {
+            this->template copyValues<DstBuildT>(blindData);
+        }
+    }// loop over channels
+    updateGridStats(this->template dstGrid<DstBuildT>(), std::min(StatsMode::BBox, mStats));
+    updateChecksum(dstGrid, mChecksum);
+}// CreateNanoGrid<SrcGridT>::postProcess
+
+//================================================================================================
+
+template <typename SrcGridT>
+template <typename DstBuildT>
+typename util::enable_if<BuildTraits<DstBuildT>::is_index>::type
+CreateNanoGrid<SrcGridT>::copyValues(SrcValueT *buffer)
+{// copy values from the source grid into the provided buffer
+    assert(mBufferPtr && buffer);
+    using StatsT = typename FloatTraits<SrcValueT>::FloatType;
+
+    if (this->valueCount()==0) this->template countValues<DstBuildT>();
+
+    auto copyNodeValues = [&](const auto &node, SrcValueT *v) {
+        if constexpr(BuildTraits<DstBuildT>::is_onindex) {
+            for (auto it = node.cbeginValueOn(); it; ++it) *v++ = *it;
+        } else {
+            for (auto it = node.cbeginValueAll(); it; ++it) *v++ = *it;
+        }
+        if (mIncludeStats) {
+            if constexpr(SrcNodeAccT::IS_NANOVDB) {// resolved at compile time
+                *v++ = node.minimum();
+                *v++ = node.maximum();
+                if constexpr(util::is_same<SrcValueT, StatsT>::value) {
+                    *v++ = node.average();
+                    *v++ = node.stdDeviation();
+                } else {// eg when SrcValueT=Vec3f and StatsT=float
+                    *v++ = SrcValueT(node.average());
+                    *v++ = SrcValueT(node.stdDeviation());
+                }
+            } else {// openvdb and nanovdb::tools::build::Grid have no stats
+                *v++ = buffer[0];// background
+                *v++ = buffer[0];// background
+                *v++ = buffer[0];// background
+                *v++ = buffer[0];// background
+            }
+        }
+    };// copyNodeValues
+
+    const SrcRootT &root = mSrcNodeAcc.root();
+    buffer[0] = root.background();// Value array always starts with the background value
+    if (mIncludeTiles) {
+        copyNodeValues(root, buffer + 1u);
+        util::forEach(0, mSrcNodeAcc.nodeCount(2), 1, [&](const util::Range1D& r) {
+            for (auto i = r.begin(); i!=r.end(); ++i) {
+                copyNodeValues(mSrcNodeAcc.template node<2>(i), buffer + mValIdx[2][i]);
+            }
+        });
+        util::forEach(0, mSrcNodeAcc.nodeCount(1), 1, [&](const util::Range1D& r) {
+            for (auto i = r.begin(); i!=r.end(); ++i) {
+                copyNodeValues(mSrcNodeAcc.template node<1>(i), buffer + mValIdx[1][i]);
+            }
+        });
+    }
+    util::forEach(0, mSrcNodeAcc.nodeCount(0), 4, [&](const util::Range1D& r) {
+        for (auto i = r.begin(); i!=r.end(); ++i) {
+            copyNodeValues(mSrcNodeAcc.template node<0>(i), buffer + mValIdx[0][i]);
+        }
+    });
+}// CreateNanoGrid<SrcGridT>::copyValues
+
+
+//================================================================================================
+
+#if defined(NANOVDB_USE_OPENVDB) && !defined(__CUDACC__)
+
+template
+template
+typename util::disable_if::value ||
util::is_same::value, uint64_t>::type +CreateNanoGrid::countPoints() const +{ + static_assert(util::is_same::value, "expected default template parameter"); + return 0u; +}// CreateNanoGrid::countPoints + +template +template +typename util::enable_if::value || + util::is_same::value, uint64_t>::type +CreateNanoGrid::countPoints() const +{ + static_assert(util::is_same::value, "expected default template parameter"); + return util::reduce(0, mSrcNodeAcc.nodeCount(0), 8, uint64_t(0), [&](auto &r, uint64_t sum) { + for (auto i=r.begin(); i!=r.end(); ++i) sum += mSrcNodeAcc.template node<0>(i).getLastValue(); + return sum;}, std::plus()); +}// CreateNanoGrid::countPoints + +template +template +typename util::enable_if::value>::type +CreateNanoGrid::copyPointAttribute(size_t attIdx, AttT *attPtr) +{ + static_assert(util::is_same::value, "Expected default parameter"); + using HandleT = openvdb::points::AttributeHandle; + util::forEach(0, mSrcNodeAcc.nodeCount(0), 16, [&](const auto& r) { + auto *dstLeaf = this->template dstNode(r.begin()); + for (auto i = r.begin(); i != r.end(); ++i, ++dstLeaf) { + auto& srcLeaf = mSrcNodeAcc.template node<0>(i); + HandleT handle(srcLeaf.constAttributeArray(attIdx)); + AttT *p = attPtr + dstLeaf->mMinimum; + for (auto iter = srcLeaf.beginIndexOn(); iter; ++iter) *p++ = handle.get(*iter); + } + }); +}// CreateNanoGrid::copyPointAttribute + +#endif + +//================================================================================================ + +template +typename util::disable_if::is_index || BuildTraits::is_Fp, GridHandle>::type +createNanoGrid(const SrcGridT &srcGrid, + StatsMode sMode, + CheckMode cMode, + int verbose, + const BufferT &buffer) +{ + CreateNanoGrid converter(srcGrid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.setVerbose(verbose); + return converter.template getHandle(buffer); +}// createNanoGrid + +//================================================================================================ + +template +typename util::enable_if::is_index, GridHandle>::type +createNanoGrid(const SrcGridT &srcGrid, + uint32_t channels, + bool includeStats, + bool includeTiles, + int verbose, + const BufferT &buffer) +{ + CreateNanoGrid converter(srcGrid); + converter.setVerbose(verbose); + return converter.template getHandle(channels, includeStats, includeTiles, buffer); +} + +//================================================================================================ + +template +typename util::enable_if::value, GridHandle>::type +createNanoGrid(const SrcGridT &srcGrid, + StatsMode sMode, + CheckMode cMode, + bool ditherOn, + int verbose, + const OracleT &oracle, + const BufferT &buffer) +{ + CreateNanoGrid converter(srcGrid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.enableDithering(ditherOn); + converter.setVerbose(verbose); + return converter.template getHandle(oracle, buffer); +}// createNanoGrid + +//================================================================================================ + +template +typename util::enable_if::is_FpX, GridHandle>::type +createNanoGrid(const SrcGridT &srcGrid, + StatsMode sMode, + CheckMode cMode, + bool ditherOn, + int verbose, + const BufferT &buffer) +{ + CreateNanoGrid converter(srcGrid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.enableDithering(ditherOn); + converter.setVerbose(verbose); + return converter.template getHandle(buffer); +}// createNanoGrid + 
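+// Editor's sketch (not part of the original source): typical use of the wrappers
+// above, assuming NANOVDB_USE_OPENVDB and an openvdb::FloatGrid named srcGrid;
+// the index-grid overload is shown with its leading arguments only:
+//
+//   auto handle   = nanovdb::tools::createNanoGrid(srcGrid);  // float -> float
+//   auto *dstGrid = handle.grid<float>();                     // host-side access
+//   auto idxHandle = nanovdb::tools::createNanoGrid<openvdb::FloatGrid,
+//       nanovdb::ValueOnIndex>(srcGrid, 1u/*channels*/, true/*stats*/, true/*tiles*/);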
+//================================================================================================ + +#if defined(NANOVDB_USE_OPENVDB) && !defined(__CUDACC__) +template +GridHandle +openToNanoVDB(const openvdb::GridBase::Ptr& base, + StatsMode sMode, + CheckMode cMode, + int verbose) +{ + // We need to define these types because they are not defined in OpenVDB + using openvdb_Vec4fTree = typename openvdb::tree::Tree4::Type; + using openvdb_Vec4dTree = typename openvdb::tree::Tree4::Type; + using openvdb_Vec4fGrid = openvdb::Grid; + using openvdb_Vec4dGrid = openvdb::Grid; + using openvdb_UInt32Grid = openvdb::Grid; + + if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else { + OPENVDB_THROW(openvdb::RuntimeError, "Unrecognized OpenVDB grid type"); + } +}// openToNanoVDB +#endif + +}// namespace tools =============================================================================== + +} // namespace nanovdb + +#endif // NANOVDB_TOOLS_CREATENANOGRID_H_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/tools/CreatePrimitives.h b/nanovdb/nanovdb/tools/CreatePrimitives.h new file mode 100644 index 0000000000..a28d5bacd4 --- /dev/null +++ b/nanovdb/nanovdb/tools/CreatePrimitives.h @@ -0,0 +1,1752 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/*! + \file nanovdb/tools/CreatePrimitives.h + + \author Ken Museth + + \date June 26, 2020 + + \brief Generates volumetric primitives, e.g. sphere, torus etc, as NanoVDB grid. + + \note This has no dependency on openvdb. 
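+
+   \par Example (a minimal sketch; assumes the default HostBuffer)
+   \code
+   auto handle = nanovdb::tools::createLevelSetSphere<float>(100.0);// radius in world units
+   const nanovdb::FloatGrid *grid = handle.grid<float>();
+   \endcode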
+*/
+
+#ifndef NANOVDB_TOOLS_PRIMITIVES_H_HAS_BEEN_INCLUDED
+#define NANOVDB_TOOLS_PRIMITIVES_H_HAS_BEEN_INCLUDED
+
+#define NANOVDB_PARALLEL_PRIMITIVES
+
+#include <nanovdb/NanoVDB.h>
+#include <nanovdb/tools/CreateNanoGrid.h>
+#include <nanovdb/util/ForEach.h>// for util::forEach and util::Range
+
+namespace nanovdb {
+
+namespace tools {// ===================================================
+
+/// @brief Returns a handle to a narrow-band level set of a sphere
+///
+/// @param radius    Radius of sphere in world units
+/// @param center    Center of sphere in world units
+/// @param voxelSize Size of a voxel in world units
+/// @param halfWidth Half-width of narrow band in voxel units
+/// @param origin    Origin of grid in world units
+/// @param name      Name of the grid
+/// @param sMode     Mode of computation for the statistics.
+/// @param cMode     Mode of computation for the checksum.
+/// @param tolerance Global error tolerance used when VoxelT = FpN
+/// @param ditherOn  If true dithering will be applied when VoxelT = {Fp4,Fp8,Fp16,FpN}
+/// @param buffer    Buffer used for memory allocation by the handle
+///
+/// @details The @c BuildT template parameter must be one of the following:
+///          float (default), double, Fp4, Fp8, Fp16 or FpN. The @c tolerance
+///          argument is only used when BuildT is set to FpN.
+template
+typename util::enable_if::value, GridHandle>::type
+createLevelSetSphere(double radius = 100.0,
+                     const Vec3d& center = Vec3d(0),
+                     double voxelSize = 1.0,
+                     double halfWidth = 3.0,
+                     const Vec3d& origin = Vec3d(0),
+                     const std::string& name = "sphere_ls",
+                     StatsMode sMode = StatsMode::Default,
+                     CheckMode cMode = CheckMode::Default,
+                     const BufferT& buffer = BufferT());
+
+template
+typename util::enable_if::value, GridHandle>::type
+createLevelSetSphere(double radius = 100.0,
+                     const Vec3d& center = Vec3d(0),
+                     double voxelSize = 1.0,
+                     double halfWidth = 3.0,
+                     const Vec3d& origin = Vec3d(0),
+                     const std::string& name = "sphere_ls",
+                     StatsMode sMode = StatsMode::Default,
+                     CheckMode cMode = CheckMode::Default,
+                     bool ditherOn = false,
+                     const BufferT& buffer = BufferT());
+
+template
+typename util::enable_if::value, GridHandle>::type
+createLevelSetSphere(double radius = 100.0,
+                     const Vec3d& center = Vec3d(0),
+                     double voxelSize = 1.0,
+                     double halfWidth = 3.0,
+                     const Vec3d& origin = Vec3d(0),
+                     const std::string& name = "sphere_ls_FpN",
+                     StatsMode sMode = StatsMode::Default,
+                     CheckMode cMode = CheckMode::Default,
+                     float tolerance = -1.0f,
+                     bool ditherOn = false,
+                     const BufferT& buffer = BufferT());
+
+//================================================================================================
+
+/// @brief Returns a handle to a sparse fog volume of a sphere such
+///        that the exterior is 0 and inactive, the interior is active
+///        with values varying smoothly from 0 at the surface of the
+///        sphere to 1 at the halfWidth and interior of the sphere.
+///
+/// @param radius    Radius of sphere in world units
+/// @param center    Center of sphere in world units
+/// @param voxelSize Size of a voxel in world units
+/// @param halfWidth Half-width of narrow band in voxel units
+/// @param origin    Origin of grid in world units
+/// @param name      Name of the grid
+/// @param sMode     Mode of computation for the statistics.
+/// @param cMode     Mode of computation for the checksum.
+/// @param tolerance Global error tolerance used when VoxelT = FpN
+/// @param ditherOn  If true dithering will be applied when BuildT = {Fp4,Fp8,Fp16,FpN}
+/// @param buffer    Buffer used for memory allocation by the handle
+///
+/// @details The @c BuildT template parameter must be one of the following:
+///          float (default), double, Fp4, Fp8, Fp16 or FpN. The @c tolerance
+///          argument is only used when BuildT is set to FpN.
+template
+typename util::disable_if::value, GridHandle>::type
+createFogVolumeSphere(double radius = 100.0,
+                      const Vec3d& center = Vec3d(0.0),
+                      double voxelSize = 1.0,
+                      double halfWidth = 3.0,
+                      const Vec3d& origin = Vec3d(0.0),
+                      const std::string& name = "sphere_fog",
+                      StatsMode sMode = StatsMode::Default,
+                      CheckMode cMode = CheckMode::Default,
+                      const BufferT& buffer = BufferT());
+
+template
+typename util::enable_if::value, GridHandle>::type
+createFogVolumeSphere(double radius = 100.0,
+                      const Vec3d& center = Vec3d(0.0),
+                      double voxelSize = 1.0,
+                      double halfWidth = 3.0,
+                      const Vec3d& origin = Vec3d(0.0),
+                      const std::string& name = "sphere_fog",
+                      StatsMode sMode = StatsMode::Default,
+                      CheckMode cMode = CheckMode::Default,
+                      float tolerance = -1.0f,
+                      bool ditherOn = false,
+                      const BufferT& buffer = BufferT());
+
+//================================================================================================
+
+/// @brief Returns a handle to a PointDataGrid containing points scattered
+///        on the surface of a sphere.
+///
+/// @param pointsPerVoxel Number of points per voxel on the surface
+/// @param radius         Radius of sphere in world units
+/// @param center         Center of sphere in world units
+/// @param voxelSize      Size of a voxel in world units
+/// @param origin         Origin of grid in world units
+/// @param name           Name of the grid
+/// @param mode           Mode of computation for the checksum.
+/// @param buffer         Buffer used for memory allocation by the handle
+///
+/// @details The @c BuildT template parameter must be float (default) or double.
+template
+typename util::disable_if::value, GridHandle>::type
+createPointSphere(int pointsPerVoxel = 1,
+                  double radius = 100.0,
+                  const Vec3d& center = Vec3d(0.0),
+                  double voxelSize = 1.0,
+                  const Vec3d& origin = Vec3d(0.0),
+                  const std::string& name = "sphere_points",
+                  CheckMode mode = CheckMode::Default,
+                  const BufferT& buffer = BufferT());
+
+//================================================================================================
+
+/// @brief Returns a handle to a narrow-band level set of a torus in the xz-plane
+///
+/// @param majorRadius Major radius of torus in world units
+/// @param minorRadius Minor radius of torus in world units
+/// @param center      Center of torus in world units
+/// @param voxelSize   Size of a voxel in world units
+/// @param halfWidth   Half-width of narrow band in voxel units
+/// @param origin      Origin of grid in world units
+/// @param name        Name of the grid
+/// @param sMode       Mode of computation for the statistics.
+/// @param cMode       Mode of computation for the checksum.
+/// @param tolerance   Global error tolerance used when VoxelT = FpN
+/// @param ditherOn    If true dithering will be applied when VoxelT = {Fp4,Fp8,Fp16,FpN}
+/// @param buffer      Buffer used for memory allocation by the handle
+///
+/// @details The @c BuildT template parameter must be one of the following:
+///          float (default), double, Fp4, Fp8, Fp16 or FpN. The @c tolerance
+///          argument is only used when BuildT is set to FpN.
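+///
+/// @par Example (a minimal sketch; assumes the default HostBuffer)
+/// @code
+/// // torus with 100/50 world-unit major/minor radii and 1 world-unit voxels
+/// auto handle = nanovdb::tools::createLevelSetTorus<float>(100.0, 50.0);
+/// @endcode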
+template
+typename util::disable_if::value, GridHandle>::type
+createLevelSetTorus(double majorRadius = 100.0,
+                    double minorRadius = 50.0,
+                    const Vec3d& center = Vec3d(0.0),
+                    double voxelSize = 1.0,
+                    double halfWidth = 3.0,
+                    const Vec3d& origin = Vec3d(0.0),
+                    const std::string& name = "torus_ls",
+                    StatsMode sMode = StatsMode::Default,
+                    CheckMode cMode = CheckMode::Default,
+                    const BufferT& buffer = BufferT());
+
+template
+typename util::enable_if::value, GridHandle>::type
+createLevelSetTorus(double majorRadius = 100.0,
+                    double minorRadius = 50.0,
+                    const Vec3d& center = Vec3d(0.0),
+                    double voxelSize = 1.0,
+                    double halfWidth = 3.0,
+                    const Vec3d& origin = Vec3d(0.0),
+                    const std::string& name = "torus_ls",
+                    StatsMode sMode = StatsMode::Default,
+                    CheckMode cMode = CheckMode::Default,
+                    float tolerance = -1.0f,
+                    bool ditherOn = false,
+                    const BufferT& buffer = BufferT());
+
+//================================================================================================
+
+/// @brief Returns a handle to a sparse fog volume of a torus in the xz-plane such
+///        that the exterior is 0 and inactive, the interior is active
+///        with values varying smoothly from 0 at the surface of the
+///        torus to 1 at the halfWidth and interior of the torus.
+///
+/// @param majorRadius Major radius of torus in world units
+/// @param minorRadius Minor radius of torus in world units
+/// @param center      Center of torus in world units
+/// @param voxelSize   Size of a voxel in world units
+/// @param halfWidth   Half-width of narrow band in voxel units
+/// @param origin      Origin of grid in world units
+/// @param name        Name of the grid
+/// @param sMode       Mode of computation for the statistics.
+/// @param cMode       Mode of computation for the checksum.
+/// @param tolerance   Global error tolerance used when VoxelT = FpN
+/// @param ditherOn    If true dithering will be applied when VoxelT = {Fp4,Fp8,Fp16,FpN}
+/// @param buffer      Buffer used for memory allocation by the handle
+///
+/// @details The @c BuildT template parameter must be one of the following:
+///          float (default), double, Fp4, Fp8, Fp16 or FpN. The @c tolerance
+///          argument is only used when BuildT is set to FpN.
+template
+typename util::disable_if::value, GridHandle>::type
+createFogVolumeTorus(double majorRadius = 100.0,
+                     double minorRadius = 50.0,
+                     const Vec3d& center = Vec3d(0.0),
+                     double voxelSize = 1.0,
+                     double halfWidth = 3.0,
+                     const Vec3d& origin = Vec3d(0.0),
+                     const std::string& name = "torus_fog",
+                     StatsMode sMode = StatsMode::Default,
+                     CheckMode cMode = CheckMode::Default,
+                     const BufferT& buffer = BufferT());
+
+template
+typename util::enable_if::value, GridHandle>::type
+createFogVolumeTorus(double majorRadius = 100.0,
+                     double minorRadius = 50.0,
+                     const Vec3d& center = Vec3d(0.0),
+                     double voxelSize = 1.0,
+                     double halfWidth = 3.0,
+                     const Vec3d& origin = Vec3d(0.0),
+                     const std::string& name = "torus_fog_FpN",
+                     StatsMode sMode = StatsMode::Default,
+                     CheckMode cMode = CheckMode::Default,
+                     float tolerance = -1.0f,
+                     bool ditherOn = false,
+                     const BufferT& buffer = BufferT());
+
+//================================================================================================
+
+/// @brief Returns a handle to a PointDataGrid containing points scattered
+///        on the surface of a torus.
+///
+/// @param pointsPerVoxel Number of points per voxel on the surface
+/// @param majorRadius    Major radius of torus in world units
+/// @param minorRadius    Minor radius of torus in world units
+/// @param center         Center of torus in world units
+/// @param voxelSize      Size of a voxel in world units
+/// @param origin         Origin of grid in world units
+/// @param name           Name of the grid
+/// @param cMode          Mode of computation for the checksum.
+/// @param buffer         Buffer used for memory allocation by the handle
+///
+/// @details The @c BuildT template parameter must be float (default) or double.
+template
+typename util::disable_if::value, GridHandle>::type
+createPointTorus(int pointsPerVoxel = 1, // number of points per voxel on the surface
+                 double majorRadius = 100.0, // major radius of torus in world units
+                 double minorRadius = 50.0, // minor radius of torus in world units
+                 const Vec3d& center = Vec3d(0.0), // center of torus in world units
+                 double voxelSize = 1.0, // size of a voxel in world units
+                 const Vec3d& origin = Vec3d(0.0), // origin of grid in world units
+                 const std::string& name = "torus_points", // name of grid
+                 CheckMode cMode = CheckMode::Default,
+                 const BufferT& buffer = BufferT());
+
+//================================================================================================
+
+/// @brief Returns a handle to a narrow-band level set of a box
+///
+/// @param width     Width of box in world units
+/// @param height    Height of box in world units
+/// @param depth     Depth of box in world units
+/// @param center    Center of box in world units
+/// @param voxelSize Size of a voxel in world units
+/// @param halfWidth Half-width of narrow band in voxel units
+/// @param origin    Origin of grid in world units
+/// @param name      Name of the grid
+/// @param sMode     Mode of computation for the statistics.
+/// @param cMode     Mode of computation for the checksum.
+/// @param tolerance Global error tolerance used when VoxelT = FpN
+/// @param ditherOn  If true dithering will be applied when VoxelT = {Fp4,Fp8,Fp16,FpN}
+/// @param buffer    Buffer used for memory allocation by the handle
+///
+/// @details The @c BuildT template parameter must be one of the following:
+///          float (default), double, Fp4, Fp8, Fp16 or FpN. The @c tolerance
+///          argument is only used when BuildT is set to FpN.
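+///
+/// @par Example (a minimal sketch; assumes the default HostBuffer)
+/// @code
+/// // a 40 x 60 x 100 world-unit box centered at the origin
+/// auto handle = nanovdb::tools::createLevelSetBox<float>(40.0, 60.0, 100.0);
+/// @endcode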
+template
+typename util::disable_if::value, GridHandle>::type
+createLevelSetBox(double width = 40.0,
+                  double height = 60.0,
+                  double depth = 100.0,
+                  const Vec3d& center = Vec3d(0.0),
+                  double voxelSize = 1.0,
+                  double halfWidth = 3.0,
+                  const Vec3d& origin = Vec3d(0.0),
+                  const std::string& name = "box_ls",
+                  StatsMode sMode = StatsMode::Default,
+                  CheckMode cMode = CheckMode::Default,
+                  const BufferT& buffer = BufferT());
+
+template
+typename util::enable_if::value, GridHandle>::type
+createLevelSetBox(double width = 40.0,
+                  double height = 60.0,
+                  double depth = 100.0,
+                  const Vec3d& center = Vec3d(0.0),
+                  double voxelSize = 1.0,
+                  double halfWidth = 3.0,
+                  const Vec3d& origin = Vec3d(0.0),
+                  const std::string& name = "box_ls_FpN",
+                  StatsMode sMode = StatsMode::Default,
+                  CheckMode cMode = CheckMode::Default,
+                  float tolerance = -1.0f,
+                  bool ditherOn = false,
+                  const BufferT& buffer = BufferT());
+
+//================================================================================================
+
+/// @brief Returns a handle to a sparse fog volume of a box such
+///        that the exterior is 0 and inactive, the interior is active
+///        with values varying smoothly from 0 at the surface of the
+///        box to 1 at the halfWidth and interior of the box.
+///
+/// @param width     Width of box in world units
+/// @param height    Height of box in world units
+/// @param depth     Depth of box in world units
+/// @param center    Center of box in world units
+/// @param voxelSize Size of a voxel in world units
+/// @param halfWidth Half-width of narrow band in voxel units
+/// @param origin    Origin of grid in world units
+/// @param name      Name of the grid
+/// @param sMode     Mode of computation for the statistics.
+/// @param cMode     Mode of computation for the checksum.
+/// @param tolerance Global error tolerance used when VoxelT = FpN
+/// @param ditherOn  If true dithering will be applied when VoxelT = {Fp4,Fp8,Fp16,FpN}
+/// @param buffer    Buffer used for memory allocation by the handle
+///
+/// @details The @c BuildT template parameter must be one of the following:
+///          float (default), double, Fp4, Fp8, Fp16 or FpN. The @c tolerance
+///          argument is only used when BuildT is set to FpN.
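+///
+/// @par Example (a minimal sketch; assumes the default HostBuffer)
+/// @code
+/// // fog volume of a 40 x 60 x 100 world-unit box: 0 outside, ramping up to 1 inside
+/// auto handle = nanovdb::tools::createFogVolumeBox<float>(40.0, 60.0, 100.0);
+/// @endcode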
+template
+typename util::disable_if::value, GridHandle>::type
+createFogVolumeBox(double width = 40.0,
+                   double height = 60.0,
+                   double depth = 100.0,
+                   const Vec3d& center = Vec3d(0.0),
+                   double voxelSize = 1.0,
+                   double halfWidth = 3.0,
+                   const Vec3d& origin = Vec3d(0.0),
+                   const std::string& name = "box_fog",
+                   StatsMode sMode = StatsMode::Default,
+                   CheckMode cMode = CheckMode::Default,
+                   const BufferT& buffer = BufferT());
+
+template
+typename util::enable_if::value, GridHandle>::type
+createFogVolumeBox(double width = 40.0,
+                   double height = 60.0,
+                   double depth = 100.0,
+                   const Vec3d& center = Vec3d(0.0),
+                   double voxelSize = 1.0,
+                   double halfWidth = 3.0,
+                   const Vec3d& origin = Vec3d(0.0),
+                   const std::string& name = "box_fog_FpN",
+                   StatsMode sMode = StatsMode::Default,
+                   CheckMode cMode = CheckMode::Default,
+                   float tolerance = -1.0f,
+                   bool ditherOn = false,
+                   const BufferT& buffer = BufferT());
+
+//================================================================================================
+
+/// @brief Returns a handle to a narrow-band level set of an octahedron
+///
+/// @param scale     Scale of octahedron in world units
+/// @param center    Center of octahedron in world units
+/// @param voxelSize Size of a voxel in world units
+/// @param halfWidth Half-width of narrow band in voxel units
+/// @param origin    Origin of grid in world units
+/// @param name      Name of the grid
+/// @param sMode     Mode of computation for the statistics.
+/// @param cMode     Mode of computation for the checksum.
+/// @param tolerance Global error tolerance used when VoxelT = FpN
+/// @param ditherOn  If true dithering will be applied when VoxelT = {Fp4,Fp8,Fp16,FpN}
+/// @param buffer    Buffer used for memory allocation by the handle
+///
+/// @details The @c BuildT template parameter must be one of the following:
+///          float (default), double, Fp4, Fp8, Fp16 or FpN. The @c tolerance
+///          argument is only used when BuildT is set to FpN.
+template
+typename util::disable_if::value, GridHandle>::type
+createLevelSetOctahedron(double scale = 100.0,
+                         const Vec3d& center = Vec3d(0.0),
+                         double voxelSize = 1.0,
+                         double halfWidth = 3.0,
+                         const Vec3d& origin = Vec3d(0.0),
+                         const std::string& name = "octadedron_ls",
+                         StatsMode sMode = StatsMode::Default,
+                         CheckMode cMode = CheckMode::Default,
+                         const BufferT& buffer = BufferT());
+
+template
+typename util::enable_if::value, GridHandle>::type
+createLevelSetOctahedron(double scale = 100.0,
+                         const Vec3d& center = Vec3d(0.0),
+                         double voxelSize = 1.0,
+                         double halfWidth = 3.0,
+                         const Vec3d& origin = Vec3d(0.0),
+                         const std::string& name = "octadedron_ls_FpN",
+                         StatsMode sMode = StatsMode::Default,
+                         CheckMode cMode = CheckMode::Default,
+                         float tolerance = -1.0f,
+                         bool ditherOn = false,
+                         const BufferT& buffer = BufferT());
+
+//================================================================================================
+
+/// @brief Returns a handle to a sparse fog volume of an octahedron such
+///        that the exterior is 0 and inactive, the interior is active
+///        with values varying smoothly from 0 at the surface of the
+///        octahedron to 1 at the halfWidth and interior of the octahedron.
+///
+/// @param scale     Scale of octahedron in world units
+/// @param center    Center of octahedron in world units
+/// @param voxelSize Size of a voxel in world units
+/// @param halfWidth Half-width of narrow band in voxel units
+/// @param origin    Origin of grid in world units
+/// @param name      Name of the grid
+/// @param sMode     Mode of computation for the statistics.
+template<typename BuildT = float, typename BufferT = HostBuffer>
+typename util::disable_if<util::is_same<FpN, BuildT>::value, GridHandle<BufferT>>::type
+createLevelSetOctahedron(double scale = 100.0,
+                         const Vec3d& center = Vec3d(0.0),
+                         double voxelSize = 1.0,
+                         double halfWidth = 3.0,
+                         const Vec3d& origin = Vec3d(0.0),
+                         const std::string& name = "octahedron_ls",
+                         StatsMode sMode = StatsMode::Default,
+                         CheckMode cMode = CheckMode::Default,
+                         const BufferT& buffer = BufferT());
+
+template<typename BuildT, typename BufferT = HostBuffer>
+typename util::enable_if<util::is_same<FpN, BuildT>::value, GridHandle<BufferT>>::type
+createLevelSetOctahedron(double scale = 100.0,
+                         const Vec3d& center = Vec3d(0.0),
+                         double voxelSize = 1.0,
+                         double halfWidth = 3.0,
+                         const Vec3d& origin = Vec3d(0.0),
+                         const std::string& name = "octahedron_ls_FpN",
+                         StatsMode sMode = StatsMode::Default,
+                         CheckMode cMode = CheckMode::Default,
+                         float tolerance = -1.0f,
+                         bool ditherOn = false,
+                         const BufferT& buffer = BufferT());
+
+//================================================================================================
+
+/// @brief Returns a handle to a sparse fog volume of an octahedron such
+///        that the exterior is 0 and inactive, the interior is active
+///        with values varying smoothly from 0 at the surface of the
+///        octahedron to 1 at the halfWidth and interior of the octahedron.
+///
+/// @param scale Scale of octahedron in world units
+/// @param center Center of octahedron in world units
+/// @param voxelSize Size of a voxel in world units
+/// @param halfWidth Half-width of narrow band in voxel units
+/// @param origin Origin of grid in world units
+/// @param name Name of the grid
+/// @param sMode Mode of computation for the statistics.
+/// @param cMode Mode of computation for the checksum.
+/// @param tolerance Global error tolerance used when VoxelT = FpN
+/// @param ditherOn If true dithering will be applied when VoxelT = {Fp4,Fp8,Fp16,FpN}
+/// @param buffer Buffer used for memory allocation by the handle
+///
+/// @details The @c BuildT template parameter must be one of the following:
+///          float (default), double, Fp4, Fp8, Fp16 or FpN. The @c tolerance
+///          argument is only used when BuildT is set to FpN.
+template<typename BuildT = float, typename BufferT = HostBuffer>
+typename util::disable_if<util::is_same<FpN, BuildT>::value, GridHandle<BufferT>>::type
+createFogVolumeOctahedron(double scale = 100.0,
+                          const Vec3d& center = Vec3d(0.0),
+                          double voxelSize = 1.0,
+                          double halfWidth = 3.0,
+                          const Vec3d& origin = Vec3d(0.0),
+                          const std::string& name = "octahedron_fog",
+                          StatsMode sMode = StatsMode::Default,
+                          CheckMode cMode = CheckMode::Default,
+                          const BufferT& buffer = BufferT());
+
+template<typename BuildT, typename BufferT = HostBuffer>
+typename util::enable_if<util::is_same<FpN, BuildT>::value, GridHandle<BufferT>>::type
+createFogVolumeOctahedron(double scale = 100.0,
+                          const Vec3d& center = Vec3d(0.0),
+                          double voxelSize = 1.0,
+                          double halfWidth = 3.0,
+                          const Vec3d& origin = Vec3d(0.0),
+                          const std::string& name = "octahedron_fog_FpN",
+                          StatsMode sMode = StatsMode::Default,
+                          CheckMode cMode = CheckMode::Default,
+                          float tolerance = -1.0f,
+                          bool ditherOn = false,
+                          const BufferT& buffer = BufferT());
+
+//================================================================================================
+
+/// @brief Returns a handle to a narrow-band level set of a bounding-box (= wireframe of a box)
+///
+/// @param width Width of box in world units
+/// @param height Height of box in world units
+/// @param depth Depth of box in world units
+/// @param thickness Thickness of the wire in world units
+/// @param center Center of bbox in world units
+/// @param voxelSize Size of a voxel in world units
+/// @param halfWidth Half-width of narrow band in voxel units
+/// @param origin Origin of grid in world units
+/// @param name Name of the grid
+/// @param sMode Mode of computation for the statistics.
+/// @param cMode Mode of computation for the checksum.
+/// @param tolerance Global error tolerance used when VoxelT = FpN
+/// @param ditherOn If true dithering will be applied when VoxelT = {Fp4,Fp8,Fp16,FpN}
+/// @param buffer Buffer used for memory allocation by the handle
+///
+/// @details The @c BuildT template parameter must be one of the following:
+///          float (default), double, Fp4, Fp8, Fp16 or FpN. The @c tolerance
+///          argument is only used when BuildT is set to FpN.
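+///
+/// @par Example
+/// A minimal usage sketch (illustrative only): a wireframe box with
+/// 10 world-unit thick wires, built as a double-precision level set.
+/// @code
+/// auto handle = nanovdb::tools::createLevelSetBBox<double>(40.0, 60.0, 100.0, 10.0);
+/// @endcode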
+template<typename BuildT = float, typename BufferT = HostBuffer>
+typename util::disable_if<util::is_same<FpN, BuildT>::value, GridHandle<BufferT>>::type
+createLevelSetBBox(double width = 40.0,
+                   double height = 60.0,
+                   double depth = 100.0,
+                   double thickness = 10.0,
+                   const Vec3d& center = Vec3d(0.0),
+                   double voxelSize = 1.0,
+                   double halfWidth = 3.0,
+                   const Vec3d& origin = Vec3d(0.0),
+                   const std::string& name = "bbox_ls",
+                   StatsMode sMode = StatsMode::Default,
+                   CheckMode cMode = CheckMode::Default,
+                   const BufferT& buffer = BufferT());
+
+template<typename BuildT, typename BufferT = HostBuffer>
+typename util::enable_if<util::is_same<FpN, BuildT>::value, GridHandle<BufferT>>::type
+createLevelSetBBox(double width = 40.0,
+                   double height = 60.0,
+                   double depth = 100.0,
+                   double thickness = 10.0,
+                   const Vec3d& center = Vec3d(0.0),
+                   double voxelSize = 1.0,
+                   double halfWidth = 3.0,
+                   const Vec3d& origin = Vec3d(0.0),
+                   const std::string& name = "bbox_ls_FpN",
+                   StatsMode sMode = StatsMode::Default,
+                   CheckMode cMode = CheckMode::Default,
+                   float tolerance = -1.0f,
+                   bool ditherOn = false,
+                   const BufferT& buffer = BufferT());
+
+
+//================================================================================================
+
+/// @brief Returns a handle to a PointDataGrid containing points scattered
+///        on the surface of a box.
+///
+/// @param pointsPerVoxel Number of points per voxel on the surface
+/// @param width Width of box in world units
+/// @param height Height of box in world units
+/// @param depth Depth of box in world units
+/// @param center Center of box in world units
+/// @param voxelSize Size of a voxel in world units
+/// @param origin Origin of grid in world units
+/// @param name Name of the grid
+/// @param mode Mode of computation for the checksum.
+/// @param buffer Buffer used for memory allocation by the handle
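+///
+/// @par Example
+/// A minimal usage sketch (illustrative only): one point per active surface
+/// voxel. This assumes, as in createPointScatter below, that the resulting
+/// point grid stores per-voxel counts with a uint32_t build type and the
+/// world-space point coordinates as blind data.
+/// @code
+/// auto handle = nanovdb::tools::createPointBox(1, 40.0, 60.0, 100.0);
+/// auto* grid = handle.grid<uint32_t>();
+/// @endcode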
+template<typename BuildT = float, typename BufferT = HostBuffer>
+typename util::disable_if<util::is_same<FpN, BuildT>::value, GridHandle<BufferT>>::type
+createPointBox(int pointsPerVoxel = 1, // number of points per voxel on the surface of the box
+               double width = 40.0, // width of box in world units
+               double height = 60.0, // height of box in world units
+               double depth = 100.0, // depth of box in world units
+               const Vec3d& center = Vec3d(0.0), // center of box in world units
+               double voxelSize = 1.0, // size of a voxel in world units
+               const Vec3d& origin = Vec3d(0.0), // origin of grid in world units
+               const std::string& name = "box_points", // name of grid
+               CheckMode mode = CheckMode::Default,
+               const BufferT& buffer = BufferT());
+
+//================================================================================================
+
+/// @brief Given an input NanoVDB voxel grid this method returns a GridHandle to another NanoVDB
+///        PointDataGrid with points scattered in the active leaf voxels of the input grid. Note,
+///        the coordinates of the points are encoded as blind data in world-space.
+///
+/// @param srcGrid Const input grid used to determine the active voxels to scatter points into
+/// @param pointsPerVoxel Number of points scattered in each active voxel
+/// @param name Name of the grid
+/// @param mode Mode of computation for the checksum.
+/// @param buffer Buffer used for memory allocation by the handle
+template<typename SrcBuildT = float, typename BufferT = HostBuffer>
+inline GridHandle<BufferT>
+createPointScatter(const NanoGrid<SrcBuildT>& srcGrid, // source grid used to scatter points into
+                   int pointsPerVoxel = 1, // number of points to be scattered in each active voxel
+                   const std::string& name = "point_scatter", // name of grid
+                   CheckMode mode = CheckMode::Default,
+                   const BufferT& buffer = BufferT());
+
+//================================================================================================
+
+namespace {
+
+/// @brief Returns a shared pointer to a build::Grid containing narrow-band SDF values for a sphere
+///
+/// @note Note, this is not (yet) a valid level set SDF field since values inside the sphere (and outside
+///       the narrow band) are still undefined. Call build::sdfToLevelSet() to set those
+///       values or alternatively call build::levelSetToFog to generate a FOG volume.
+///
+/// @details The @c BuildT template parameter must be one of the following:
+///          float (default), double, Fp4, Fp8, Fp16 or FpN.
+template<typename BuildT>
+std::shared_ptr<build::Grid<BuildT>>
+initSphere(double radius, // radius of sphere in world units
+           const Vec3d& center, // center of sphere in world units
+           double voxelSize, // size of a voxel in world units
+           double halfWidth, // half-width of narrow band in voxel units
+           const Vec3d& origin) // origin of grid in world units
+{
+    using GridT = build::Grid<BuildT>;
+    using ValueT = typename BuildToValueMap<BuildT>::type;
+    static_assert(util::is_floating_point<ValueT>::value, "initSphere: expect floating point");
+    if (!(radius > 0))
+        throw std::runtime_error("Sphere: radius must be positive!");
+    if (!(voxelSize > 0))
+        throw std::runtime_error("Sphere: voxelSize must be positive!");
+    if (!(halfWidth > 0))
+        throw std::runtime_error("Sphere: halfWidth must be positive!");
+
+    auto grid = std::make_shared<GridT>(ValueT(halfWidth * voxelSize));
+    grid->setTransform(voxelSize, origin);
+
+    // Define radius of sphere with narrow-band in voxel units
+    const ValueT r0 = radius / ValueT(voxelSize), rmax = r0 + ValueT(halfWidth);
+
+    // Radius below the Nyquist frequency
+    if (r0 < ValueT(1.5f)) return grid;
+
+    // Define center of sphere in voxel units
+    const math::Vec3<ValueT> c(ValueT(center[0] - origin[0]) / ValueT(voxelSize),
+                               ValueT(center[1] - origin[1]) / ValueT(voxelSize),
+                               ValueT(center[2] - origin[2]) / ValueT(voxelSize));
+
+    // Define bounds of the voxel coordinates
+    const int imin = math::Floor(c[0] - rmax), imax = math::Ceil(c[0] + rmax);
+    const int jmin = math::Floor(c[1] - rmax), jmax = math::Ceil(c[1] + rmax);
+    const int kmin = math::Floor(c[2] - rmax), kmax = math::Ceil(c[2] + rmax);
+
+    const util::Range<1,int> range(imin, imax+1, 32);
+
+    auto kernel = [&](const util::Range<1,int> &r) {
+        auto acc = grid->getWriteAccessor();
+        Coord ijk;
+        int &i = ijk[0], &j = ijk[1], &k = ijk[2], m = 1;
+        // Compute signed distances to sphere using leapfrogging in k
+        for (i = r.begin(); i < r.end(); ++i) {
+            const auto x2 = math::Pow2(ValueT(i) - c[0]);
+            for (j = jmin; j <= jmax; ++j) {
+                const auto x2y2 = math::Pow2(ValueT(j) - c[1]) + x2;
+                for (k = kmin; k <= kmax; k += m) {
+                    m = 1;
+                    const auto v = math::Sqrt(x2y2 + math::Pow2(ValueT(k) - c[2])) - r0; // Distance in voxel units
+                    const auto d = v < 0 ?
-v : v; + if (d < halfWidth) { // inside narrow band + acc.setValue(ijk, ValueT(voxelSize) * v); // distance in world units + } else { // outside narrow band + m += math::Floor(d - halfWidth); // leapfrog + } + } //end leapfrog over k + } //end loop over j + } //end loop over i + };// kernel +#ifdef NANOVDB_PARALLEL_PRIMITIVES + util::forEach(range, kernel); +#else + kernel(range); +#endif + return grid; +} // initSphere + +template +std::shared_ptr> +initTorus(double radius1, // major radius of torus in world units + double radius2, // minor radius of torus in world units + const Vec3d& center, // center of torus in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin) // origin of grid in world units +{ + using GridT = build::Grid; + using ValueT = typename BuildToValueMap::type; + static_assert(util::is_floating_point::value, "initTorus: expect floating point"); + if (!(radius2 > 0)) + throw std::runtime_error("Torus: radius2 must be positive!"); + if (!(radius1 > radius2)) + throw std::runtime_error("Torus: radius1 must be larger than radius2!"); + if (!(voxelSize > 0)) + throw std::runtime_error("Torus: voxelSize must be positive!"); + if (!(halfWidth > 0)) + throw std::runtime_error("Torus: halfWidth must be positive!"); + + auto grid = std::make_shared(ValueT(halfWidth * voxelSize)); + grid->setTransform(voxelSize, origin); + + // Define size of torus with narrow-band in voxel units + const ValueT r1 = radius1 / ValueT(voxelSize), r2 = radius2 / ValueT(voxelSize), rmax1 = r1 + r2 + ValueT(halfWidth), rmax2 = r2 + ValueT(halfWidth); + + // Radius below the Nyquist frequency + if (r2 < ValueT(1.5)) return grid; + + // Define center of torus in voxel units + const math::Vec3 c(ValueT(center[0] - origin[0]) / ValueT(voxelSize), + ValueT(center[1] - origin[1]) / ValueT(voxelSize), + ValueT(center[2] - origin[2]) / ValueT(voxelSize)); + + // Define bounds of the voxel coordinates + const int imin = math::Floor(c[0] - rmax1), imax = math::Ceil(c[0] + rmax1); + const int jmin = math::Floor(c[1] - rmax2), jmax = math::Ceil(c[1] + rmax2); + const int kmin = math::Floor(c[2] - rmax1), kmax = math::Ceil(c[2] + rmax1); + + const util::Range<1,int> range(imin, imax+1, 32); + auto kernel = [&](const util::Range<1,int> &r) { + auto acc = grid->getWriteAccessor(); + Coord ijk; + int &i = ijk[0], &j = ijk[1], &k = ijk[2], m = 1; + // Compute signed distances to torus using leapfrogging in k + for (i = r.begin(); i < r.end(); ++i) { + const auto x2 = math::Pow2(ValueT(i) - c[0]); + for (k = kmin; k <= kmax; ++k) { + const auto x2z2 = math::Pow2(math::Sqrt(math::Pow2(ValueT(k) - c[2]) + x2) - r1); + for (j = jmin; j <= jmax; j += m) { + m = 1; + const auto v = math::Sqrt(x2z2 + math::Pow2(ValueT(j) - c[1])) - r2; // Distance in voxel units + const auto d = v < 0 ? 
-v : v;
+                    if (d < halfWidth) { // inside narrow band
+                        acc.setValue(ijk, ValueT(voxelSize) * v); // distance in world units
+                    } else { // outside narrow band
+                        m += math::Floor(d - halfWidth); // leapfrog
+                    }
+                } //end leapfrog over j
+            } //end loop over k
+        } //end loop over i
+    }; // kernel
+
+#ifdef NANOVDB_PARALLEL_PRIMITIVES
+    util::forEach(range, kernel);
+#else
+    kernel(range);
+#endif
+
+    return grid;
+} // initTorus
+
+template<typename BuildT>
+std::shared_ptr<build::Grid<BuildT>>
+initBox(double width, // width of box in world units
+        double height, // height of box in world units
+        double depth, // depth of box in world units
+        const Vec3d& center, // center of box in world units
+        double voxelSize, // size of a voxel in world units
+        double halfWidth, // half-width of narrow band in voxel units
+        const Vec3d& origin) // origin of grid in world units
+{
+    using GridT = build::Grid<BuildT>;
+    using ValueT = typename BuildToValueMap<BuildT>::type;
+    static_assert(util::is_floating_point<ValueT>::value, "initBox: expect floating point");
+    using Vec3T = math::Vec3<ValueT>;
+    if (!(width > 0))
+        throw std::runtime_error("Box: width must be positive!");
+    if (!(height > 0))
+        throw std::runtime_error("Box: height must be positive!");
+    if (!(depth > 0))
+        throw std::runtime_error("Box: depth must be positive!");
+    if (!(voxelSize > 0))
+        throw std::runtime_error("Box: voxelSize must be positive!");
+    if (!(halfWidth > 0))
+        throw std::runtime_error("Box: halfWidth must be positive!");
+
+    auto grid = std::make_shared<GridT>(ValueT(halfWidth * voxelSize));
+    grid->setTransform(voxelSize, origin);
+
+    // Define size of box with narrow-band in voxel units
+    const Vec3T r(width / (2 * ValueT(voxelSize)),
+                  height / (2 * ValueT(voxelSize)),
+                  depth / (2 * ValueT(voxelSize)));
+
+    // Below the Nyquist frequency
+    if (r.min() < ValueT(1.5)) return grid;
+
+    // Define center of box in voxel units
+    const Vec3T c(ValueT(center[0] - origin[0]) / ValueT(voxelSize),
+                  ValueT(center[1] - origin[1]) / ValueT(voxelSize),
+                  ValueT(center[2] - origin[2]) / ValueT(voxelSize));
+
+    // Define utility functions
+    auto Pos = [](ValueT x) { return x > 0 ? x : 0; };
+    auto Neg = [](ValueT x) { return x < 0 ?
x : 0; }; + + // Define bounds of the voxel coordinates + const math::BBox b(c - r - Vec3T(ValueT(halfWidth)), c + r + Vec3T(ValueT(halfWidth))); + const CoordBBox bbox(Coord(math::Floor(b[0][0]), math::Floor(b[0][1]), math::Floor(b[0][2])), + Coord(math::Ceil(b[1][0]), math::Ceil(b[1][1]), math::Ceil(b[1][2]))); + const util::Range<1,int> range(bbox[0][0], bbox[1][0]+1, 32); + + // Compute signed distances to box using leapfrogging in k + auto kernel = [&](const util::Range<1,int> &ra) { + auto acc = grid->getWriteAccessor(); + int m = 1; + for (Coord p(ra.begin(),bbox[0][1],bbox[0][2]); p[0] < ra.end(); ++p[0]) { + const auto q1 = math::Abs(ValueT(p[0]) - c[0]) - r[0]; + const auto x2 = math::Pow2(Pos(q1)); + for (p[1] = bbox[0][1]; p[1] <= bbox[1][1]; ++p[1]) { + const auto q2 = math::Abs(ValueT(p[1]) - c[1]) - r[1]; + const auto q0 = math::Max(q1, q2); + const auto x2y2 = x2 + math::Pow2(Pos(q2)); + for (p[2] = bbox[0][2]; p[2] <= bbox[1][2]; p[2] += m) { + m = 1; + const auto q3 = math::Abs(ValueT(p[2]) - c[2]) - r[2]; + const auto v = math::Sqrt(x2y2 + math::Pow2(Pos(q3))) + Neg(math::Max(q0, q3)); // Distance in voxel units + const auto d = math::Abs(v); + if (d < halfWidth) { // inside narrow band + acc.setValue(p, ValueT(voxelSize) * v); // distance in world units + } else { // outside narrow band + m += math::Floor(d - halfWidth); // leapfrog + } + } //end leapfrog over k + } //end loop over j + } //end loop over i + }; // kernel +#ifdef NANOVDB_PARALLEL_PRIMITIVES + util::forEach(range, kernel); +#else + kernel(range); +#endif + return grid; +} // initBox + +template +std::shared_ptr> +initBBox(double width, // width of the bbox in world units + double height, // height of the bbox in world units + double depth, // depth of the bbox in world units + double thickness, // thickness of the wire in world units + const Vec3d& center, // center of bbox in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin) // origin of grid in world units +{ + using GridT = build::Grid; + using ValueT = typename BuildToValueMap::type; + static_assert(util::is_floating_point::value, "initBBox: expect floating point"); + using Vec3T = math::Vec3; + if (!(width > 0)) + throw std::runtime_error("BBox: width must be positive!"); + if (!(height > 0)) + throw std::runtime_error("BBox: height must be positive!"); + if (!(depth > 0)) + throw std::runtime_error("BBox: depth must be positive!"); + if (!(thickness > 0)) + throw std::runtime_error("BBox: thickness must be positive!"); + if (!(voxelSize > 0.0)) + throw std::runtime_error("BBox: voxelSize must be positive!"); + + + auto grid = std::make_shared(ValueT(halfWidth * voxelSize)); + grid->setTransform(voxelSize, origin); + + // Define size of bbox with narrow-band in voxel units + const Vec3T r(width / (2 * ValueT(voxelSize)), + height / (2 * ValueT(voxelSize)), + depth / (2 * ValueT(voxelSize))); + const ValueT e = thickness / ValueT(voxelSize); + + // Below the Nyquist frequency + if (r.min() < ValueT(1.5) || e < ValueT(1.5)) return grid; + + // Define center of bbox in voxel units + const Vec3T c(ValueT(center[0] - origin[0]) / ValueT(voxelSize), + ValueT(center[1] - origin[1]) / ValueT(voxelSize), + ValueT(center[2] - origin[2]) / ValueT(voxelSize)); + + // Define utility functions + auto Pos = [](ValueT x) { return x > 0 ? x : 0; }; + auto Neg = [](ValueT x) { return x < 0 ? 
x : 0; }; + + // Define bounds of the voxel coordinates + const math::BBox b(c - r - Vec3T(e + ValueT(halfWidth)), c + r + Vec3T(e + ValueT(halfWidth))); + const CoordBBox bbox(Coord(math::Floor(b[0][0]), math::Floor(b[0][1]), math::Floor(b[0][2])), + Coord(math::Ceil(b[1][0]), math::Ceil(b[1][1]), math::Ceil(b[1][2]))); + const util::Range<1,int> range(bbox[0][0], bbox[1][0]+1, 32); + + // Compute signed distances to bbox using leapfrogging in k + auto kernel = [&](const util::Range<1,int> &ra) { + auto acc = grid->getWriteAccessor(); + int m = 1; + for (Coord p(ra.begin(),bbox[0][1],bbox[0][2]); p[0] < ra.end(); ++p[0]) { + const ValueT px = math::Abs(ValueT(p[0]) - c[0]) - r[0]; + const ValueT qx = math::Abs(ValueT(px) + e) - e; + const ValueT px2 = math::Pow2(Pos(px)); + const ValueT qx2 = math::Pow2(Pos(qx)); + for (p[1] = bbox[0][1]; p[1] <= bbox[1][1]; ++p[1]) { + const ValueT py = math::Abs(ValueT(p[1]) - c[1]) - r[1]; + const ValueT qy = math::Abs(ValueT(py) + e) - e; + const ValueT qy2 = math::Pow2(Pos(qy)); + const ValueT px2qy2 = px2 + qy2; + const ValueT qx2py2 = qx2 + math::Pow2(Pos(py)); + const ValueT qx2qy2 = qx2 + qy2; + const ValueT a[3] = {math::Max(px, qy), math::Max(qx, py), math::Max(qx, qy)}; + for (p[2] = bbox[0][2]; p[2] <= bbox[1][2]; p[2] += m) { + m = 1; + const ValueT pz = math::Abs(ValueT(p[2]) - c[2]) - r[2]; + const ValueT qz = math::Abs(ValueT(pz) + e) - e; + const ValueT qz2 = math::Pow2(Pos(qz)); + const ValueT s1 = math::Sqrt(px2qy2 + qz2) + Neg(math::Max(a[0], qz)); + const ValueT s2 = math::Sqrt(qx2py2 + qz2) + Neg(math::Max(a[1], qz)); + const ValueT s3 = math::Sqrt(qx2qy2 + math::Pow2(Pos(pz))) + Neg(math::Max(a[2], pz)); + const ValueT v = math::Min(s1, math::Min(s2, s3)); // Distance in voxel units + const ValueT d = math::Abs(v); + if (d < halfWidth) { // inside narrow band + acc.setValue(p, ValueT(voxelSize) * v); // distance in world units + } else { // outside narrow band + m += math::Floor(d - halfWidth); // leapfrog + } + } //end leapfrog over k + } //end loop over j + } //end loop over i + }; //kernel +#ifdef NANOVDB_PARALLEL_PRIMITIVES + util::forEach(range, kernel); +#else + kernel(range); +#endif + + return grid; +} // initBBox + +template +std::shared_ptr> +initOctahedron(double scale, // scale of the octahedron in world units + const Vec3d& center, // center of octahedron in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin) // origin of grid in world units +{ + using GridT = build::Grid; + using ValueT = typename BuildToValueMap::type; + using Vec3T = math::Vec3; + static_assert(util::is_floating_point::value, "initOctahedron: expect floating point"); + + if (!(scale > 0)) throw std::runtime_error("Octahedron: width must be positive!"); + if (!(voxelSize > 0)) throw std::runtime_error("Octahedron: voxelSize must be positive!"); + + auto grid = std::make_shared(ValueT(halfWidth * voxelSize)); + grid->setTransform(voxelSize, origin); + + // Define size of octahedron with narrow-band in voxel units + const ValueT s = scale / (2 * ValueT(voxelSize)); + + // Below the Nyquist frequency + if ( s < ValueT(1.5) ) return grid; + + // Define center of octahedron in voxel units + const Vec3T c(ValueT(center[0] - origin[0]) / ValueT(voxelSize), + ValueT(center[1] - origin[1]) / ValueT(voxelSize), + ValueT(center[2] - origin[2]) / ValueT(voxelSize)); + + // Define utility functions + auto sdf = [&s](ValueT x, ValueT y, ValueT z) { + const ValueT d 
= ValueT(0.5)*(z - y + s); + if (d < ValueT(0)) { + return Vec3T(x, y - s, z).length(); + } else if (d > s) { + return Vec3T(x, y, z - s).length(); + } + return Vec3T(x, y - s + d, z - d).length(); + }; + + // Define bounds of the voxel coordinates + const math::BBox b(c - Vec3T(s + ValueT(halfWidth)), c + Vec3T(s + ValueT(halfWidth))); + const CoordBBox bbox(Coord(math::Floor(b[0][0]), math::Floor(b[0][1]), math::Floor(b[0][2])), + Coord(math::Ceil(b[1][0]), math::Ceil(b[1][1]), math::Ceil(b[1][2]))); + const util::Range<1,int> range(bbox[0][0], bbox[1][0]+1, 32); + + // Compute signed distances to octahedron using leapfrogging in k + auto kernel = [&](const util::Range<1,int> &ra) { + auto acc = grid->getWriteAccessor(); + int m = 1; + static const ValueT a = math::Sqrt(ValueT(1)/ValueT(3)); + for (Coord p(ra.begin(),bbox[0][1],bbox[0][2]); p[0] < ra.end(); ++p[0]) { + const ValueT px = math::Abs(ValueT(p[0]) - c[0]); + for (p[1] = bbox[0][1]; p[1] <= bbox[1][1]; ++p[1]) { + const ValueT py = math::Abs(ValueT(p[1]) - c[1]); + for (p[2] = bbox[0][2]; p[2] <= bbox[1][2]; p[2] += m) { + m = 1; + const ValueT pz = math::Abs(ValueT(p[2]) - c[2]); + ValueT d = px + py + pz - s; + ValueT v; + if (ValueT(3)*px < d) { + v = sdf(px, py, pz); + } else if (ValueT(3)*py < d) { + v = sdf(py, pz, px); + } else if (ValueT(3)*pz < d) { + v = sdf(pz, px, py); + } else { + v = a * d; + } + d = math::Abs(v); + if (d < halfWidth) { // inside narrow band + acc.setValue(p, ValueT(voxelSize) * v); // distance in world units + } else { // outside narrow band + m += math::Floor(d - halfWidth); // leapfrog + } + } //end leapfrog over k + } //end loop over j + } //end loop over i + };// kernel +#ifdef NANOVDB_PARALLEL_PRIMITIVES + util::forEach(range, kernel); +#else + kernel(range); +#endif + return grid; +} // initOctahedron + +} // unnamed namespace + +//================================================================================================ + +template +typename util::enable_if::value, GridHandle>::type +createLevelSetSphere(double radius, // radius of sphere in world units + const Vec3d& center, // center of sphere in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + CheckMode cMode, // mode of computation for the checksum + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initSphere(radius, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + auto handle = converter.template getHandle(buffer); + assert(handle); + return handle; +} // createLevelSetSphere + +//================================================================================================ + +template +typename util::enable_if::value, GridHandle>::type +createLevelSetSphere(double radius, // radius of sphere in world units + const Vec3d& center, // center of sphere in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + CheckMode cMode, // mode of computation for the checksum + 
bool ditherOn, + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initSphere(radius, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.enableDithering(ditherOn); + auto handle = converter.template getHandle(buffer); + assert(handle); + return handle; +} // createLevelSetSphere + +//================================================================================================ + +template +typename util::enable_if::value, GridHandle>::type +createLevelSetSphere(double radius, // radius of sphere in world units + const Vec3d& center, // center of sphere in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + CheckMode cMode, // mode of computation for the checksum + float tolerance,// only used if VoxelT = FpN + bool ditherOn, + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initSphere(radius, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.enableDithering(ditherOn); + AbsDiff oracle(tolerance); + auto handle = converter.template getHandle(oracle, buffer); + assert(handle); + return handle; +} // createLevelSetSphere + +//================================================================================================ + +template +typename util::disable_if::value, GridHandle>::type +createFogVolumeSphere(double radius, // radius of sphere in world units + const Vec3d& center, // center of sphere in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + CheckMode cMode, // mode of computation for the checksum + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initSphere(radius, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + build::levelSetToFog(mgr, false); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + auto handle = converter.template getHandle(buffer); + assert(handle); + return handle; +} // createFogVolumeSphere + +//================================================================================================ + +template +typename util::enable_if::value, GridHandle>::type +createFogVolumeSphere(double radius, // radius of sphere in world units + const Vec3d& center, // center of sphere in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + CheckMode cMode, // mode of computation for the checksum + float tolerance,// only used if VoxelT = FpN + bool ditherOn, + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = 
initSphere(radius, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + build::levelSetToFog(mgr, false); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.enableDithering(ditherOn); + AbsDiff oracle(tolerance); + auto handle = converter.template getHandle(oracle, buffer); + assert(handle); + return handle; +} // createFogVolumeSphere + +//================================================================================================ + +template +typename util::disable_if::value, GridHandle>::type +createPointSphere(int pointsPerVoxel, // number of points to be scattered in each active voxel + double radius, // radius of sphere in world units + const Vec3d& center, // center of sphere in world units + double voxelSize, // size of a voxel in world units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + CheckMode cMode, // mode of computation for the checksum + const BufferT& buffer) +{ + auto sphereHandle = createLevelSetSphere(radius, center, voxelSize, 0.5, origin, "dummy", + StatsMode::BBox, CheckMode::Disable, buffer); + assert(sphereHandle); + auto* sphereGrid = sphereHandle.template grid(); + assert(sphereGrid); + auto pointHandle = createPointScatter(*sphereGrid, pointsPerVoxel, name, cMode, buffer); + assert(pointHandle); + return pointHandle; +} // createPointSphere + +//================================================================================================ + +template +typename util::disable_if::value, GridHandle>::type +createLevelSetTorus(double majorRadius, // major radius of torus in world units + double minorRadius, // minor radius of torus in world units + const Vec3d& center, // center of torus in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + CheckMode cMode, // mode of computation for the checksum + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initTorus(majorRadius, minorRadius, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + auto handle = converter.template getHandle(buffer); + assert(handle); + return handle; +} // createLevelSetTorus + +//================================================================================================ + +template +typename util::enable_if::value, GridHandle>::type +createLevelSetTorus(double majorRadius, // major radius of torus in world units + double minorRadius, // minor radius of torus in world units + const Vec3d& center, // center of torus in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + CheckMode cMode, // mode of computation for the checksum + float tolerance, + bool ditherOn, + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initTorus(majorRadius, minorRadius, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager 
mgr(*grid); + build::sdfToLevelSet(mgr); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.enableDithering(ditherOn); + AbsDiff oracle(tolerance); + auto handle = converter.template getHandle(oracle, buffer); + assert(handle); + return handle; +} // createLevelSetTorus + +//================================================================================================ + +template +typename util::disable_if::value, GridHandle>::type +createFogVolumeTorus(double majorRadius, // major radius of torus in world units + double minorRadius, // minor radius of torus in world units + const Vec3d& center, // center of torus in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + CheckMode cMode, // mode of computation for the checksum + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initTorus(majorRadius, minorRadius, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + build::levelSetToFog(mgr, false); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + auto handle = converter.template getHandle(buffer); + assert(handle); + return handle; +} // createFogVolumeTorus + +//================================================================================================ + +template +typename util::enable_if::value, GridHandle>::type +createFogVolumeTorus(double majorRadius, // major radius of torus in world units + double minorRadius, // minor radius of torus in world units + const Vec3d& center, // center of torus in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + CheckMode cMode, // mode of computation for the checksum + float tolerance, + bool ditherOn, + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initTorus(majorRadius, minorRadius, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + build::levelSetToFog(mgr, false); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.enableDithering(ditherOn); + AbsDiff oracle(tolerance); + auto handle = converter.template getHandle(oracle, buffer); + assert(handle); + return handle; +} // createFogVolumeTorus + +//================================================================================================ + +template +typename util::disable_if::value, GridHandle>::type +createPointTorus(int pointsPerVoxel, // number of points to be scattered in each active voxel + double majorRadius, // major radius of torus in world units + double minorRadius, // minor radius of torus in world units + const Vec3d& center, // center of torus in world units + double voxelSize, // size of a voxel in world units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + CheckMode cMode, // mode of computation for the checksum + const BufferT& buffer) +{ + auto torusHandle = createLevelSetTorus(majorRadius, minorRadius, 
center, voxelSize, 0.5f, origin, + "dummy", StatsMode::BBox, CheckMode::Disable, buffer); + assert(torusHandle); + auto* torusGrid = torusHandle.template grid(); + assert(torusGrid); + auto pointHandle = createPointScatter(*torusGrid, pointsPerVoxel, name, cMode, buffer); + assert(pointHandle); + return pointHandle; +} // createPointTorus + +//================================================================================================ + +template +typename util::disable_if::value, GridHandle>::type +createLevelSetBox(double width, // width of box in world units + double height, // height of box in world units + double depth, // depth of box in world units + const Vec3d& center, // center of box in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + CheckMode cMode, // mode of computation for the checksum + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initBox(width, height, depth, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + auto handle = converter.template getHandle(buffer); + assert(handle); + return handle; +} // createLevelSetBox + +//================================================================================================ + +template +typename util::enable_if::value, GridHandle>::type +createLevelSetBox(double width, // width of box in world units + double height, // height of box in world units + double depth, // depth of box in world units + const Vec3d& center, // center of box in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + CheckMode cMode, // mode of computation for the checksum + float tolerance, + bool ditherOn, + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initBox(width, height, depth, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.enableDithering(ditherOn); + AbsDiff oracle(tolerance); + auto handle = converter.template getHandle(oracle, buffer); + assert(handle); + return handle; +} // createLevelSetBox + +//================================================================================================ + +template +typename util::disable_if::value, GridHandle>::type +createLevelSetOctahedron(double scale, // scale of the octahedron in world units + const Vec3d& center, // center of box in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + CheckMode cMode, // mode of computation for the checksum + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initOctahedron(scale, center, voxelSize, halfWidth, origin); + grid->mName = name; + 
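+    // Common serialization pipeline for all the createLevelSet* factories:
+    // first finalize the narrow-band SDF on the intermediate build grid
+    // (sign propagation of interior/exterior values), then convert it into
+    // a compact NanoVDB grid with the requested stats and checksum modes.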
build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + auto handle = converter.template getHandle(buffer); + assert(handle); + return handle; +} // createLevelSetOctahedron + +//================================================================================================ + +template +typename util::enable_if::value, GridHandle>::type +createLevelSetOctahedron(double scale, // scale of the octahedron in world units + const Vec3d& center, // center of box in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + CheckMode cMode, // mode of computation for the checksum + float tolerance, + bool ditherOn, + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initOctahedron(scale, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.enableDithering(ditherOn); + AbsDiff oracle(tolerance); + auto handle = converter.template getHandle(oracle, buffer); + assert(handle); + return handle; +} // createLevelSetOctahedron + +//================================================================================================ + +template +typename util::disable_if::value, GridHandle>::type +createLevelSetBBox(double width, // width of bbox in world units + double height, // height of bbox in world units + double depth, // depth of bbox in world units + double thickness, // thickness of the wire in world units + const Vec3d& center, // center of bbox in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + CheckMode cMode, // mode of computation for the checksum + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initBBox(width, height, depth, thickness, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + auto handle = converter.template getHandle(buffer); + assert(handle); + return handle; +} // createLevelSetBBox + +//================================================================================================ + +template +typename util::enable_if::value, GridHandle>::type +createLevelSetBBox(double width, // width of bbox in world units + double height, // height of bbox in world units + double depth, // depth of bbox in world units + double thickness, // thickness of the wire in world units + const Vec3d& center, // center of bbox in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + CheckMode cMode, // mode of computation for the checksum + float tolerance, + bool ditherOn, + const BufferT& buffer) +{ + using GridT = 
build::Grid; + auto grid = initBBox(width, height, depth, thickness, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.enableDithering(ditherOn); + AbsDiff oracle(tolerance); + auto handle = converter.template getHandle(oracle, buffer); + assert(handle); + return handle; +} // createLevelSetBBox + +//================================================================================================ + +template +typename util::disable_if::value, GridHandle>::type +createFogVolumeBox(double width, // width of box in world units + double height, // height of box in world units + double depth, // depth of box in world units + const Vec3d& center, // center of box in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + CheckMode cMode, // mode of computation for the checksum + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initBox(width, height, depth, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + build::levelSetToFog(mgr, false); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + auto handle = converter.template getHandle(buffer); + assert(handle); + return handle; +} // createFogVolumeBox + +//================================================================================================ + +template +typename util::enable_if::value, GridHandle>::type +createFogVolumeBox(double width, // width of box in world units + double height, // height of box in world units + double depth, // depth of box in world units + const Vec3d& center, // center of box in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + CheckMode cMode, // mode of computation for the checksum + float tolerance, + bool ditherOn, + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initBox(width, height, depth, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + build::levelSetToFog(mgr, false); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.enableDithering(ditherOn); + AbsDiff oracle(tolerance); + auto handle = converter.template getHandle(oracle, buffer); + assert(handle); + return handle; +} // createFogVolumeBox + +//================================================================================================ + +template +typename util::disable_if::value, GridHandle>::type +createFogVolumeOctahedron(double scale, // scale of octahedron in world units + const Vec3d& center, // center of box in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + 
CheckMode cMode, // mode of computation for the checksum
+                          const BufferT& buffer)
+{
+    using GridT = build::Grid<BuildT>;
+    auto grid = initOctahedron<BuildT>(scale, center, voxelSize, halfWidth, origin);
+    grid->mName = name;
+    build::NodeManager<GridT> mgr(*grid);
+    build::sdfToLevelSet(mgr);
+    build::levelSetToFog(mgr, false);
+    CreateNanoGrid<GridT> converter(*grid);
+    converter.setStats(sMode);
+    converter.setChecksum(cMode);
+    auto handle = converter.template getHandle<BuildT>(buffer);
+    assert(handle);
+    return handle;
+} // createFogVolumeOctahedron
+
+//================================================================================================
+
+template<typename BuildT, typename BufferT>
+typename util::enable_if<util::is_same<FpN, BuildT>::value, GridHandle<BufferT>>::type
+createFogVolumeOctahedron(double scale, // scale of octahedron in world units
+                          const Vec3d& center, // center of octahedron in world units
+                          double voxelSize, // size of a voxel in world units
+                          double halfWidth, // half-width of narrow band in voxel units
+                          const Vec3d& origin, // origin of grid in world units
+                          const std::string& name, // name of grid
+                          StatsMode sMode, // mode of computation for the statistics
+                          CheckMode cMode, // mode of computation for the checksum
+                          float tolerance,
+                          bool ditherOn,
+                          const BufferT& buffer)
+{
+    using GridT = build::Grid<BuildT>;
+    auto grid = initOctahedron<BuildT>(scale, center, voxelSize, halfWidth, origin);
+    grid->mName = name;
+    build::NodeManager<GridT> mgr(*grid);
+    build::sdfToLevelSet(mgr);
+    build::levelSetToFog(mgr, false);
+    CreateNanoGrid<GridT> converter(*grid);
+    converter.setStats(sMode);
+    converter.setChecksum(cMode);
+    converter.enableDithering(ditherOn);
+    AbsDiff oracle(tolerance);
+    auto handle = converter.template getHandle<BuildT>(oracle, buffer);
+    assert(handle);
+    return handle;
+} // createFogVolumeOctahedron
+
+//================================================================================================
+
+template<typename BuildT, typename BufferT>
+typename util::disable_if<util::is_same<FpN, BuildT>::value, GridHandle<BufferT>>::type
+createPointBox(int pointsPerVoxel, // number of points to be scattered in each active voxel
+               double width, // width of box in world units
+               double height, // height of box in world units
+               double depth, // depth of box in world units
+               const Vec3d& center, // center of box in world units
+               double voxelSize, // size of a voxel in world units
+               const Vec3d& origin, // origin of grid in world units
+               const std::string& name, // name of grid
+               CheckMode cMode, // mode of computation for the checksum
+               const BufferT& buffer)
+{
+    auto boxHandle = createLevelSetBox<BuildT, BufferT>(width, height, depth, center, voxelSize, 0.5, origin, "dummy",
+                                                        StatsMode::BBox, CheckMode::Disable, buffer);
+    assert(boxHandle);
+    auto* boxGrid = boxHandle.template grid<BuildT>();
+    assert(boxGrid);
+    auto pointHandle = createPointScatter(*boxGrid, pointsPerVoxel, name, cMode, buffer);
+    assert(pointHandle);
+    return pointHandle;
+} // createPointBox
+
+//================================================================================================
+
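+// The definition below encodes its output as follows: each active
+// destination voxel stores a running point count per leaf node, those
+// per-leaf counts are later turned into a prefix sum stored in each leaf's
+// mMinimum member, and the world-space point coordinates are appended to
+// the grid as a blind-data array of Vec3T.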
+template<typename SrcBuildT, typename BufferT>
+inline GridHandle<BufferT>
+createPointScatter(const NanoGrid<SrcBuildT>& srcGrid, // source grid used to scatter points into
+                   int pointsPerVoxel, // number of points to be scattered in each active voxel
+                   const std::string& name, // name of grid
+                   CheckMode cMode, // mode of computation for the checksum
+                   const BufferT& buffer)
+{
+    using ValueT = typename BuildToValueMap<SrcBuildT>::type;
+    static_assert(util::is_floating_point<ValueT>::value, "createPointScatter: expect floating point");
+    using Vec3T = math::Vec3<ValueT>;
+    if (pointsPerVoxel < 1) {
+        throw std::runtime_error("createPointScatter: Expected at least one point per voxel");
+    }
+    if (!srcGrid.isLevelSet()) {
+        throw std::runtime_error("createPointScatter: Expected a level set grid");
+    }
+    if (!srcGrid.hasBBox()) {
+        throw std::runtime_error("createPointScatter: ActiveVoxelCount is required");
+    }
+    const uint64_t pointCount = pointsPerVoxel * srcGrid.activeVoxelCount();
+    if (pointCount == 0) {
+        throw std::runtime_error("createPointScatter: No particles to scatter");
+    }
+    std::vector<Vec3T> xyz;
+    xyz.reserve(pointCount);
+    using DstGridT = build::Grid<uint32_t>;
+    DstGridT dstGrid(std::numeric_limits<uint32_t>::max(), name, GridClass::PointData);
+    dstGrid.mMap = srcGrid.map();
+    auto dstAcc = dstGrid.getAccessor();
+    std::srand(1234);
+    const ValueT s = 1 / (1 + ValueT(RAND_MAX)); // scale so s*rand() is in ] 0, 1 [
+    // return a point with random local voxel coordinates (-0.5 to +0.5)
+    auto randomPoint = [&s](){return s * Vec3T(rand(), rand(), rand()) - Vec3T(0.5);};
+    const auto& srcTree = srcGrid.tree();
+    auto srcMgrHandle = createNodeManager(srcGrid);
+    auto *srcMgr = srcMgrHandle.template mgr<SrcBuildT>();
+    assert(srcMgr);
+    for (uint32_t i = 0, end = srcTree.nodeCount(0); i < end; ++i) {
+        auto& srcLeaf = srcMgr->leaf(i);
+        auto* dstLeaf = dstAcc.setValue(srcLeaf.origin(), pointsPerVoxel); // allocates leaf node
+        dstLeaf->mValueMask = srcLeaf.valueMask();
+        for (uint32_t j = 0, m = 0; j < 512; ++j) {
+            if (dstLeaf->mValueMask.isOn(j)) {
+                const Vec3f ijk = dstLeaf->offsetToGlobalCoord(j).asVec3s();// floating-point representation of index coordinates
+                for (int n = 0; n < pointsPerVoxel; ++n) xyz.push_back(srcGrid.indexToWorld(randomPoint() + ijk));
+                m += pointsPerVoxel;
+            }// active voxels
+            dstLeaf->mValues[j] = m;
+        }// loop over all voxels
+    }// loop over leaf nodes
+    assert(pointCount == xyz.size());
+    CreateNanoGrid<DstGridT> converter(dstGrid);
+    converter.setStats(StatsMode::MinMax);
+    converter.setChecksum(CheckMode::Disable);
+
+    converter.addBlindData(name,
+                           GridBlindDataSemantic::WorldCoords,
+                           GridBlindDataClass::AttributeArray,
+                           toGridType<Vec3T>(),
+                           pointCount,
+                           sizeof(Vec3T));
+    auto handle = converter.template getHandle<uint32_t>(buffer);
+    assert(handle);
+
+    auto* grid = handle.template grid<uint32_t>();
+    assert(grid && grid->template isSequential<0>());
+    auto &tree = grid->tree();
+    if (tree.nodeCount(0) == 0) throw std::runtime_error("Expect leaf nodes!");
+    auto *leafData = tree.getFirstLeaf()->data();
+    leafData[0].mMinimum = 0; // start of prefix sum
+    for (uint32_t i = 1, n = tree.nodeCount(0); i < n; ++i) {
+        leafData[i].mMinimum = leafData[i - 1].mMinimum + leafData[i - 1].mMaximum;
+    }
+    if (Vec3T *blindData = grid->template getBlindData<Vec3T>(0)) {
+        memcpy(blindData, xyz.data(), xyz.size() * sizeof(Vec3T));
+    } else {
+        throw std::runtime_error("Blind data pointer was NULL");
+    }
+    updateChecksum(grid, cMode);
+    return handle;
+} // createPointScatter
+
+}// namespace tools
+
+} // namespace nanovdb
+
+#endif // NANOVDB_TOOLS_PRIMITIVES_H_HAS_BEEN_INCLUDED
diff --git a/nanovdb/nanovdb/tools/GridBuilder.h b/nanovdb/nanovdb/tools/GridBuilder.h
new file mode 100644
index 0000000000..3072a59348
--- /dev/null
+++ b/nanovdb/nanovdb/tools/GridBuilder.h
@@ -0,0 +1,2315 @@
+// Copyright Contributors to the OpenVDB Project
+// SPDX-License-Identifier: MPL-2.0
+
+/*!
+    \file nanovdb/tools/GridBuilder.h
+
+    \author Ken Museth
+
+    \date June 26, 2020
+
+    \brief This file defines a minimum set of tree nodes and tools that
+           can be used (instead of OpenVDB) to build nanovdb grids on the CPU.
+*/ + +#ifndef NANOVDB_TOOLS_BUILD_GRIDBUILDER_H_HAS_BEEN_INCLUDED +#define NANOVDB_TOOLS_BUILD_GRIDBUILDER_H_HAS_BEEN_INCLUDED + +#include + +#include +#include +#include // for stringstream +#include +#include // for memcpy +#include +#include +#include + +#include +#include +#include + +namespace nanovdb { + +namespace tools::build { + +// ----------------------------> Froward decelerations of random access methods <-------------------------------------- + +template struct GetValue; +template struct SetValue; +template struct TouchLeaf; +template struct GetState; +template struct ProbeValue; + +// ----------------------------> RootNode <-------------------------------------- + +template +struct RootNode +{ + using ValueType = typename ChildT::ValueType; + using BuildType = typename ChildT::BuildType; + using ChildNodeType = ChildT; + using LeafNodeType = typename ChildT::LeafNodeType; + static constexpr uint32_t LEVEL = 1 + ChildT::LEVEL; // level 0 = leaf + struct Tile { + Tile(ChildT* c = nullptr) : child(c) {} + Tile(const ValueType& v, bool s) : child(nullptr), value(v), state(s) {} + bool isChild() const { return child!=nullptr; } + bool isValue() const { return child==nullptr; } + bool isActive() const { return child==nullptr && state; } + ChildT* child; + ValueType value; + bool state; + }; + using MapT = std::map; + MapT mTable; + ValueType mBackground; + + Tile* probeTile(const Coord &ijk) { + auto iter = mTable.find(CoordToKey(ijk)); + return iter == mTable.end() ? nullptr : &(iter->second); + } + + const Tile* probeTile(const Coord &ijk) const { + auto iter = mTable.find(CoordToKey(ijk)); + return iter == mTable.end() ? nullptr : &(iter->second); + } + + class ChildIterator + { + const RootNode *mParent; + typename MapT::const_iterator mIter; + public: + ChildIterator() : mParent(nullptr), mIter() {} + ChildIterator(const RootNode *parent) : mParent(parent), mIter(parent->mTable.begin()) { + while (mIter!=parent->mTable.end() && mIter->second.child==nullptr) ++mIter; + } + ChildIterator& operator=(const ChildIterator&) = default; + ChildT& operator*() const {NANOVDB_ASSERT(*this); return *mIter->second.child;} + ChildT* operator->() const {NANOVDB_ASSERT(*this); return mIter->second.child;} + Coord getOrigin() const { NANOVDB_ASSERT(*this); return mIter->first;} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mIter->first;} + operator bool() const {return mParent && mIter!=mParent->mTable.end();} + ChildIterator& operator++() { + NANOVDB_ASSERT(mParent); + ++mIter; + while (mIter!=mParent->mTable.end() && mIter->second.child==nullptr) ++mIter; + return *this; + } + ChildIterator operator++(int) { + auto tmp = *this; + ++(*this); + return tmp; + } + uint32_t pos() const { + NANOVDB_ASSERT(mParent); + return uint32_t(std::distance(mParent->mTable.begin(), mIter)); + } + }; // Member class ChildIterator + + ChildIterator cbeginChild() const {return ChildIterator(this);} + ChildIterator cbeginChildOn() const {return ChildIterator(this);}// match openvdb + + class ValueIterator + { + const RootNode *mParent; + typename MapT::const_iterator mIter; + public: + ValueIterator() : mParent(nullptr), mIter() {} + ValueIterator(const RootNode *parent) : mParent(parent), mIter(parent->mTable.begin()) { + while (mIter!=parent->mTable.end() && mIter->second.child!=nullptr) ++mIter; + } + ValueIterator& operator=(const ValueIterator&) = default; + ValueType operator*() const {NANOVDB_ASSERT(*this); return mIter->second.value;} + bool isActive() const {NANOVDB_ASSERT(*this); return 
mIter->second.state;} + Coord getOrigin() const { NANOVDB_ASSERT(*this); return mIter->first;} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mIter->first;} + operator bool() const {return mParent && mIter!=mParent->mTable.end();} + ValueIterator& operator++() { + NANOVDB_ASSERT(mParent); + ++mIter; + while (mIter!=mParent->mTable.end() && mIter->second.child!=nullptr) ++mIter; + return *this;; + } + ValueIterator operator++(int) { + auto tmp = *this; + ++(*this); + return tmp; + } + uint32_t pos() const { + NANOVDB_ASSERT(mParent); + return uint32_t(std::distance(mParent->mTable.begin(), mIter)); + } + }; // Member class ValueIterator + + ValueIterator beginValue() {return ValueIterator(this);} + ValueIterator cbeginValueAll() const {return ValueIterator(this);} + + class ValueOnIterator + { + const RootNode *mParent; + typename MapT::const_iterator mIter; + public: + ValueOnIterator() : mParent(nullptr), mIter() {} + ValueOnIterator(const RootNode *parent) : mParent(parent), mIter(parent->mTable.begin()) { + while (mIter!=parent->mTable.end() && (mIter->second.child!=nullptr || !mIter->second.state)) ++mIter; + } + ValueOnIterator& operator=(const ValueOnIterator&) = default; + ValueType operator*() const {NANOVDB_ASSERT(*this); return mIter->second.value;} + Coord getOrigin() const { NANOVDB_ASSERT(*this); return mIter->first;} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mIter->first;} + operator bool() const {return mParent && mIter!=mParent->mTable.end();} + ValueOnIterator& operator++() { + NANOVDB_ASSERT(mParent); + ++mIter; + while (mIter!=mParent->mTable.end() && (mIter->second.child!=nullptr || !mIter->second.state)) ++mIter; + return *this;; + } + ValueOnIterator operator++(int) { + auto tmp = *this; + ++(*this); + return tmp; + } + uint32_t pos() const { + NANOVDB_ASSERT(mParent); + return uint32_t(std::distance(mParent->mTable.begin(), mIter)); + } + }; // Member class ValueOnIterator + + ValueOnIterator beginValueOn() {return ValueOnIterator(this);} + ValueOnIterator cbeginValueOn() const {return ValueOnIterator(this);} + + class TileIterator + { + const RootNode *mParent; + typename MapT::const_iterator mIter; + public: + TileIterator() : mParent(nullptr), mIter() {} + TileIterator(const RootNode *parent) : mParent(parent), mIter(parent->mTable.begin()) { + NANOVDB_ASSERT(mParent); + } + TileIterator& operator=(const TileIterator&) = default; + const Tile& operator*() const {NANOVDB_ASSERT(*this); return mIter->second;} + const Tile* operator->() const {NANOVDB_ASSERT(*this); return &(mIter->second);} + Coord getOrigin() const { NANOVDB_ASSERT(*this); return mIter->first;} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mIter->first;} + operator bool() const {return mParent && mIter!=mParent->mTable.end();} + const ChildT* probeChild(ValueType &value) { + NANOVDB_ASSERT(*this); + const ChildT *child = mIter->second.child; + if (child==nullptr) value = mIter->second.value; + return child; + } + bool isValueOn() const {return mIter->second.child==nullptr && mIter->second.state;} + TileIterator& operator++() { + NANOVDB_ASSERT(mParent); + ++mIter; + return *this; + } + TileIterator operator++(int) { + auto tmp = *this; + ++(*this); + return tmp; + } + uint32_t pos() const { + NANOVDB_ASSERT(mParent); + return uint32_t(std::distance(mParent->mTable.begin(), mIter)); + } + }; // Member class TileIterator + + TileIterator beginTile() {return TileIterator(this);} + TileIterator cbeginChildAll() const {return TileIterator(this);} + + //class 
DenseIterator : public TileIterator + + RootNode(const ValueType& background) : mBackground(background) {} + RootNode(const RootNode&) = delete; // disallow copy-construction + RootNode(RootNode&&) = default; // allow move construction + RootNode& operator=(const RootNode&) = delete; // disallow copy assignment + RootNode& operator=(RootNode&&) = default; // allow move assignment + + ~RootNode() { this->clear(); } + + uint32_t tileCount() const { return uint32_t(mTable.size()); } + uint32_t getTableSize() const { return uint32_t(mTable.size()); }// match openvdb + const ValueType& background() const {return mBackground;} + + void nodeCount(std::array &count) const + { + for (auto it = this->cbeginChild(); it; ++it) { + count[ChildT::LEVEL] += 1; + it->nodeCount(count); + } + } + + bool empty() const { return mTable.empty(); } + + void clear() + { + for (auto iter = mTable.begin(); iter != mTable.end(); ++iter) delete iter->second.child; + mTable.clear(); + } + + static Coord CoordToKey(const Coord& ijk) { return ijk & ~ChildT::MASK; } + +#ifdef NANOVDB_NEW_ACCESSOR_METHODS + template + auto get(const Coord& ijk, ArgsT&&... args) const + { + if (const Tile *tile = this->probeTile(ijk)) { + if (auto *child = tile->child) return child->template get(ijk, args...); + return OpT::get(*tile, args...); + } + return OpT::get(*this, args...); + } + template + auto set(const Coord& ijk, ArgsT&&... args) + { + ChildT* child = nullptr; + const Coord key = CoordToKey(ijk); + auto iter = mTable.find(key); + if (iter == mTable.end()) { + child = new ChildT(ijk, mBackground, false); + mTable[key] = Tile(child); + } else if (iter->second.child != nullptr) { + child = iter->second.child; + } else { + child = new ChildT(ijk, iter->second.value, iter->second.state); + iter->second.child = child; + } + NANOVDB_ASSERT(child); + return child->template set(ijk, args...); + } + template + auto getAndCache(const Coord& ijk, const AccT& acc, ArgsT&&... args) const + { + if (const Tile *tile = this->probeTile(ijk)) { + if (auto *child = tile->child) { + acc.insert(ijk, child); + return child->template get(ijk, args...); + } + return OpT::get(*tile, args...); + } + return OpT::get(*this, args...); + } + + template + auto setAndCache(const Coord& ijk, const AccT& acc, ArgsT&&... args) + { + ChildT* child = nullptr; + const Coord key = CoordToKey(ijk); + auto iter = mTable.find(key); + if (iter == mTable.end()) { + child = new ChildT(ijk, mBackground, false); + mTable[key] = Tile(child); + } else if (iter->second.child != nullptr) { + child = iter->second.child; + } else { + child = new ChildT(ijk, iter->second.value, iter->second.state); + iter->second.child = child; + } + NANOVDB_ASSERT(child); + acc.insert(ijk, child); + return child->template setAndCache(ijk, acc, args...); + } + ValueType getValue(const Coord& ijk) const {return this->template get>(ijk);} + ValueType getValue(int i, int j, int k) const {return this->template get>(Coord(i,j,k));} + ValueType operator()(const Coord& ijk) const {return this->template get>(ijk);} + ValueType operator()(int i, int j, int k) const {return this->template get>(Coord(i,j,k));} + void setValue(const Coord& ijk, const ValueType& value) {this->template set>(ijk, value);} + bool probeValue(const Coord& ijk, ValueType& value) const {return this->template get>(ijk, value);} + bool isActive(const Coord& ijk) const {return this->template get>(ijk);} +#else + ValueType getValue(const Coord& ijk) const + { +#if 1 + if (auto *tile = this->probeTile(ijk)) return tile->child ? 
tile->child->getValue(ijk) : tile->value; + return mBackground; +#else + auto iter = mTable.find(CoordToKey(ijk)); + if (iter == mTable.end()) { + return mBackground; + } else if (iter->second.child) { + return iter->second.child->getValue(ijk); + } else { + return iter->second.value; + } +#endif + } + ValueType getValue(int i, int j, int k) const {return this->getValue(Coord(i,j,k));} + + void setValue(const Coord& ijk, const ValueType& value) + { + ChildT* child = nullptr; + const Coord key = CoordToKey(ijk); + auto iter = mTable.find(key); + if (iter == mTable.end()) { + child = new ChildT(ijk, mBackground, false); + mTable[key] = Tile(child); + } else if (iter->second.child != nullptr) { + child = iter->second.child; + } else { + child = new ChildT(ijk, iter->second.value, iter->second.state); + iter->second.child = child; + } + NANOVDB_ASSERT(child); + child->setValue(ijk, value); + } + + template + bool isActiveAndCache(const Coord& ijk, AccT& acc) const + { + auto iter = mTable.find(CoordToKey(ijk)); + if (iter == mTable.end()) + return false; + if (iter->second.child) { + acc.insert(ijk, iter->second.child); + return iter->second.child->isActiveAndCache(ijk, acc); + } + return iter->second.state; + } + + template + ValueType getValueAndCache(const Coord& ijk, AccT& acc) const + { + auto iter = mTable.find(CoordToKey(ijk)); + if (iter == mTable.end()) + return mBackground; + if (iter->second.child) { + acc.insert(ijk, iter->second.child); + return iter->second.child->getValueAndCache(ijk, acc); + } + return iter->second.value; + } + + template + void setValueAndCache(const Coord& ijk, const ValueType& value, AccT& acc) + { + ChildT* child = nullptr; + const Coord key = CoordToKey(ijk); + auto iter = mTable.find(key); + if (iter == mTable.end()) { + child = new ChildT(ijk, mBackground, false); + mTable[key] = Tile(child); + } else if (iter->second.child != nullptr) { + child = iter->second.child; + } else { + child = new ChildT(ijk, iter->second.value, iter->second.state); + iter->second.child = child; + } + NANOVDB_ASSERT(child); + acc.insert(ijk, child); + child->setValueAndCache(ijk, value, acc); + } + template + void setValueOnAndCache(const Coord& ijk, AccT& acc) + { + ChildT* child = nullptr; + const Coord key = CoordToKey(ijk); + auto iter = mTable.find(key); + if (iter == mTable.end()) { + child = new ChildT(ijk, mBackground, false); + mTable[key] = Tile(child); + } else if (iter->second.child != nullptr) { + child = iter->second.child; + } else { + child = new ChildT(ijk, iter->second.value, iter->second.state); + iter->second.child = child; + } + NANOVDB_ASSERT(child); + acc.insert(ijk, child); + child->setValueOnAndCache(ijk, acc); + } + template + void touchLeafAndCache(const Coord &ijk, AccT& acc) + { + ChildT* child = nullptr; + const Coord key = CoordToKey(ijk); + auto iter = mTable.find(key); + if (iter == mTable.end()) { + child = new ChildT(ijk, mBackground, false); + mTable[key] = Tile(child); + } else if (iter->second.child != nullptr) { + child = iter->second.child; + } else { + child = new ChildT(ijk, iter->second.value, iter->second.state); + iter->second.child = child; + } + acc.insert(ijk, child); + child->touchLeafAndCache(ijk, acc); + } +#endif// NANOVDB_NEW_ACCESSOR_METHODS + + template + uint32_t nodeCount() const + { + static_assert(util::is_same::value, "Root::getNodes: Invalid type"); + static_assert(NodeT::LEVEL < LEVEL, "Root::getNodes: LEVEL error"); + uint32_t sum = 0; + for (auto iter = mTable.begin(); iter != mTable.end(); ++iter) { + if 
(iter->second.child == nullptr) continue; // skip tiles + if constexpr(util::is_same::value) { //resolved at compile-time + ++sum; + } else { + sum += iter->second.child->template nodeCount(); + } + } + return sum; + } + + template + void getNodes(std::vector& array) + { + static_assert(util::is_same::value, "Root::getNodes: Invalid type"); + static_assert(NodeT::LEVEL < LEVEL, "Root::getNodes: LEVEL error"); + for (auto iter = mTable.begin(); iter != mTable.end(); ++iter) { + if (iter->second.child == nullptr) + continue; + if constexpr(util::is_same::value) { //resolved at compile-time + array.push_back(reinterpret_cast(iter->second.child)); + } else { + iter->second.child->getNodes(array); + } + } + } + + void addChild(ChildT*& child) + { + NANOVDB_ASSERT(child); + const Coord key = CoordToKey(child->mOrigin); + auto iter = mTable.find(key); + if (iter != mTable.end() && iter->second.child != nullptr) { // existing child node + delete iter->second.child; + iter->second.child = child; + } else { + mTable[key] = Tile(child); + } + child = nullptr; + } + + /// @brief Add a tile containing voxel (i, j, k) at the specified tree level, + /// creating a new branch if necessary. Delete any existing lower-level nodes + /// that contain (x, y, z). + /// @tparam level tree level at which the tile is inserted. Must be 1, 2 or 3. + /// @param ijk Index coordinate that map to the tile being inserted + /// @param value Value of the tile + /// @param state Binary state of the tile + template + void addTile(const Coord& ijk, const ValueType& value, bool state) + { + static_assert(level > 0 && level <= LEVEL, "invalid template value of level"); + const Coord key = CoordToKey(ijk); + auto iter = mTable.find(key); + if constexpr(level == LEVEL) { + if (iter == mTable.end()) { + mTable[key] = Tile(value, state); + } else if (iter->second.child == nullptr) { + iter->second.value = value; + iter->second.state = state; + } else { + delete iter->second.child; + iter->second.child = nullptr; + iter->second.value = value; + iter->second.state = state; + } + } else if constexpr(level < LEVEL) { + ChildT* child = nullptr; + if (iter == mTable.end()) { + child = new ChildT(ijk, mBackground, false); + mTable[key] = Tile(child); + } else if (iter->second.child != nullptr) { + child = iter->second.child; + } else { + child = new ChildT(ijk, iter->second.value, iter->second.state); + iter->second.child = child; + } + child->template addTile(ijk, value, state); + } + } + + template + void addNode(NodeT*& node) + { + if constexpr(util::is_same::value) { //resolved at compile-time + this->addChild(reinterpret_cast(node)); + } else { + ChildT* child = nullptr; + const Coord key = CoordToKey(node->mOrigin); + auto iter = mTable.find(key); + if (iter == mTable.end()) { + child = new ChildT(node->mOrigin, mBackground, false); + mTable[key] = Tile(child); + } else if (iter->second.child != nullptr) { + child = iter->second.child; + } else { + child = new ChildT(node->mOrigin, iter->second.value, iter->second.state); + iter->second.child = child; + } + child->addNode(node); + } + } + + void merge(RootNode &other) + { + for (auto iter1 = other.mTable.begin(); iter1 != other.mTable.end(); ++iter1) { + if (iter1->second.child == nullptr) continue;// ignore input tiles + auto iter2 = mTable.find(iter1->first); + if (iter2 == mTable.end() || iter2->second.child == nullptr) { + mTable[iter1->first] = Tile(iter1->second.child); + iter1->second.child = nullptr; + } else { + iter2->second.child->merge(*iter1->second.child); + } + } + 
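// At this point every child node of @a other has either been stolen (its pointer was nulled above) or merged recursively, + // so clearing the table below releases the children that remain in it and discards all value tiles of @a other. + 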
other.clear(); + } + + template + typename util::enable_if::value>::type + signedFloodFill(T outside); + +}; // tools::build::RootNode + +//================================================================================================ + +template +template +inline typename util::enable_if::value>::type +RootNode::signedFloodFill(T outside) +{ + std::map nodeKeys; + for (auto iter = mTable.begin(); iter != mTable.end(); ++iter) { + if (iter->second.child == nullptr) + continue; + nodeKeys.insert(std::pair(iter->first, iter->second.child)); + } + + // We employ a simple z-scanline algorithm that inserts inactive tiles with + // the inside value if they are sandwiched between inside child nodes only! + auto b = nodeKeys.begin(), e = nodeKeys.end(); + if (b == e) + return; + for (auto a = b++; b != e; ++a, ++b) { + Coord d = b->first - a->first; // delta of neighboring coordinates + if (d[0] != 0 || d[1] != 0 || d[2] == int(ChildT::DIM)) + continue; // not same z-scanline or neighbors + const ValueType fill[] = {a->second->getLastValue(), b->second->getFirstValue()}; + if (!(fill[0] < 0) || !(fill[1] < 0)) + continue; // scanline isn't inside + Coord c = a->first + Coord(0u, 0u, ChildT::DIM); + for (; c[2] != b->first[2]; c[2] += ChildT::DIM) { + const Coord key = RootNode::CoordToKey(c); + mTable[key] = typename RootNode::Tile(-outside, false); // inactive tile + } + } +} // tools::build::RootNode::signedFloodFill + +// ----------------------------> InternalNode <-------------------------------------- + +template +struct InternalNode +{ + using ValueType = typename ChildT::ValueType; + using BuildType = typename ChildT::BuildType; + using ChildNodeType = ChildT; + using LeafNodeType = typename ChildT::LeafNodeType; + static constexpr uint32_t LOG2DIM = ChildT::LOG2DIM + 1; + static constexpr uint32_t TOTAL = LOG2DIM + ChildT::TOTAL; //dimension in index space + static constexpr uint32_t DIM = 1u << TOTAL; + static constexpr uint32_t SIZE = 1u << (3 * LOG2DIM); //number of tile values (or child pointers) + static constexpr uint32_t MASK = DIM - 1; + static constexpr uint32_t LEVEL = 1 + ChildT::LEVEL; // level 0 = leaf + static constexpr uint64_t NUM_VALUES = uint64_t(1) << (3 * TOTAL); // total voxel count represented by this node + using MaskT = Mask; + template + using MaskIterT = typename MaskT::template Iterator; + using NanoNodeT = typename NanoNode::Type; + + struct Tile { + Tile(ChildT* c = nullptr) : child(c) {} + Tile(const ValueType& v) : value(v) {} + union{ + ChildT* child; + ValueType value; + }; + }; + Coord mOrigin; + MaskT mValueMask; + MaskT mChildMask; + Tile mTable[SIZE]; + + union { + NanoNodeT *mDstNode; + uint64_t mDstOffset; + }; + + /// @brief Visits child nodes of this node only + class ChildIterator : public MaskIterT + { + using BaseT = MaskIterT; + const InternalNode *mParent; + public: + ChildIterator() : BaseT(), mParent(nullptr) {} + ChildIterator(const InternalNode* parent) : BaseT(parent->mChildMask.beginOn()), mParent(parent) {} + ChildIterator& operator=(const ChildIterator&) = default; + const ChildT& operator*() const {NANOVDB_ASSERT(*this); return *mParent->mTable[BaseT::pos()].child;} + const ChildT* operator->() const {NANOVDB_ASSERT(*this); return mParent->mTable[BaseT::pos()].child;} + Coord getCoord() const { NANOVDB_ASSERT(*this); return (*this)->origin();} + }; // Member class ChildIterator + + ChildIterator beginChild() {return ChildIterator(this);} + ChildIterator cbeginChildOn() const {return ChildIterator(this);}// match openvdb + + /// 
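@par Example (an editorial sketch; @c node is assumed to be a populated InternalNode) + /// @code + /// for (auto it = node.cbeginChildOn(); it; ++it) { + ///     const Coord origin = it.getCoord(); // origin of each child node + /// } + /// @endcode + + /// 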
@brief Visits all tile values in this node, i.e. both inactive and active tiles + class ValueIterator : public MaskIterT + { + using BaseT = MaskIterT; + const InternalNode *mParent; + public: + ValueIterator() : BaseT(), mParent(nullptr) {} + ValueIterator(const InternalNode* parent) : BaseT(parent->mChildMask.beginOff()), mParent(parent) {} + ValueIterator& operator=(const ValueIterator&) = default; + ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->mTable[BaseT::pos()].value;} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());} + bool isActive() const { NANOVDB_ASSERT(*this); return mParent->mValueMask.isOn(BaseT::pos());} + }; // Member class ValueIterator + + ValueIterator beginValue() {return ValueIterator(this);} + ValueIterator cbeginValueAll() const {return ValueIterator(this);} + + /// @brief Visits active tile values of this node only + class ValueOnIterator : public MaskIterT + { + using BaseT = MaskIterT; + const InternalNode *mParent; + public: + ValueOnIterator() : BaseT(), mParent(nullptr) {} + ValueOnIterator(const InternalNode* parent) : BaseT(parent->mValueMask.beginOn()), mParent(parent) {} + ValueOnIterator& operator=(const ValueOnIterator&) = default; + ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->mTable[BaseT::pos()].value;} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());} + }; // Member class ValueOnIterator + + ValueOnIterator beginValueOn() {return ValueOnIterator(this);} + ValueOnIterator cbeginValueOn() const {return ValueOnIterator(this);} + + /// @brief Visits all tile values and child nodes of this node + class DenseIterator : public MaskT::DenseIterator + { + using BaseT = typename MaskT::DenseIterator; + const InternalNode *mParent; + public: + DenseIterator() : BaseT(), mParent(nullptr) {} + DenseIterator(const InternalNode* parent) : BaseT(0), mParent(parent) {} + DenseIterator& operator=(const DenseIterator&) = default; + ChildT* probeChild(ValueType& value) const + { + NANOVDB_ASSERT(mParent && bool(*this)); + ChildT *child = nullptr; + if (mParent->mChildMask.isOn(BaseT::pos())) { + child = mParent->mTable[BaseT::pos()].child; + } else { + value = mParent->mTable[BaseT::pos()].value; + } + return child; + } + Coord getCoord() const { NANOVDB_ASSERT(mParent && bool(*this)); return mParent->offsetToGlobalCoord(BaseT::pos());} + }; // Member class DenseIterator + + DenseIterator beginDense() {return DenseIterator(this);} + DenseIterator cbeginChildAll() const {return DenseIterator(this);}// matches openvdb + + InternalNode(const Coord& origin, const ValueType& value, bool state) + : mOrigin(origin & ~MASK) + , mValueMask(state) + , mChildMask() + , mDstOffset(0) + { + for (uint32_t i = 0; i < SIZE; ++i) mTable[i].value = value; + } + InternalNode(const InternalNode&) = delete; // disallow copy-construction + InternalNode(InternalNode&&) = delete; // disallow move construction + InternalNode& operator=(const InternalNode&) = delete; // disallow copy assignment + InternalNode& operator=(InternalNode&&) = delete; // disallow move assignment + ~InternalNode() + { + for (auto iter = mChildMask.beginOn(); iter; ++iter) { + delete mTable[*iter].child; + } + } + const MaskT& getValueMask() const {return mValueMask;} + const MaskT& valueMask() const {return mValueMask;} + const MaskT& getChildMask() const {return mChildMask;} + const MaskT& childMask() const {return mChildMask;} + const Coord& origin() const {return 
mOrigin;} + + void nodeCount(std::array &count) const + { + count[ChildT::LEVEL] += mChildMask.countOn(); + if constexpr(ChildT::LEVEL>0) { + for (auto it = const_cast(this)->beginChild(); it; ++it) it->nodeCount(count); + } + } + + static uint32_t CoordToOffset(const Coord& ijk) + { + return (((ijk[0] & int32_t(MASK)) >> ChildT::TOTAL) << (2 * LOG2DIM)) + + (((ijk[1] & int32_t(MASK)) >> ChildT::TOTAL) << (LOG2DIM)) + + ((ijk[2] & int32_t(MASK)) >> ChildT::TOTAL); + } + + static Coord OffsetToLocalCoord(uint32_t n) + { + NANOVDB_ASSERT(n < SIZE); + const uint32_t m = n & ((1 << 2 * LOG2DIM) - 1); + return Coord(n >> 2 * LOG2DIM, m >> LOG2DIM, m & ((1 << LOG2DIM) - 1)); + } + + void localToGlobalCoord(Coord& ijk) const + { + ijk <<= ChildT::TOTAL; + ijk += mOrigin; + } + + Coord offsetToGlobalCoord(uint32_t n) const + { + Coord ijk = InternalNode::OffsetToLocalCoord(n); + this->localToGlobalCoord(ijk); + return ijk; + } + + ValueType getFirstValue() const { return mChildMask.isOn(0) ? mTable[0].child->getFirstValue() : mTable[0].value; } + ValueType getLastValue() const { return mChildMask.isOn(SIZE - 1) ? mTable[SIZE - 1].child->getLastValue() : mTable[SIZE - 1].value; } + + template + auto get(const Coord& ijk, ArgsT&&... args) const + { + const uint32_t n = CoordToOffset(ijk); + if (mChildMask.isOn(n)) return mTable[n].child->template get(ijk, args...); + return OpT::get(*this, n, args...); + } + + template + auto set(const Coord& ijk, ArgsT&&... args) + { + const uint32_t n = CoordToOffset(ijk); + ChildT* child = nullptr; + if (mChildMask.isOn(n)) { + child = mTable[n].child; + } else { + child = new ChildT(ijk, mTable[n].value, mValueMask.isOn(n)); + mTable[n].child = child; + mChildMask.setOn(n); + } + NANOVDB_ASSERT(child); + return child->template set(ijk, args...); + } + + template + auto getAndCache(const Coord& ijk, const AccT& acc, ArgsT&&... args) const + { + const uint32_t n = CoordToOffset(ijk); + if (mChildMask.isOff(n)) return OpT::get(*this, n, args...); + ChildT* child = mTable[n].child; + acc.insert(ijk, child); + if constexpr(ChildT::LEVEL == 0) { + return child->template get(ijk, args...); + } else { + return child->template getAndCache(ijk, acc, args...); + } + } + + template + auto setAndCache(const Coord& ijk, const AccT& acc, ArgsT&&... 
args) + { + const uint32_t n = CoordToOffset(ijk); + ChildT* child = nullptr; + if (mChildMask.isOn(n)) { + child = mTable[n].child; + } else { + child = new ChildT(ijk, mTable[n].value, mValueMask.isOn(n)); + mTable[n].child = child; + mChildMask.setOn(n); + } + NANOVDB_ASSERT(child); + acc.insert(ijk, child); + if constexpr(ChildT::LEVEL == 0) { + return child->template set(ijk, args...); + } else { + return child->template setAndCache(ijk, acc, args...); + } + } + +#ifdef NANOVDB_NEW_ACCESSOR_METHODS + ValueType getValue(const Coord& ijk) const {return this->template get>(ijk);} + LeafNodeType& setValue(const Coord& ijk, const ValueType& value){return this->template set>(ijk, value);} +#else + ValueType getValue(const Coord& ijk) const + { + const uint32_t n = CoordToOffset(ijk); + if (mChildMask.isOn(n)) { + return mTable[n].child->getValue(ijk); + } + return mTable[n].value; + } + void setValue(const Coord& ijk, const ValueType& value) + { + const uint32_t n = CoordToOffset(ijk); + ChildT* child = nullptr; + if (mChildMask.isOn(n)) { + child = mTable[n].child; + } else { + child = new ChildT(ijk, mTable[n].value, mValueMask.isOn(n)); + mTable[n].child = child; + mChildMask.setOn(n); + } + child->setValue(ijk, value); + } + + template + ValueType getValueAndCache(const Coord& ijk, AccT& acc) const + { + const uint32_t n = CoordToOffset(ijk); + if (mChildMask.isOn(n)) { + acc.insert(ijk, const_cast(mTable[n].child)); + return mTable[n].child->getValueAndCache(ijk, acc); + } + return mTable[n].value; + } + + template + void setValueAndCache(const Coord& ijk, const ValueType& value, AccT& acc) + { + const uint32_t n = CoordToOffset(ijk); + ChildT* child = nullptr; + if (mChildMask.isOn(n)) { + child = mTable[n].child; + } else { + child = new ChildT(ijk, mTable[n].value, mValueMask.isOn(n)); + mTable[n].child = child; + mChildMask.setOn(n); + } + acc.insert(ijk, child); + child->setValueAndCache(ijk, value, acc); + } + + template + void setValueOnAndCache(const Coord& ijk, AccT& acc) + { + const uint32_t n = CoordToOffset(ijk); + ChildT* child = nullptr; + if (mChildMask.isOn(n)) { + child = mTable[n].child; + } else { + child = new ChildT(ijk, mTable[n].value, mValueMask.isOn(n)); + mTable[n].child = child; + mChildMask.setOn(n); + } + acc.insert(ijk, child); + child->setValueOnAndCache(ijk, acc); + } + + template + void touchLeafAndCache(const Coord &ijk, AccT& acc) + { + const uint32_t n = CoordToOffset(ijk); + ChildT* child = nullptr; + if (mChildMask.isOn(n)) { + child = mTable[n].child; + } else { + child = new ChildT(ijk, mTable[n].value, mValueMask.isOn(n)); + mTable[n].child = child; + mChildMask.setOn(n); + } + acc.insert(ijk, child); + if constexpr(LEVEL>1) child->touchLeafAndCache(ijk, acc); + } + template + bool isActiveAndCache(const Coord& ijk, AccT& acc) const + { + const uint32_t n = CoordToOffset(ijk); + if (mChildMask.isOn(n)) { + acc.insert(ijk, const_cast(mTable[n].child)); + return mTable[n].child->isActiveAndCache(ijk, acc); + } + return mValueMask.isOn(n); + } +#endif + + template + uint32_t nodeCount() const + { + static_assert(util::is_same::value, "Node::getNodes: Invalid type"); + NANOVDB_ASSERT(NodeT::LEVEL < LEVEL); + uint32_t sum = 0; + if constexpr(util::is_same::value) { // resolved at compile-time + sum += mChildMask.countOn(); + } else if constexpr(LEVEL>1) { + for (auto iter = mChildMask.beginOn(); iter; ++iter) { + sum += mTable[*iter].child->template nodeCount(); + } + } + return sum; + } + + template + void getNodes(std::vector& array) + { + 
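// Depth-first collection: when the compile-time type NodeT matches ChildT the child pointers + // are appended to @a array directly, otherwise the recursion continues one level down the tree. + 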
static_assert(util::is_same::value, "Node::getNodes: Invalid type"); + NANOVDB_ASSERT(NodeT::LEVEL < LEVEL); + for (auto iter = mChildMask.beginOn(); iter; ++iter) { + if constexpr(util::is_same::value) { // resolved at compile-time + array.push_back(reinterpret_cast(mTable[*iter].child)); + } else if constexpr(LEVEL>1) { + mTable[*iter].child->getNodes(array); + } + } + } + + void addChild(ChildT*& child) + { + NANOVDB_ASSERT(child && (child->mOrigin & ~MASK) == this->mOrigin); + const uint32_t n = CoordToOffset(child->mOrigin); + if (mChildMask.isOn(n)) { + delete mTable[n].child; + } else { + mChildMask.setOn(n); + } + mTable[n].child = child; + child = nullptr; + } + + /// @brief Add a tile containing voxel (i, j, k) at the specified tree level, + /// creating a new branch if necessary. Delete any existing lower-level nodes + /// that contain (x, y, z). + /// @tparam level tree level at which the tile is inserted. Must be 1 or 2. + /// @param ijk Index coordinate that map to the tile being inserted + /// @param value Value of the tile + /// @param state Binary state of the tile + template + void addTile(const Coord& ijk, const ValueType& value, bool state) + { + static_assert(level > 0 && level <= LEVEL, "invalid template value of level"); + const uint32_t n = CoordToOffset(ijk); + if constexpr(level == LEVEL) { + if (mChildMask.isOn(n)) { + delete mTable[n].child; + mTable[n] = Tile(value); + } else { + mValueMask.set(n, state); + mTable[n].value = value; + } + } else if constexpr(level < LEVEL) { + ChildT* child = nullptr; + if (mChildMask.isOn(n)) { + child = mTable[n].child; + } else { + child = new ChildT(ijk, value, state); + mTable[n].child = child; + mChildMask.setOn(n); + } + child->template addTile(ijk, value, state); + } + } + + template + void addNode(NodeT*& node) + { + if constexpr(util::is_same::value) { //resolved at compile-time + this->addChild(reinterpret_cast(node)); + } else if constexpr(LEVEL>1) { + const uint32_t n = CoordToOffset(node->mOrigin); + ChildT* child = nullptr; + if (mChildMask.isOn(n)) { + child = mTable[n].child; + } else { + child = new ChildT(node->mOrigin, mTable[n].value, mValueMask.isOn(n)); + mTable[n].child = child; + mChildMask.setOn(n); + } + child->addNode(node); + } + } + + void merge(InternalNode &other) + { + for (auto iter = other.mChildMask.beginOn(); iter; ++iter) { + const uint32_t n = *iter; + if (mChildMask.isOn(n)) { + mTable[n].child->merge(*other.mTable[n].child); + } else { + mTable[n].child = other.mTable[n].child; + other.mChildMask.setOff(n); + mChildMask.setOn(n); + } + } + } + + template + typename util::enable_if::value>::type + signedFloodFill(T outside); + +}; // tools::build::InternalNode + +//================================================================================================ + +template +template +inline typename util::enable_if::value>::type +InternalNode::signedFloodFill(T outside) +{ + const uint32_t first = *mChildMask.beginOn(); + if (first < NUM_VALUES) { + bool xInside = mTable[first].child->getFirstValue() < 0; + bool yInside = xInside, zInside = xInside; + for (uint32_t x = 0; x != (1 << LOG2DIM); ++x) { + const uint32_t x00 = x << (2 * LOG2DIM); // offset for block(x, 0, 0) + if (mChildMask.isOn(x00)) { + xInside = mTable[x00].child->getLastValue() < 0; + } + yInside = xInside; + for (uint32_t y = 0; y != (1u << LOG2DIM); ++y) { + const uint32_t xy0 = x00 + (y << LOG2DIM); // offset for block(x, y, 0) + if (mChildMask.isOn(xy0)) + yInside = mTable[xy0].child->getLastValue() < 0; + zInside = 
yInside; + for (uint32_t z = 0; z != (1 << LOG2DIM); ++z) { + const uint32_t xyz = xy0 + z; // offset for block(x, y, z) + if (mChildMask.isOn(xyz)) { + zInside = mTable[xyz].child->getLastValue() < 0; + } else { + mTable[xyz].value = zInside ? -outside : outside; + } + } + } + } + } +} // tools::build::InternalNode::signedFloodFill + +// ----------------------------> LeafNode <-------------------------------------- + +template +struct LeafNode +{ + using BuildType = BuildT; + using ValueType = typename BuildToValueMap::type; + using LeafNodeType = LeafNode; + static constexpr uint32_t LOG2DIM = 3; + static constexpr uint32_t TOTAL = LOG2DIM; // needed by parent nodes + static constexpr uint32_t DIM = 1u << TOTAL; + static constexpr uint32_t SIZE = 1u << 3 * LOG2DIM; // total number of voxels represented by this node + static constexpr uint32_t MASK = DIM - 1; // mask for bit operations + static constexpr uint32_t LEVEL = 0; // level 0 = leaf + static constexpr uint64_t NUM_VALUES = uint64_t(1) << (3 * TOTAL); // total voxel count represented by this node + using NodeMaskType = Mask; + template + using MaskIterT = typename Mask::template Iterator; + using NanoLeafT = typename NanoNode::Type; + + Coord mOrigin; + Mask mValueMask; + ValueType mValues[SIZE]; + union { + NanoLeafT *mDstNode; + uint64_t mDstOffset; + }; + + /// @brief Visits all active values in a leaf node + class ValueOnIterator : public MaskIterT + { + using BaseT = MaskIterT; + const LeafNode *mParent; + public: + ValueOnIterator() : BaseT(), mParent(nullptr) {} + ValueOnIterator(const LeafNode* parent) : BaseT(parent->mValueMask.beginOn()), mParent(parent) {} + ValueOnIterator& operator=(const ValueOnIterator&) = default; + ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->mValues[BaseT::pos()];} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());} + }; // Member class ValueOnIterator + + ValueOnIterator beginValueOn() {return ValueOnIterator(this);} + ValueOnIterator cbeginValueOn() const {return ValueOnIterator(this);} + + /// @brief Visits all inactive values in a leaf node + class ValueOffIterator : public MaskIterT + { + using BaseT = MaskIterT; + const LeafNode *mParent; + public: + ValueOffIterator() : BaseT(), mParent(nullptr) {} + ValueOffIterator(const LeafNode* parent) : BaseT(parent->mValueMask.beginOff()), mParent(parent) {} + ValueOffIterator& operator=(const ValueOffIterator&) = default; + ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->mValues[BaseT::pos()];} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());} + }; // Member class ValueOffIterator + + ValueOffIterator beginValueOff() {return ValueOffIterator(this);} + ValueOffIterator cbeginValueOff() const {return ValueOffIterator(this);} + + /// @brief Visits all values in a leaf node, i.e. 
both active and inactive values + class ValueIterator + { + const LeafNode *mParent; + uint32_t mPos; + public: + ValueIterator() : mParent(nullptr), mPos(1u << 3 * LOG2DIM) {} + ValueIterator(const LeafNode* parent) : mParent(parent), mPos(0) {NANOVDB_ASSERT(parent);} + ValueIterator& operator=(const ValueIterator&) = default; + ValueType operator*() const { NANOVDB_ASSERT(*this); return mParent->mValues[mPos];} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(mPos);} + bool isActive() const { NANOVDB_ASSERT(*this); return mParent->isActive(mPos);} + operator bool() const {return mPos < SIZE;} + ValueIterator& operator++() {++mPos; return *this;} + ValueIterator operator++(int) { + auto tmp = *this; + ++(*this); + return tmp; + } + }; // Member class ValueIterator + + ValueIterator beginValue() {return ValueIterator(this);} + ValueIterator cbeginValueAll() const {return ValueIterator(this);} + + LeafNode(const Coord& ijk, const ValueType& value, bool state) + : mOrigin(ijk & ~MASK) + , mValueMask(state) //invalid + , mDstOffset(0) + { + ValueType* target = mValues; + uint32_t n = SIZE; + while (n--) { + *target++ = value; + } + } + LeafNode(const LeafNode&) = delete; // disallow copy-construction + LeafNode(LeafNode&&) = delete; // disallow move construction + LeafNode& operator=(const LeafNode&) = delete; // disallow copy assignment + LeafNode& operator=(LeafNode&&) = delete; // disallow move assignment + ~LeafNode() = default; + + const Mask& getValueMask() const {return mValueMask;} + const Mask& valueMask() const {return mValueMask;} + const Coord& origin() const {return mOrigin;} + + /// @brief Return the linear offset corresponding to the given coordinate + static uint32_t CoordToOffset(const Coord& ijk) + { + return ((ijk[0] & int32_t(MASK)) << (2 * LOG2DIM)) + + ((ijk[1] & int32_t(MASK)) << LOG2DIM) + + (ijk[2] & int32_t(MASK)); + } + + static Coord OffsetToLocalCoord(uint32_t n) + { + NANOVDB_ASSERT(n < SIZE); + const int32_t m = n & ((1 << 2 * LOG2DIM) - 1); + return Coord(n >> 2 * LOG2DIM, m >> LOG2DIM, m & int32_t(MASK)); + } + + void localToGlobalCoord(Coord& ijk) const + { + ijk += mOrigin; + } + + Coord offsetToGlobalCoord(uint32_t n) const + { + Coord ijk = LeafNode::OffsetToLocalCoord(n); + this->localToGlobalCoord(ijk); + return ijk; + } + + ValueType getFirstValue() const { return mValues[0]; } + ValueType getLastValue() const { return mValues[SIZE - 1]; } + const ValueType& getValue(uint32_t i) const {return mValues[i];} + const ValueType& getValue(const Coord& ijk) const {return mValues[CoordToOffset(ijk)];} + + template + auto get(const Coord& ijk, ArgsT&&... args) const {return OpT::get(*this, CoordToOffset(ijk), args...);} + + template + auto set(const Coord& ijk, ArgsT&&... 
args) {return OpT::set(*this, CoordToOffset(ijk), args...);} + +#ifndef NANOVDB_NEW_ACCESSOR_METHODS + template + const ValueType& getValueAndCache(const Coord& ijk, const AccT&) const + { + return mValues[CoordToOffset(ijk)]; + } + + template + void setValueAndCache(const Coord& ijk, const ValueType& value, const AccT&) + { + const uint32_t n = CoordToOffset(ijk); + mValueMask.setOn(n); + mValues[n] = value; + } + + template + void setValueOnAndCache(const Coord& ijk, const AccT&) + { + const uint32_t n = CoordToOffset(ijk); + mValueMask.setOn(n); + } + + template + bool isActiveAndCache(const Coord& ijk, const AccT&) const + { + return mValueMask.isOn(CoordToOffset(ijk)); + } +#endif + + void setValue(uint32_t n, const ValueType& value) + { + mValueMask.setOn(n); + mValues[n] = value; + } + void setValue(const Coord& ijk, const ValueType& value){this->setValue(CoordToOffset(ijk), value);} + + void merge(LeafNode &other) + { + other.mValueMask -= mValueMask; + for (auto iter = other.mValueMask.beginOn(); iter; ++iter) { + const uint32_t n = *iter; + mValues[n] = other.mValues[n]; + } + mValueMask |= other.mValueMask; + } + + template + typename util::enable_if::value>::type + signedFloodFill(T outside); + +}; // tools::build::LeafNode + +//================================================================================================ + +template <> +struct LeafNode +{ + using ValueType = bool; + using BuildType = ValueMask; + using LeafNodeType = LeafNode; + static constexpr uint32_t LOG2DIM = 3; + static constexpr uint32_t TOTAL = LOG2DIM; // needed by parent nodes + static constexpr uint32_t DIM = 1u << TOTAL; + static constexpr uint32_t SIZE = 1u << 3 * LOG2DIM; // total number of voxels represented by this node + static constexpr uint32_t MASK = DIM - 1; // mask for bit operations + static constexpr uint32_t LEVEL = 0; // level 0 = leaf + static constexpr uint64_t NUM_VALUES = uint64_t(1) << (3 * TOTAL); // total voxel count represented by this node + using NodeMaskType = Mask; + template + using MaskIterT = typename Mask::template Iterator; + using NanoLeafT = typename NanoNode::Type; + + Coord mOrigin; + Mask mValueMask; + union { + NanoLeafT *mDstNode; + uint64_t mDstOffset; + }; + + /// @brief Visits all active values in a leaf node + class ValueOnIterator : public MaskIterT + { + using BaseT = MaskIterT; + const LeafNode *mParent; + public: + ValueOnIterator() : BaseT(), mParent(nullptr) {} + ValueOnIterator(const LeafNode* parent) : BaseT(parent->mValueMask.beginOn()), mParent(parent) {} + ValueOnIterator& operator=(const ValueOnIterator&) = default; + bool operator*() const {NANOVDB_ASSERT(*this); return true;} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());} + }; // Member class ValueOnIterator + + ValueOnIterator beginValueOn() {return ValueOnIterator(this);} + ValueOnIterator cbeginValueOn() const {return ValueOnIterator(this);} + + /// @brief Visits all inactive values in a leaf node + class ValueOffIterator : public MaskIterT + { + using BaseT = MaskIterT; + const LeafNode *mParent; + public: + ValueOffIterator() : BaseT(), mParent(nullptr) {} + ValueOffIterator(const LeafNode* parent) : BaseT(parent->mValueMask.beginOff()), mParent(parent) {} + ValueOffIterator& operator=(const ValueOffIterator&) = default; + bool operator*() const {NANOVDB_ASSERT(*this); return false;} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());} + }; // Member class ValueOffIterator + + 
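/// @par Example (an editorial sketch; @c leaf is assumed to be a populated mask leaf) + /// @code + /// for (auto it = leaf.cbeginValueOn(); it; ++it) { + ///     const Coord ijk = it.getCoord(); // global coordinate of an active voxel + /// } + /// @endcode + + 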
ValueOffIterator beginValueOff() {return ValueOffIterator(this);} + ValueOffIterator cbeginValueOff() const {return ValueOffIterator(this);} + + /// @brief Visits all values in a leaf node, i.e. both active and inactive values + class ValueIterator + { + const LeafNode *mParent; + uint32_t mPos; + public: + ValueIterator() : mParent(nullptr), mPos(1u << 3 * LOG2DIM) {} + ValueIterator(const LeafNode* parent) : mParent(parent), mPos(0) {NANOVDB_ASSERT(parent);} + ValueIterator& operator=(const ValueIterator&) = default; + bool operator*() const { NANOVDB_ASSERT(*this); return mParent->mValueMask.isOn(mPos);} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(mPos);} + bool isActive() const { NANOVDB_ASSERT(*this); return mParent->mValueMask.isOn(mPos);} + operator bool() const {return mPos < SIZE;} + ValueIterator& operator++() {++mPos; return *this;} + ValueIterator operator++(int) { + auto tmp = *this; + ++(*this); + return tmp; + } + }; // Member class ValueIterator + + ValueIterator beginValue() {return ValueIterator(this);} + ValueIterator cbeginValueAll() const {return ValueIterator(this);} + + LeafNode(const Coord& ijk, const ValueType&, bool state) + : mOrigin(ijk & ~MASK) + , mValueMask(state) //invalid + , mDstOffset(0) + { + } + LeafNode(const LeafNode&) = delete; // disallow copy-construction + LeafNode(LeafNode&&) = delete; // disallow move construction + LeafNode& operator=(const LeafNode&) = delete; // disallow copy assignment + LeafNode& operator=(LeafNode&&) = delete; // disallow move assignment + ~LeafNode() = default; + + const Mask& valueMask() const {return mValueMask;} + const Mask& getValueMask() const {return mValueMask;} + const Coord& origin() const {return mOrigin;} + + /// @brief Return the linear offset corresponding to the given coordinate + static uint32_t CoordToOffset(const Coord& ijk) + { + return ((ijk[0] & int32_t(MASK)) << (2 * LOG2DIM)) + + ((ijk[1] & int32_t(MASK)) << LOG2DIM) + + (ijk[2] & int32_t(MASK)); + } + + static Coord OffsetToLocalCoord(uint32_t n) + { + NANOVDB_ASSERT(n < SIZE); + const int32_t m = n & ((1 << 2 * LOG2DIM) - 1); + return Coord(n >> 2 * LOG2DIM, m >> LOG2DIM, m & int32_t(MASK)); + } + + void localToGlobalCoord(Coord& ijk) const {ijk += mOrigin;} + + Coord offsetToGlobalCoord(uint32_t n) const + { + Coord ijk = LeafNode::OffsetToLocalCoord(n); + this->localToGlobalCoord(ijk); + return ijk; + } + + bool getFirstValue() const { return mValueMask.isOn(0); } + bool getLastValue() const { return mValueMask.isOn(SIZE - 1); } + bool getValue(uint32_t i) const {return mValueMask.isOn(i);} + bool getValue(const Coord& ijk) const {return mValueMask.isOn(CoordToOffset(ijk));} + + template + auto get(const Coord& ijk, ArgsT&&... args) const {return OpT::get(*this, CoordToOffset(ijk), args...);} + + template + auto set(const Coord& ijk, ArgsT&&... 
args) {return OpT::set(*this, CoordToOffset(ijk), args...);} + +#ifndef NANOVDB_NEW_ACCESSOR_METHODS + template + bool getValueAndCache(const Coord& ijk, const AccT&) const + { + return mValueMask.isOn(CoordToOffset(ijk)); + } + + template + void setValueAndCache(const Coord& ijk, bool, const AccT&) + { + const uint32_t n = CoordToOffset(ijk); + mValueMask.setOn(n); + } + + template + void setValueOnAndCache(const Coord& ijk, const AccT&) + { + const uint32_t n = CoordToOffset(ijk); + mValueMask.setOn(n); + } + + template + bool isActiveAndCache(const Coord& ijk, const AccT&) const + { + return mValueMask.isOn(CoordToOffset(ijk)); + } +#endif + + void setValue(uint32_t n, bool) {mValueMask.setOn(n);} + void setValue(const Coord& ijk) {mValueMask.setOn(CoordToOffset(ijk));} + + void merge(LeafNode &other) + { + mValueMask |= other.mValueMask; + } + +}; // tools::build::LeafNode + +//================================================================================================ + +template <> +struct LeafNode +{ + using ValueType = bool; + using BuildType = ValueMask; + using LeafNodeType = LeafNode; + static constexpr uint32_t LOG2DIM = 3; + static constexpr uint32_t TOTAL = LOG2DIM; // needed by parent nodes + static constexpr uint32_t DIM = 1u << TOTAL; + static constexpr uint32_t SIZE = 1u << 3 * LOG2DIM; // total number of voxels represented by this node + static constexpr uint32_t MASK = DIM - 1; // mask for bit operations + static constexpr uint32_t LEVEL = 0; // level 0 = leaf + static constexpr uint64_t NUM_VALUES = uint64_t(1) << (3 * TOTAL); // total voxel count represented by this node + using NodeMaskType = Mask; + template + using MaskIterT = typename Mask::template Iterator; + using NanoLeafT = typename NanoNode::Type; + + Coord mOrigin; + Mask mValueMask, mValues; + union { + NanoLeafT *mDstNode; + uint64_t mDstOffset; + }; + + /// @brief Visits all active values in a leaf node + class ValueOnIterator : public MaskIterT + { + using BaseT = MaskIterT; + const LeafNode *mParent; + public: + ValueOnIterator() : BaseT(), mParent(nullptr) {} + ValueOnIterator(const LeafNode* parent) : BaseT(parent->mValueMask.beginOn()), mParent(parent) {} + ValueOnIterator& operator=(const ValueOnIterator&) = default; + bool operator*() const {NANOVDB_ASSERT(*this); return mParent->mValues.isOn(BaseT::pos());} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());} + }; // Member class ValueOnIterator + + ValueOnIterator beginValueOn() {return ValueOnIterator(this);} + ValueOnIterator cbeginValueOn() const {return ValueOnIterator(this);} + + /// @brief Visits all inactive values in a leaf node + class ValueOffIterator : public MaskIterT + { + using BaseT = MaskIterT; + const LeafNode *mParent; + public: + ValueOffIterator() : BaseT(), mParent(nullptr) {} + ValueOffIterator(const LeafNode* parent) : BaseT(parent->mValueMask.beginOff()), mParent(parent) {} + ValueOffIterator& operator=(const ValueOffIterator&) = default; + bool operator*() const {NANOVDB_ASSERT(*this); return mParent->mValues.isOn(BaseT::pos());} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());} + }; // Member class ValueOffIterator + + ValueOffIterator beginValueOff() {return ValueOffIterator(this);} + ValueOffIterator cbeginValueOff() const {return ValueOffIterator(this);} + + /// @brief Visits all values in a leaf node, i.e. 
both active and inactive values + class ValueIterator + { + const LeafNode *mParent; + uint32_t mPos; + public: + ValueIterator() : mParent(nullptr), mPos(1u << 3 * LOG2DIM) {} + ValueIterator(const LeafNode* parent) : mParent(parent), mPos(0) {NANOVDB_ASSERT(parent);} + ValueIterator& operator=(const ValueIterator&) = default; + bool operator*() const { NANOVDB_ASSERT(*this); return mParent->mValues.isOn(mPos);} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(mPos);} + bool isActive() const { NANOVDB_ASSERT(*this); return mParent->mValueMask.isOn(mPos);} + operator bool() const {return mPos < SIZE;} + ValueIterator& operator++() {++mPos; return *this;} + ValueIterator operator++(int) { + auto tmp = *this; + ++(*this); + return tmp; + } + }; // Member class ValueIterator + + ValueIterator beginValue() {return ValueIterator(this);} + ValueIterator cbeginValueAll() const {return ValueIterator(this);} + + LeafNode(const Coord& ijk, bool value, bool state) + : mOrigin(ijk & ~MASK) + , mValueMask(state) + , mValues(value) + , mDstOffset(0) + { + } + LeafNode(const LeafNode&) = delete; // disallow copy-construction + LeafNode(LeafNode&&) = delete; // disallow move construction + LeafNode& operator=(const LeafNode&) = delete; // disallow copy assignment + LeafNode& operator=(LeafNode&&) = delete; // disallow move assignment + ~LeafNode() = default; + + const Mask& valueMask() const {return mValueMask;} + const Mask& getValueMask() const {return mValueMask;} + const Coord& origin() const {return mOrigin;} + + /// @brief Return the linear offset corresponding to the given coordinate + static uint32_t CoordToOffset(const Coord& ijk) + { + return ((ijk[0] & int32_t(MASK)) << (2 * LOG2DIM)) + + ((ijk[1] & int32_t(MASK)) << LOG2DIM) + + (ijk[2] & int32_t(MASK)); + } + + static Coord OffsetToLocalCoord(uint32_t n) + { + NANOVDB_ASSERT(n < SIZE); + const int32_t m = n & ((1 << 2 * LOG2DIM) - 1); + return Coord(n >> 2 * LOG2DIM, m >> LOG2DIM, m & int32_t(MASK)); + } + + void localToGlobalCoord(Coord& ijk) const + { + ijk += mOrigin; + } + + Coord offsetToGlobalCoord(uint32_t n) const + { + Coord ijk = LeafNode::OffsetToLocalCoord(n); + this->localToGlobalCoord(ijk); + return ijk; + } + bool getFirstValue() const { return mValues.isOn(0); } + bool getLastValue() const { return mValues.isOn(SIZE - 1); } + + bool getValue(uint32_t i) const {return mValues.isOn(i);} + bool getValue(const Coord& ijk) const + { + return mValues.isOn(CoordToOffset(ijk)); + } +#ifndef NANOVDB_NEW_ACCESSOR_METHODS + template + bool isActiveAndCache(const Coord& ijk, const AccT&) const + { + return mValueMask.isOn(CoordToOffset(ijk)); + } + + template + bool getValueAndCache(const Coord& ijk, const AccT&) const + { + return mValues.isOn(CoordToOffset(ijk)); + } + + template + void setValueAndCache(const Coord& ijk, bool value, const AccT&) + { + const uint32_t n = CoordToOffset(ijk); + mValueMask.setOn(n); + mValues.setOn(n); + } + + template + void setValueOnAndCache(const Coord& ijk, const AccT&) + { + const uint32_t n = CoordToOffset(ijk); + mValueMask.setOn(n); + } +#endif + + void setValue(uint32_t n, bool value) + { + mValueMask.setOn(n); + mValues.set(n, value); + } + void setValue(const Coord& ijk, bool value) {return this->setValue(CoordToOffset(ijk), value);} + + void merge(LeafNode &other) + { + mValues |= other.mValues; + mValueMask |= other.mValueMask; + } + +}; // tools::build::LeafNode + 
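+// Note: all three LeafNode variants above share the same 8^3 voxel layout, so a voxel with +// local coordinates (i, j, k) maps to the linear offset ((i & 7) << 6) + ((j & 7) << 3) + (k & 7); +// e.g. the local coordinate (1, 2, 3) maps to offset 64 + 16 + 3 = 83. +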
+//================================================================================================ + +template +template +inline typename util::enable_if::value>::type +LeafNode::signedFloodFill(T outside) +{ + const uint32_t first = *mValueMask.beginOn(); + if (first < SIZE) { + bool xInside = mValues[first] < 0, yInside = xInside, zInside = xInside; + for (uint32_t x = 0; x != DIM; ++x) { + const uint32_t x00 = x << (2 * LOG2DIM); + if (mValueMask.isOn(x00)) + xInside = mValues[x00] < 0; // element(x, 0, 0) + yInside = xInside; + for (uint32_t y = 0; y != DIM; ++y) { + const uint32_t xy0 = x00 + (y << LOG2DIM); + if (mValueMask.isOn(xy0)) + yInside = mValues[xy0] < 0; // element(x, y, 0) + zInside = yInside; + for (uint32_t z = 0; z != (1 << LOG2DIM); ++z) { + const uint32_t xyz = xy0 + z; // element(x, y, z) + if (mValueMask.isOn(xyz)) { + zInside = mValues[xyz] < 0; + } else { + mValues[xyz] = zInside ? -outside : outside; + } + } + } + } + } +} // tools::build::LeafNode::signedFloodFill + +// ----------------------------> ValueAccessor <-------------------------------------- + +template +struct ValueAccessor +{ + using ValueType = typename BuildToValueMap::type; + using LeafT = LeafNode; + using Node1 = InternalNode; + using Node2 = InternalNode; + using RootNodeType = RootNode; + using LeafNodeType = typename RootNodeType::LeafNodeType; + + ValueAccessor(RootNodeType& root) + : mRoot(root) + , mKeys{Coord(math::Maximum::value()), Coord(math::Maximum::value()), Coord(math::Maximum::value())} + , mNode{nullptr, nullptr, nullptr} + { + } + ValueAccessor(ValueAccessor&&) = default; // allow move construction + ValueAccessor(const ValueAccessor&) = delete; // disallow copy construction + ValueType getValue(int i, int j, int k) const {return this->getValue(Coord(i,j,k));} + template + bool isCached(const Coord& ijk) const + { + return (ijk[0] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][0] && + (ijk[1] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][1] && + (ijk[2] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][2]; + } + + template + auto get(const Coord& ijk, ArgsT&&... args) const + { + if (this->template isCached(ijk)) { + return ((const LeafT*)mNode[0])->template get(ijk, args...); + } else if (this->template isCached(ijk)) { + return ((const Node1*)mNode[1])->template getAndCache(ijk, *this, args...); + } else if (this->template isCached(ijk)) { + return ((const Node2*)mNode[2])->template getAndCache(ijk, *this, args...); + } + return mRoot.template getAndCache(ijk, *this, args...); + } + + template + auto set(const Coord& ijk, ArgsT&&... 
args) const + { + if (this->template isCached(ijk)) { + return ((LeafT*)mNode[0])->template set(ijk, args...); + } else if (this->template isCached(ijk)) { + return ((Node1*)mNode[1])->template setAndCache(ijk, *this, args...); + } else if (this->template isCached(ijk)) { + return ((Node2*)mNode[2])->template setAndCache(ijk, *this, args...); + } + return mRoot.template setAndCache(ijk, *this, args...); + } + +#ifdef NANOVDB_NEW_ACCESSOR_METHODS + ValueType getValue(const Coord& ijk) const {return this->template get>(ijk);} + LeafT* setValue(const Coord& ijk, const ValueType& value) {return this->template set>(ijk, value);} + LeafT* setValueOn(const Coord& ijk) {return this->template set>(ijk);} + LeafT& touchLeaf(const Coord& ijk) {return this->template set>(ijk);} + bool isActive(const Coord& ijk) const {return this->template get>(ijk);} +#else + ValueType getValue(const Coord& ijk) const + { + if (this->template isCached(ijk)) { + return ((LeafT*)mNode[0])->getValueAndCache(ijk, *this); + } else if (this->template isCached(ijk)) { + return ((Node1*)mNode[1])->getValueAndCache(ijk, *this); + } else if (this->template isCached(ijk)) { + return ((Node2*)mNode[2])->getValueAndCache(ijk, *this); + } + return mRoot.getValueAndCache(ijk, *this); + } + + /// @brief Sets value in a leaf node and returns it. + LeafT* setValue(const Coord& ijk, const ValueType& value) + { + if (this->template isCached(ijk)) { + ((LeafT*)mNode[0])->setValueAndCache(ijk, value, *this); + } else if (this->template isCached(ijk)) { + ((Node1*)mNode[1])->setValueAndCache(ijk, value, *this); + } else if (this->template isCached(ijk)) { + ((Node2*)mNode[2])->setValueAndCache(ijk, value, *this); + } else { + mRoot.setValueAndCache(ijk, value, *this); + } + NANOVDB_ASSERT(this->isCached(ijk)); + return (LeafT*)mNode[0]; + } + void setValueOn(const Coord& ijk) + { + if (this->template isCached(ijk)) { + ((LeafT*)mNode[0])->setValueOnAndCache(ijk, *this); + } else if (this->template isCached(ijk)) { + ((Node1*)mNode[1])->setValueOnAndCache(ijk, *this); + } else if (this->template isCached(ijk)) { + ((Node2*)mNode[2])->setValueOnAndCache(ijk, *this); + } else { + mRoot.setValueOnAndCache(ijk, *this); + } + } + void touchLeaf(const Coord& ijk) const + { + if (this->template isCached(ijk)) { + return; + } else if (this->template isCached(ijk)) { + ((Node1*)mNode[1])->touchLeafAndCache(ijk, *this); + } else if (this->template isCached(ijk)) { + ((Node2*)mNode[2])->touchLeafAndCache(ijk, *this); + } else { + mRoot.touchLeafAndCache(ijk, *this); + } + } + bool isActive(const Coord& ijk) const + { + if (this->template isCached(ijk)) { + return ((LeafT*)mNode[0])->isActiveAndCache(ijk, *this); + } else if (this->template isCached(ijk)) { + return ((Node1*)mNode[1])->isActiveAndCache(ijk, *this); + } else if (this->template isCached(ijk)) { + return ((Node2*)mNode[2])->isActiveAndCache(ijk, *this); + } + return mRoot.isActiveAndCache(ijk, *this); + } +#endif + + bool isValueOn(const Coord& ijk) const { return this->isActive(ijk); } + template + void insert(const Coord& ijk, NodeT* node) const + { + mKeys[NodeT::LEVEL] = ijk & ~NodeT::MASK; + mNode[NodeT::LEVEL] = node; + } + RootNodeType& mRoot; + mutable Coord mKeys[3]; + mutable void* mNode[3]; +}; // tools::build::ValueAccessor + +// ----------------------------> Tree <-------------------------------------- + +template +struct Tree +{ + using ValueType = typename BuildToValueMap::type; + using Node0 = LeafNode; + using Node1 = InternalNode; + using Node2 = InternalNode; + using 
RootNodeType = RootNode<Node2>; + using LeafNodeType = typename RootNodeType::LeafNodeType; + struct WriteAccessor; + + RootNodeType mRoot; + std::mutex mMutex; + + Tree(const ValueType &background) : mRoot(background) {} + Tree(const Tree&) = delete; // disallow copy construction + Tree(Tree&&) = delete; // disallow move construction + Tree& tree() {return *this;} + RootNodeType& root() {return mRoot;} + ValueType getValue(const Coord& ijk) const {return mRoot.getValue(ijk);} + ValueType getValue(int i, int j, int k) const {return this->getValue(Coord(i,j,k));} + void setValue(const Coord& ijk, const ValueType &value) {mRoot.setValue(ijk, value);} + std::array<size_t, 3> nodeCount() const + { + std::array<size_t, 3> count{0,0,0}; + mRoot.nodeCount(count); + return count; + } + /// @brief regular accessor for thread-safe reading and non-thread-safe writing + ValueAccessor<BuildT> getAccessor() { return ValueAccessor<BuildT>(mRoot); } + /// @brief special accessor for thread-safe writing only + WriteAccessor getWriteAccessor() { return WriteAccessor(mRoot, mMutex); } +};// tools::build::Tree + +// ----------------------------> Tree::WriteAccessor <-------------------------------------- + +template <typename BuildT> +struct Tree<BuildT>::WriteAccessor +{ + using AccT = ValueAccessor<BuildT>; + using ValueType = typename AccT::ValueType; + using LeafT = typename AccT::LeafT; + using Node1 = typename AccT::Node1; + using Node2 = typename AccT::Node2; + using RootNodeType = typename AccT::RootNodeType; + + WriteAccessor(RootNodeType& parent, std::mutex &mx) + : mParent(parent) + , mRoot(parent.mBackground) + , mAcc(mRoot) + , mMutex(mx) + { + } + WriteAccessor(const WriteAccessor&) = delete; // disallow copy construction + WriteAccessor(WriteAccessor&&) = default; // allow move construction + ~WriteAccessor() { this->merge(); } + void merge() + { + mMutex.lock(); + mParent.merge(mRoot); + mMutex.unlock(); + } + inline void setValueOn(const Coord& ijk) {mAcc.setValueOn(ijk);} + inline void setValue(const Coord& ijk, const ValueType &value) {mAcc.setValue(ijk, value);} + + RootNodeType &mParent, mRoot; + AccT mAcc; + std::mutex &mMutex; +}; // tools::build::Tree::WriteAccessor + +// ----------------------------> Grid <-------------------------------------- + +template <typename BuildT> +struct Grid : public Tree<BuildT> +{ + using BuildType = BuildT; + using ValueType = typename BuildToValueMap<BuildT>::type; + using TreeType = Tree<BuildT>; + using Node0 = LeafNode<BuildT>; + using Node1 = InternalNode<Node0>; + using Node2 = InternalNode<Node1>; + using RootNodeType = RootNode<Node2>; + + GridClass mGridClass; + GridType mGridType; + Map mMap; + std::string mName; + + Grid(const ValueType &background, const std::string &name = "", GridClass gClass = GridClass::Unknown) + : TreeType(background) + , mGridClass(gClass) + , mGridType(toGridType<BuildT>()) + , mName(name) + { + mMap.set(1.0, Vec3d(0.0), 1.0); + } + TreeType& tree() {return *this;} + const GridType& gridType() const { return mGridType; } + const GridClass& gridClass() const { return mGridClass; } + const Map& map() const { return mMap; } + void setTransform(double scale=1.0, const Vec3d &translation = Vec3d(0.0)) {mMap.set(scale, translation, 1.0);} + const std::string& gridName() const { return mName; } + const std::string& getName() const { return mName; } + void setName(const std::string &name) { mName = name; } + /// @brief Sets grid values in the domain of the @a bbox to those returned by the specified @a func with the + /// expected signature [](const Coord&)->ValueType. 
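+ ///
+ /// @par Example (a minimal editorial sketch; the grid name, background and functor are illustrative only):
+ /// @code
+ /// build::FloatGrid grid(1.0f, "example");
+ /// grid([](const Coord& ijk) { return ijk[0] > 0 ? 1.0f : -1.0f; },
+ ///      CoordBBox(Coord(-8), Coord(7)));
+ /// @endcode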
+ /// +/// @note If @a func returns a value equal to the background value of the input grid at a + /// specific voxel coordinate, then the active state of that coordinate is off! Else the value + /// is set and the active state is on. This is done to allow for sparse grids to be generated. + /// + /// @param func Functor used to evaluate the grid values in the @a bbox + /// @param bbox Coordinate bounding-box over which the grid values will be set. + /// @param delta Specifies a lower threshold value for rendering (optional). Typically equals the voxel size + /// for level sets and otherwise it's zero. + template <typename Func> + void operator()(const Func& func, const CoordBBox& bbox, ValueType delta = ValueType(0)); +};// tools::build::Grid + +template <typename BuildT> +template <typename Func> +void Grid<BuildT>::operator()(const Func& func, const CoordBBox& bbox, ValueType delta) +{ + auto &root = this->tree().root(); +#if __cplusplus >= 201703L + static_assert(util::is_same<ValueType, typename std::invoke_result<Func, const Coord&>::type>::value, "GridBuilder: mismatched ValueType"); +#else// invoke_result was introduced in C++17 and result_of was removed in C++20 + static_assert(util::is_same<ValueType, typename std::result_of<Func(const Coord&)>::type>::value, "GridBuilder: mismatched ValueType"); +#endif + const CoordBBox leafBBox(bbox[0] >> Node0::TOTAL, bbox[1] >> Node0::TOTAL); + std::mutex mutex; + util::forEach(leafBBox, [&](const CoordBBox& b) { + Node0* leaf = nullptr; + for (auto it = b.begin(); it; ++it) { + Coord min(*it << Node0::TOTAL), max(min + Coord(Node0::DIM - 1)); + const CoordBBox b(min.maxComponent(bbox.min()), + max.minComponent(bbox.max()));// crop + if (leaf == nullptr) { + leaf = new Node0(b[0], root.mBackground, false); + } else { + leaf->mOrigin = b[0] & ~Node0::MASK; + NANOVDB_ASSERT(leaf->mValueMask.isOff()); + } + leaf->mDstOffset = 0;// no prune + for (auto ijk = b.begin(); ijk; ++ijk) { + const auto v = func(*ijk);// call functor + if (v != root.mBackground) leaf->setValue(*ijk, v);// don't insert background values + } + if (!leaf->mValueMask.isOff()) {// has active values + if (leaf->mValueMask.isOn()) {// only active values + const auto first = leaf->getFirstValue(); + int n=1; + while (n<512) {// 8^3 = 512 + if (leaf->mValues[n++] != first) break; + } + if (n == 512) leaf->mDstOffset = 1;// prune below + } + std::lock_guard<std::mutex> guard(mutex); + NANOVDB_ASSERT(leaf != nullptr); + root.addNode(leaf); + NANOVDB_ASSERT(leaf == nullptr); + } + }// loop over sub-part of leafBBox + if (leaf) delete leaf; + }); + + // Prune leaf and tile nodes + for (auto it2 = root.mTable.begin(); it2 != root.mTable.end(); ++it2) { + if (auto *upper = it2->second.child) {//upper level internal node + for (auto it1 = upper->mChildMask.beginOn(); it1; ++it1) { + auto *lower = upper->mTable[*it1].child;// lower level internal node + for (auto it0 = lower->mChildMask.beginOn(); it0; ++it0) { + auto *leaf = lower->mTable[*it0].child;// leaf nodes + if (leaf->mDstOffset) { + lower->mTable[*it0].value = leaf->getFirstValue(); + lower->mChildMask.setOff(*it0); + lower->mValueMask.setOn(*it0); + delete leaf; + } + }// loop over leaf nodes + if (lower->mChildMask.isOff()) {//only tiles + const auto first = lower->getFirstValue(); + int n=1; + while (n < 4096) {// 16^3 = 4096 + if (lower->mTable[n++].value != first) break; + } + if (n == 4096) {// identical tile values so prune + upper->mTable[*it1].value = first; + upper->mChildMask.setOff(*it1); + upper->mValueMask.setOn(*it1); + delete lower; + } + } + }// loop over lower internal nodes + if (upper->mChildMask.isOff()) {//only tiles + const auto first = upper->getFirstValue(); + int n=1; + 
while (n < 32768) {// 32^3 = 32768 + if (upper->mTable[n++].value != first) break; + } + if (n == 32768) {// identical tile values so prune + it2->second.value = first; + it2->second.state = upper->mValueMask.isOn(); + it2->second.child = nullptr; + delete upper; + } + } + }// is child node of the root + }// loop over root table +}// tools::build::Grid::operator() + +//================================================================================================ + +template +using BuildLeaf = LeafNode; +template +using BuildLower = InternalNode>; +template +using BuildUpper = InternalNode>; +template +using BuildRoot = RootNode>; +template +using BuildTile = typename BuildRoot::Tile; + +using FloatGrid = Grid; +using Fp4Grid = Grid; +using Fp8Grid = Grid; +using Fp16Grid = Grid; +using FpNGrid = Grid; +using DoubleGrid = Grid; +using Int32Grid = Grid; +using UInt32Grid = Grid; +using Int64Grid = Grid; +using Vec3fGrid = Grid; +using Vec3dGrid = Grid; +using Vec4fGrid = Grid; +using Vec4dGrid = Grid; +using MaskGrid = Grid; +using IndexGrid = Grid; +using OnIndexGrid = Grid; +using BoolGrid = Grid; + +// ----------------------------> NodeManager <-------------------------------------- + +// GridT can be openvdb::Grid and nanovdb::tools::build::Grid +template +class NodeManager +{ +public: + + using ValueType = typename GridT::ValueType; + using BuildType = typename GridT::BuildType; + using GridType = GridT; + using TreeType = typename GridT::TreeType; + using RootNodeType = typename TreeType::RootNodeType; + static_assert(RootNodeType::LEVEL == 3, "NodeManager expected LEVEL=3"); + using Node2 = typename RootNodeType::ChildNodeType; + using Node1 = typename Node2::ChildNodeType; + using Node0 = typename Node1::ChildNodeType; + + NodeManager(GridT &grid) : mGrid(grid) {this->init();} + void init() + { + mArray0.clear(); + mArray1.clear(); + mArray2.clear(); + auto counts = mGrid.tree().nodeCount(); + mArray0.reserve(counts[0]); + mArray1.reserve(counts[1]); + mArray2.reserve(counts[2]); + + for (auto it2 = mGrid.tree().root().cbeginChildOn(); it2; ++it2) { + Node2 &upper = const_cast(*it2); + mArray2.emplace_back(&upper); + for (auto it1 = upper.cbeginChildOn(); it1; ++it1) { + Node1 &lower = const_cast(*it1); + mArray1.emplace_back(&lower); + for (auto it0 = lower.cbeginChildOn(); it0; ++it0) { + Node0 &leaf = const_cast(*it0); + mArray0.emplace_back(&leaf); + }// loop over leaf nodes + }// loop over lower internal nodes + }// loop over root node + } + + /// @brief Return the number of tree nodes at the specified level + /// @details 0 is leaf, 1 is lower internal, and 2 is upper internal level + uint64_t nodeCount(int level) const + { + NANOVDB_ASSERT(level==0 || level==1 || level==2); + return level==0 ? mArray0.size() : level==1 ? 
mArray1.size() : mArray2.size(); + } + + template + typename util::enable_if::type node(int i) {return *mArray0[i];} + template + typename util::enable_if::type node(int i) const {return *mArray0[i];} + template + typename util::enable_if::type node(int i) {return *mArray1[i];} + template + typename util::enable_if::type node(int i) const {return *mArray1[i];} + template + typename util::enable_if::type node(int i) {return *mArray2[i];} + template + typename util::enable_if::type node(int i) const {return *mArray2[i];} + + /// @brief Return the i'th leaf node with respect to breadth-first ordering + const Node0& leaf(uint32_t i) const { return *mArray0[i]; } + Node0& leaf(uint32_t i) { return *mArray0[i]; } + uint64_t leafCount() const {return mArray0.size();} + + /// @brief Return the i'th lower internal node with respect to breadth-first ordering + const Node1& lower(uint32_t i) const { return *mArray1[i]; } + Node1& lower(uint32_t i) { return *mArray1[i]; } + uint64_t lowerCount() const {return mArray1.size();} + + /// @brief Return the i'th upper internal node with respect to breadth-first ordering + const Node2& upper(uint32_t i) const { return *mArray2[i]; } + Node2& upper(uint32_t i) { return *mArray2[i]; } + uint64_t upperCount() const {return mArray2.size();} + + RootNodeType& root() {return mGrid.tree().root();} + const RootNodeType& root() const {return mGrid.tree().root();} + + TreeType& tree() {return mGrid.tree();} + const TreeType& tree() const {return mGrid.tree();} + + GridType& grid() {return mGrid;} + const GridType& grid() const {return mGrid;} + +protected: + + GridT &mGrid; + std::vector mArray0; // leaf nodes + std::vector mArray1; // lower internal nodes + std::vector mArray2; // upper internal nodes + +};// NodeManager + +template +typename util::enable_if::value>::type +sdfToLevelSet(NodeManagerT &mgr) +{ + mgr.grid().mGridClass = GridClass::LevelSet; + // Note that the bottom-up flood filling is essential + const auto outside = mgr.root().mBackground; + util::forEach(0, mgr.leafCount(), 8, [&](const util::Range1D& r) { + for (auto i = r.begin(); i != r.end(); ++i) mgr.leaf(i).signedFloodFill(outside); + }); + util::forEach(0, mgr.lowerCount(), 1, [&](const util::Range1D& r) { + for (auto i = r.begin(); i != r.end(); ++i) mgr.lower(i).signedFloodFill(outside); + }); + util::forEach(0, mgr.upperCount(), 1, [&](const util::Range1D& r) { + for (auto i = r.begin(); i != r.end(); ++i) mgr.upper(i).signedFloodFill(outside); + }); + mgr.root().signedFloodFill(outside); +}// sdfToLevelSet + +template +void levelSetToFog(NodeManagerT &mgr, bool rebuild = true) +{ + using ValueType = typename NodeManagerT::ValueType; + mgr.grid().mGridClass = GridClass::FogVolume; + const ValueType d = -mgr.root().mBackground, w = 1.0f / d; + //std::atomic_bool prune{false}; + std::atomic prune{false}; + auto op = [&](ValueType& v) -> bool { + if (v > ValueType(0)) { + v = ValueType(0); + return false; + } + v = v > d ? 
v * w : ValueType(1); + return true; + }; + util::forEach(0, mgr.leafCount(), 8, [&](const util::Range1D& r) { + for (auto i = r.begin(); i != r.end(); ++i) { + auto& leaf = mgr.leaf(i); + for (uint32_t i = 0; i < 512u; ++i) leaf.mValueMask.set(i, op(leaf.mValues[i])); + } + }); + util::forEach(0, mgr.lowerCount(), 1, [&](const util::Range1D& r) { + for (auto i = r.begin(); i != r.end(); ++i) { + auto& node = mgr.lower(i); + for (uint32_t i = 0; i < 4096u; ++i) { + if (node.mChildMask.isOn(i)) { + auto* leaf = node.mTable[i].child; + if (leaf->mValueMask.isOff()) {// prune leaf node + node.mTable[i].value = leaf->getFirstValue(); + node.mChildMask.setOff(i); + delete leaf; + prune = true; + } + } else { + node.mValueMask.set(i, op(node.mTable[i].value)); + } + } + } + }); + util::forEach(0, mgr.upperCount(), 1, [&](const util::Range1D& r) { + for (auto i = r.begin(); i != r.end(); ++i) { + auto& node = mgr.upper(i); + for (uint32_t i = 0; i < 32768u; ++i) { + if (node.mChildMask.isOn(i)) {// prune lower internal node + auto* child = node.mTable[i].child; + if (child->mChildMask.isOff() && child->mValueMask.isOff()) { + node.mTable[i].value = child->getFirstValue(); + node.mChildMask.setOff(i); + delete child; + prune = true; + } + } else { + node.mValueMask.set(i, op(node.mTable[i].value)); + } + } + } + }); + + for (auto it = mgr.root().mTable.begin(); it != mgr.root().mTable.end(); ++it) { + auto* child = it->second.child; + if (child == nullptr) { + it->second.state = op(it->second.value); + } else if (child->mChildMask.isOff() && child->mValueMask.isOff()) { + it->second.value = child->getFirstValue(); + it->second.state = false; + it->second.child = nullptr; + delete child; + prune = true; + } + } + if (rebuild && prune) mgr.init(); +}// levelSetToFog + +// ----------------------------> Implementations of random access methods <-------------------------------------- + +template +struct TouchLeaf { + static BuildLeaf& set(BuildLeaf &leaf, uint32_t) {return leaf;} +};// TouchLeaf + +/// @brief Implements Tree::getValue(Coord), i.e. return the value associated with a specific coordinate @c ijk. +/// @tparam BuildT Build type of the grid being called +/// @details The value at a coordinate maps to the background, a tile value or a leaf value. +template +struct GetValue { + static auto get(const BuildRoot &root) {return root.mBackground;} + static auto get(const BuildTile &tile) {return tile.value;} + static auto get(const BuildUpper &node, uint32_t n) {return node.mTable[n].value;} + static auto get(const BuildLower &node, uint32_t n) {return node.mTable[n].value;} + static auto get(const BuildLeaf &leaf, uint32_t n) {return leaf.getValue(n);} +};// GetValue + +/// @brief Implements Tree::isActive(Coord) +/// @tparam T Build type of the grid being called +template +struct GetState { + static bool get(const BuildRoot&) {return false;} + static bool get(const BuildTile &tile) {return tile.state;} + static bool get(const BuildUpper &node, uint32_t n) {return node.mValueMask.isOn(n);} + static bool get(const BuildLower &node, uint32_t n) {return node.mValueMask.isOn(n);} + static bool get(const BuildLeaf &leaf, uint32_t n) {return leaf.mValueMask.isOn(n);} +};// GetState + +/// @brief Set the value and its state at the leaf level mapped to by ijk, and create the leaf node and branch if needed. 
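+/// @details Illustrative sketch of the leaf-level semantics (here @c leaf and the
+/// linear voxel offset @c n < 512 are assumed to come from an enclosing traversal):
+/// @code
+/// SetValue<float>::set(leaf, n, 1.0f);// writes the value and sets the active bit
+/// @endcode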
+/// @tparam T BuildType of the corresponding tree +template +struct SetValue { + static BuildLeaf* set(BuildLeaf &leaf, uint32_t n) { + leaf.mValueMask.setOn(n);// always set the active bit + return &leaf; + } + static BuildLeaf* set(BuildLeaf &leaf, uint32_t n, const typename BuildLeaf::ValueType &v) { + leaf.setValue(n, v); + return &leaf; + } +};// SetValue + +/// @brief Implements Tree::probeLeaf(Coord) +/// @tparam T Build type of the grid being called +template +struct ProbeValue { + using ValueT = typename BuildLeaf::ValueType; + static bool get(const BuildRoot &root, ValueT &v) { + v = root.mBackground; + return false; + } + static bool get(const BuildTile &tile, ValueT &v) { + v = tile.value; + return tile.state; + } + static bool get(const BuildUpper &node, uint32_t n, ValueT &v) { + v = node.mTable[n].value; + return node.mValueMask.isOn(n); + } + static bool get(const BuildLower &node, uint32_t n, ValueT &v) { + v = node.mTable[n].value; + return node.mValueMask.isOn(n); + } + static bool get(const BuildLeaf &leaf, uint32_t n, ValueT &v) { + v = leaf.getValue(n); + return leaf.isActive(n); + } +};// ProbeValue + +} // namespace tools::build + +} // namespace nanovdb + +#endif // NANOVDB_TOOLS_BUILD_GRIDBUILDER_H_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/tools/GridChecksum.h b/nanovdb/nanovdb/tools/GridChecksum.h new file mode 100644 index 0000000000..882ab9222a --- /dev/null +++ b/nanovdb/nanovdb/tools/GridChecksum.h @@ -0,0 +1,427 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/*! + \file nanovdb/tools/GridChecksum.h + + \author Ken Museth + + \brief Computes a pair of uint32_t checksums, of a Grid, by means of 32 bit Cyclic Redundancy Check (CRC32) + + \details A CRC32 is the 32 bit remainder, or residue, of binary division of a message, by a polynomial. + + + \note before v32.6.0: checksum[0] = Grid+Tree+Root, checksum[1] = nodes + after v32.6.0: checksum[0] = Grid+Tree, checksum[1] = nodes + blind data in 4K blocks + + When serialized: + [Grid,Tree][Root][ROOT TILES...][Node<5>...][Node<4>...][Leaf<3>...][BlindMeta...][BlindData...] + checksum[2] before v32.6.0: <------------- [0] ------------><-------------- [1] ---------------> + checksum[2] after v32.6.0: <---[0]---><----------------------------------------[1]----------------------------------------> +*/ + +#ifndef NANOVDB_TOOLS_GRIDCHECKSUM_H_HAS_BEEN_INCLUDED +#define NANOVDB_TOOLS_GRIDCHECKSUM_H_HAS_BEEN_INCLUDED + +#include // for std::generate +#include +#include +#include +#include // offsetof macro +#include +#include +#include // for std::unique_ptr + +#include +#include +#include + +// Define log of block size for FULL CRC32 computation. +// A value of 12 corresponds to a block size of 4KB (2^12 = 4096). +#define NANOVDB_CRC32_LOG2_BLOCK_SIZE 12 + +namespace nanovdb {// ================================================================== + +namespace tools {// ==================================================================== + +/// @brief Compute the (2 x CRC32) checksum of the specified @c gridData +/// @param gridData Base pointer to the grid from which the checksum is computed. +/// @param mode Defines the mode of computation for the checksum. 
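+///
+/// @details Typical use (an illustrative sketch; @c data denotes a non-const
+/// GridData pointer obtained elsewhere, e.g. from a GridHandle):
+/// @code
+/// updateChecksum(data, CheckMode::Full);// recompute both head and tail checksums
+/// NANOVDB_ASSERT(validateChecksum(data, CheckMode::Full));
+/// @endcode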
+/// @return Return the (2 x CRC32) checksum of the specified @c gridData
+Checksum evalChecksum(const GridData *gridData, CheckMode mode = CheckMode::Default);
+
+/// @brief Extract the checksum of a grid
+/// @param gridData Base pointer to grid with a checksum
+/// @return Checksum encoded in the specified grid
+inline Checksum getChecksum(const GridData *gridData)
+{
+    NANOVDB_ASSERT(gridData);
+    return gridData->mChecksum;
+}
+
+/// @brief Return true if the checksum of @c gridData matches the expected
+///        value already encoded into the grid's meta data.
+/// @param gridData Base pointer to the grid whose checksum is validated.
+/// @param mode Defines the mode of computation for the checksum.
+bool validateChecksum(const GridData *gridData, CheckMode mode = CheckMode::Default);
+
+/// @brief Updates the checksum of a grid
+/// @param gridData Base pointer to the grid whose checksum will be updated.
+/// @param mode Defines the mode of computation for the checksum.
+inline void updateChecksum(GridData *gridData, CheckMode mode)
+{
+    NANOVDB_ASSERT(gridData);
+    gridData->mChecksum = evalChecksum(gridData, mode);
+}
+
+/// @brief Updates the checksum of a grid while preserving its mode
+/// @param gridData Base pointer to the grid
+inline void updateChecksum(GridData *gridData)
+{
+    updateChecksum(gridData, gridData->mChecksum.mode());
+}
+
+}// namespace tools
+
+namespace util {
+
+/// @brief Initialize a single entry in the look-up table for CRC32 computations
+/// @param lut pointer of size 256 for the look-up table
+/// @param n entry in the table (assumed n < 256)
+inline __hostdev__ void initCrc32Lut(uint32_t lut[256], uint32_t n)
+{
+    lut[n] = n;
+    uint32_t &cs = lut[n];
+    for (int i = 0; i < 8; ++i) cs = (cs >> 1) ^ ((cs & 1) ? 0xEDB88320 : 0);
+}
+
+/// @brief Initialize the entire look-up table for CRC32 computations
+/// @param lut pointer of size 256 for the look-up table
+inline __hostdev__ void initCrc32Lut(uint32_t lut[256]){for (uint32_t n = 0u; n < 256u; ++n) initCrc32Lut(lut, n);}
+
+/// @brief Create and initialize the entire look-up table for CRC32 computations
+/// @return returns a unique pointer to the look-up table of size 256.
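+///
+/// @details Typical use (illustrative only):
+/// @code
+/// auto lut = createCrc32Lut();
+/// const char msg[] = "NanoVDB";
+/// const uint32_t cs = crc32(msg, sizeof(msg) - 1, lut.get());// accelerated LUT path
+/// @endcode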
+inline std::unique_ptr<uint32_t[]> createCrc32Lut()
+{
+    std::unique_ptr<uint32_t[]> lut(new uint32_t[256]);
+    initCrc32Lut(lut.get());
+    return lut;
+}
+
+/// @brief Compute crc32 checksum of @c data of @c size bytes (without a lookup table)
+/// @param data pointer to beginning of data
+/// @param size byte size of data
+/// @param crc initial value of crc32 checksum
+/// @return return crc32 checksum of @c data
+inline __hostdev__ uint32_t crc32(const void* data, size_t size, uint32_t crc = 0)
+{
+    NANOVDB_ASSERT(data);
+    crc = ~crc;
+    for (auto *p = (const uint8_t*)data, *q = p + size; p != q; ++p) {
+        crc ^= *p;
+        for (int j = 0; j < 8; ++j) crc = (crc >> 1) ^ (0xEDB88320 & (-(crc & 1)));
+    }
+    return ~crc;
+}
+
+/// @brief Compute crc32 checksum of data between @c begin and @c end
+/// @param begin points to beginning of data
+/// @param end points to the end of @c data (exclusive)
+/// @param crc initial value of crc32 checksum
+/// @return return crc32 checksum
+inline __hostdev__ uint32_t crc32(const void *begin, const void *end, uint32_t crc = 0)
+{
+    NANOVDB_ASSERT(begin && end);
+    NANOVDB_ASSERT(end >= begin);
+    return crc32(begin, (const char*)end - (const char*)begin, crc);
+}
+
+/// @brief Compute crc32 checksum of @c data with @c size bytes using a lookup table
+/// @param data pointer to beginning of data
+/// @param size byte size
+/// @param lut pointer to lookup table for accelerated crc32 computation
+/// @param crc initial value of the checksum
+/// @return crc32 checksum of @c data with @c size bytes
+inline __hostdev__ uint32_t crc32(const void *data, size_t size, const uint32_t lut[256], uint32_t crc = 0)
+{
+    NANOVDB_ASSERT(data);
+    crc = ~crc;
+    for (auto *p = (const uint8_t*)data, *q = p + size; p != q; ++p) crc = lut[(crc ^ *p) & 0xFF] ^ (crc >> 8);
+    return ~crc;
+}
+
+/// @brief Compute crc32 checksum of data between @c begin and @c end using a lookup table
+/// @param begin points to beginning of data
+/// @param end points to the end of @c data (exclusive)
+/// @param lut pointer to lookup table for accelerated crc32 computation
+/// @param crc initial value of crc32 checksum
+/// @return return crc32 checksum
+inline __hostdev__ uint32_t crc32(const void *begin, const void *end, const uint32_t lut[256], uint32_t crc = 0)
+{
+    NANOVDB_ASSERT(begin && end);
+    NANOVDB_ASSERT(end >= begin);
+    return crc32(begin, (const char*)end - (const char*)begin, lut, crc);
+}// uint32_t util::crc32(const void *begin, const void *end, const uint32_t lut[256], uint32_t crc = 0)
+
+/// @brief Compute the crc32 checksum of @c data in parallel over 4KB blocks
+/// @param data pointer to beginning of data
+/// @param size byte size of data
+/// @param lut pointer to lookup table for accelerated crc32 computation
+/// @return combined crc32 checksum of all the per-block checksums
+inline uint32_t blockedCrc32(const void *data, size_t size, const uint32_t *lut)
+{
+    if (size == 0 ) return ~uint32_t(0);
+    const uint64_t blockCount = size >> NANOVDB_CRC32_LOG2_BLOCK_SIZE;// number of 4 KB (4096 byte) blocks
+    std::unique_ptr<uint32_t[]> checksums(new uint32_t[blockCount]);
+    forEach(0, blockCount, 64, [&](const Range1D &r) {
+        uint32_t blockSize = 1 << NANOVDB_CRC32_LOG2_BLOCK_SIZE, *p = checksums.get() + r.begin();
+        for (auto i = r.begin(); i != r.end(); ++i) {
+            if (i+1 == blockCount) blockSize += static_cast<uint32_t>(size - (blockCount << NANOVDB_CRC32_LOG2_BLOCK_SIZE));
+            *p++ = crc32((const uint8_t*)data + (i << NANOVDB_CRC32_LOG2_BLOCK_SIZE), blockSize, lut);
+        }
+    });
+    return crc32(checksums.get(), sizeof(uint32_t)*blockCount, lut);
+}// uint32_t util::blockedCrc32(const void *data, size_t size, const uint32_t *lut)
+
+/// @brief Compute the crc32 checksum of data between @c begin and @c end in parallel over 4KB blocks
+/// @param begin points to beginning of data
+/// @param end points to the end of @c data (exclusive)
+/// @param lut pointer to lookup table for accelerated crc32 computation
+/// @return combined crc32 checksum of all the per-block checksums
+inline uint32_t blockedCrc32(const void *begin, const void *end, const uint32_t *lut)
+{
+    NANOVDB_ASSERT(begin && end);
+    NANOVDB_ASSERT(end >= begin);
+    return blockedCrc32(begin, (const char*)end - (const char*)begin, lut);
+}
+
+}// namespace util
+
+namespace tools {
+
+// When serialized:
+// [Grid,Tree][Root][ROOT TILES...][Node<5>...][Node<4>...][Leaf<3>...][BlindMeta...][BlindData...]
+// checksum[2] before v32.6.0: <------------- [0] ------------><-------------- [1] --------------->
+// checksum[2] after  v32.6.0: <---[0]---><----------------------------------------[1]---------------------------------------->
+
+// ----------------------------> crc32Head <--------------------------------------
+
+/// @brief Compute the crc32 checksum of the head of a grid, i.e. of GridData and TreeData
+///        (for grids before v32.6.0 the head also includes the root node)
+/// @param gridData Base pointer to the grid
+/// @param lut Lookup table for accelerated crc32 computation
+/// @return crc32 checksum of the head of @c gridData
+inline __hostdev__ uint32_t crc32Head(const GridData *gridData, const uint32_t *lut)
+{
+    NANOVDB_ASSERT(gridData);
+    const uint8_t *begin = (const uint8_t*)(gridData), *mid = begin + sizeof(GridData) + sizeof(TreeData);
+    if (gridData->mVersion <= Version(32,6,0)) mid = (const uint8_t*)(gridData->template nodePtr<2>());
+    return util::crc32(begin + 16u, mid, lut);// exclude GridData::mMagic and GridData::mChecksum
+}// uint32_t crc32Head(const GridData *gridData, const uint32_t *lut)
+
+/// @brief As above, but without an acceleration lookup table
+/// @param gridData Base pointer to the grid
+/// @return crc32 checksum of the head of @c gridData
+inline __hostdev__ uint32_t crc32Head(const GridData *gridData)
+{
+    NANOVDB_ASSERT(gridData);
+    const uint8_t *begin = (const uint8_t*)(gridData), *mid = begin + sizeof(GridData) + sizeof(TreeData);
+    if (gridData->mVersion <= Version(32,6,0)) mid = (const uint8_t*)(gridData->template nodePtr<2>());
+    return util::crc32(begin + 16, mid);// exclude GridData::mMagic and GridData::mChecksum
+}// uint32_t crc32Head(const GridData *gridData)
+
+// ----------------------------> crc32TailOld <--------------------------------------
+
+// Old checksum
+template<typename ValueT>
+uint32_t crc32TailOld(const NanoGrid<ValueT> *grid, const uint32_t *lut)
+{
+    NANOVDB_ASSERT(grid->mVersion <= Version(32,6,0));
+    const auto &tree = grid->tree();
+    auto nodeMgrHandle = createNodeManager(*grid);
+    auto *nodeMgr = nodeMgrHandle.template mgr<ValueT>();
+    assert(nodeMgr && isAligned(nodeMgr));
+    const auto nodeCount = tree.nodeCount(0) + tree.nodeCount(1) + tree.nodeCount(2);
+    std::vector<uint32_t> checksums(nodeCount, 0);
+    util::forEach(0, tree.nodeCount(2), 1,[&](const util::Range1D &r) {// process upper internal nodes
+        uint32_t *p = checksums.data() + r.begin();
+        for (auto i = r.begin(); i != r.end(); ++i) {
+            const auto &node = nodeMgr->upper(static_cast<uint32_t>(i));
+            *p++ = util::crc32(&node, node.memUsage(), lut);
+        }
+    });
+    util::forEach(0, tree.nodeCount(1), 1, [&](const util::Range1D &r) { // process lower internal nodes
+        uint32_t *p = checksums.data() + r.begin() + tree.nodeCount(2);
+        for (auto i = r.begin(); i != r.end(); ++i) {
+            const auto &node = nodeMgr->lower(static_cast<uint32_t>(i));
+            *p++ = util::crc32(&node, node.memUsage(), lut);
+        }
+    });
+    util::forEach(0, tree.nodeCount(0), 8, [&](const util::Range1D &r) { // process leaf nodes
+        uint32_t *p = checksums.data() + r.begin() + tree.nodeCount(1) + tree.nodeCount(2);
+        for (auto i = r.begin(); i != r.end(); ++i) {
+            const auto &leaf = nodeMgr->leaf(static_cast<uint32_t>(i));
+            *p++ = util::crc32(&leaf, leaf.memUsage(), lut);
+        }
+    });
+    return util::crc32(checksums.data(), sizeof(uint32_t)*checksums.size(), lut);
+}// uint32_t crc32TailOld(const NanoGrid<ValueT> *grid, const uint32_t *lut)
+
+struct Crc32TailOld {
+    template<typename BuildT>
+    static uint32_t known(const GridData *gridData, const uint32_t *lut)
+    {
+        return crc32TailOld((const NanoGrid<BuildT>*)gridData, lut);
+    }
+    static uint32_t unknown(const GridData*, const uint32_t*)
+    {
+        throw std::runtime_error("Cannot call Crc32TailOld with grid of unknown type");
+        return 0u;//dummy
+    }
+};// struct Crc32TailOld
+
+inline uint32_t crc32Tail(const GridData *gridData, const uint32_t *lut)
+{
+    NANOVDB_ASSERT(gridData);
+    if (gridData->mVersion > Version(32,6,0)) {
+        const uint8_t *begin = (const uint8_t*)(gridData);
+        return util::blockedCrc32(begin + sizeof(GridData) + sizeof(TreeData), begin + gridData->mGridSize, lut);
+    } else {
+        return callNanoGrid<Crc32TailOld>(gridData, lut);
+    }
+}// uint32_t crc32Tail(const GridData *gridData, const uint32_t *lut)
+
+template<typename ValueT>
+uint32_t crc32Tail(const NanoGrid<ValueT> *grid, const uint32_t *lut)
+{
+    NANOVDB_ASSERT(grid);
+    if (grid->mVersion > Version(32,6,0)) {
+        const uint8_t *begin = (const uint8_t*)(grid);
+        return util::blockedCrc32(begin + sizeof(GridData) + sizeof(TreeData), begin + grid->mGridSize, lut);
+    } else {
+        return crc32TailOld(grid, lut);
+    }
+}// uint32_t crc32Tail(const NanoGrid<ValueT> *grid, const uint32_t *lut)
+
+// ----------------------------> evalChecksum <--------------------------------------
+
+/// @brief Compute the checksum of @c grid in the specified @c mode
+/// @tparam ValueT Build type of the grid
+/// @param grid Grid whose checksum is computed
+/// @param mode Defines the mode of computation for the checksum
+/// @return Checksum of @c grid
+template<typename ValueT>
+Checksum evalChecksum(const NanoGrid<ValueT> *grid, CheckMode mode)
+{
+    NANOVDB_ASSERT(grid);
+    Checksum cs;
+    if (mode != CheckMode::Empty) {
+        auto lut = util::createCrc32Lut();
+        cs.head() = crc32Head(grid, lut.get());
+        if (mode == CheckMode::Full) cs.tail() = crc32Tail(grid, lut.get());
+    }
+    return cs;
+}// evalChecksum(const NanoGrid<ValueT>*, CheckMode)
+
+template<typename ValueT>
+[[deprecated("Use evalChecksum(const NanoGrid<ValueT>*, CheckMode) instead")]]
+Checksum checksum(const NanoGrid<ValueT> *grid, CheckMode mode){return evalChecksum(grid, mode);}
+
+inline Checksum evalChecksum(const GridData *gridData, CheckMode mode)
+{
+    NANOVDB_ASSERT(gridData);
+    Checksum cs;
+    if (mode != CheckMode::Disable) {
+        auto lut = util::createCrc32Lut();
+        cs.head() = crc32Head(gridData, lut.get());
+        if (mode == CheckMode::Full) cs.tail() = crc32Tail(gridData, lut.get());
+    }
+    return cs;
+}// evalChecksum(GridData *data, CheckMode mode)
+
+[[deprecated("Use evalChecksum(const GridData*, CheckMode) instead")]]
+inline Checksum checksum(const GridData *gridData, CheckMode mode){return evalChecksum(gridData, mode);}
+
+template<typename ValueT>
+[[deprecated("Use evalChecksum(const NanoGrid<ValueT>*, CheckMode) instead")]]
+Checksum checksum(const NanoGrid<ValueT> &grid, CheckMode mode){return checksum(&grid, mode);}
+
+// ----------------------------> validateChecksum <--------------------------------------
+
+/// @brief Return true if the checksum of @c grid matches the value encoded in its meta data
+/// @tparam ValueT Build type of the grid
+/// @param grid Grid whose checksum is validated
+/// @param mode Defines the mode of the validation
+/// @return True if the checksum is valid
+template<typename ValueT>
+bool validateChecksum(const NanoGrid<ValueT> *grid, CheckMode mode)
+{
+    if (grid->mChecksum.isEmpty() || mode == CheckMode::Empty) return true;
+    auto lut = util::createCrc32Lut();
+    bool checkHead = grid->mChecksum.head() == crc32Head(grid->data(), lut.get());
+    if (grid->mChecksum.isHalf() || mode == CheckMode::Half || !checkHead) {
+        return checkHead;
+    } else {
+        return grid->mChecksum.tail() == crc32Tail(grid, lut.get());
+    }
+}
+
+/// @brief Return true if the checksum of @c gridData matches the value encoded in its meta data
+/// @param gridData Base pointer to the grid
+/// @param mode Defines the mode of the validation
+/// @return True if the checksum is valid
+inline bool validateChecksum(const GridData *gridData, CheckMode mode)
+{
+    if (gridData->mChecksum.isEmpty()|| mode == CheckMode::Empty) return true;
+    auto lut = util::createCrc32Lut();
+    bool checkHead = gridData->mChecksum.head() == crc32Head(gridData, lut.get());
+    if (gridData->mChecksum.isHalf() || mode == CheckMode::Half || !checkHead) {
+        return checkHead;
+    } else {
+        return gridData->mChecksum.tail() == crc32Tail(gridData, lut.get());
+    }
+}// bool validateChecksum(const GridData *gridData, CheckMode mode)
+
+template<typename ValueT>
+[[deprecated("Use validateChecksum(const NanoGrid<ValueT>*, CheckMode) instead")]]
+bool validateChecksum(const NanoGrid<ValueT> &grid, CheckMode mode){return validateChecksum(&grid, mode);}
+
+// ----------------------------> updateChecksum <--------------------------------------
+
+/// @brief Update the checksum of @c grid in the specified @c mode
+/// @tparam ValueT Build type of the grid
+/// @param grid Grid whose checksum is updated
+/// @param mode Defines the mode of computation for the checksum
+template<typename ValueT>
+void updateChecksum(NanoGrid<ValueT> *grid, CheckMode mode){grid->mChecksum = evalChecksum(grid, mode);}
+
+template<typename ValueT>
+void updateChecksum(NanoGrid<ValueT> *grid){grid->mChecksum = evalChecksum(grid, grid->mChecksum.mode());}
+
+// deprecated method that takes a reference vs a pointer
+template<typename ValueT>
+[[deprecated("Use updateChecksum(NanoGrid<ValueT>*, CheckMode) instead")]]
+void updateChecksum(NanoGrid<ValueT> &grid, CheckMode mode){updateChecksum(&grid, mode);}
+
+// ----------------------------> updateGridCount <--------------------------------------
+
+/// @brief Updates the grid index and count, as well as the head checksum if needed
+/// @param data Pointer to grid data
+/// @param gridIndex New value of the index
+/// @param gridCount New value of the grid count
+inline void updateGridCount(GridData *data, uint32_t gridIndex, uint32_t gridCount)
+{
+    NANOVDB_ASSERT(data && gridIndex < gridCount);
+    if (data->mGridIndex != gridIndex || data->mGridCount != gridCount) {
+        data->mGridIndex = gridIndex;
+        data->mGridCount = gridCount;
+        if (!data->mChecksum.isEmpty()) data->mChecksum.head() = crc32Head(data);
+    }
+}
+
+} // namespace tools ======================================================================
+
+} // namespace nanovdb ====================================================================
+
+#endif // NANOVDB_TOOLS_GRIDCHECKSUM_H_HAS_BEEN_INCLUDED
diff --git a/nanovdb/nanovdb/tools/GridStats.h b/nanovdb/nanovdb/tools/GridStats.h
new file mode 100644
index 0000000000..f07f5a6040
--- /dev/null
+++ b/nanovdb/nanovdb/tools/GridStats.h
@@ -0,0 +1,877 @@
+// Copyright Contributors to the OpenVDB Project
+// SPDX-License-Identifier: MPL-2.0
+
+/*!
+    \file nanovdb/tools/GridStats.h
+
+    \author Ken Museth
+
+    \date August 29, 2020
+
+    \brief Re-computes min/max/avg/var/bbox information for each node in a
+           pre-existing NanoVDB grid.
+*/
+
+#ifndef NANOVDB_TOOLS_GRIDSTATS_H_HAS_BEEN_INCLUDED
+#define NANOVDB_TOOLS_GRIDSTATS_H_HAS_BEEN_INCLUDED
+
+#include <nanovdb/NanoVDB.h>
+
+#ifdef NANOVDB_USE_TBB
+#include <tbb/parallel_reduce.h>
+#include <tbb/blocked_range.h>
+#endif
+
+#if defined(__CUDACC__)
+#include <cuda/std/limits>// for cuda::std::numeric_limits
+#else
+#include <limits>// for std::numeric_limits
+#endif
+
+#include <iostream>// for std::cerr
+#include <memory>// for std::unique_ptr
+
+namespace nanovdb {
+
+namespace tools {//=======================================================================
+
+/// @brief Mode of computation when (re)computing the statistics of a grid
+enum class StatsMode : uint32_t {
+    Disable = 0,// disable the computation of any type of statistics (obviously the FASTEST!)
+    BBox    = 1,// only compute the bbox of active values per node and total activeVoxelCount
+    MinMax  = 2,// additionally compute extrema values
+    All     = 3,// compute all of the statistics, i.e. bbox, min/max, average and standard deviation
+    Default = 3,// default computational mode for statistics
+    End     = 4,
+};
+
+/// @brief Re-computes the min/max, stats and bbox information for an existing NanoVDB Grid
+/// @param grid Grid whose stats to update
+/// @param mode Mode of computation for the statistics.
+template<typename BuildT>
+void updateGridStats(NanoGrid<BuildT>* grid, StatsMode mode = StatsMode::Default);
+
+template<typename ValueT, int Rank = TensorTraits<ValueT>::Rank>
+class Extrema;
+
+/// @brief Determine the extrema of all the values in a grid that
+///        intersects the specified bounding box.
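+///
+/// @details Typical use (an illustrative sketch; @c grid denotes a const
+/// NanoGrid<float> pointer loaded elsewhere):
+/// @code
+/// const CoordBBox bbox(Coord(0), Coord(63));// a 64^3 box at the origin
+/// auto ext = getExtrema(*grid, bbox);
+/// const float vMin = ext.min(), vMax = ext.max();
+/// @endcode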
+/// @tparam BuildT Build type of the input grid
+/// @param grid typed grid
+/// @param bbox index bounding box in which min/max are computed
+/// @return Extrema of the values inside @c bbox
+template<typename BuildT>
+Extrema<typename NanoGrid<BuildT>::ValueType>
+getExtrema(const NanoGrid<BuildT>& grid, const CoordBBox &bbox);
+
+//================================================================================================
+
+/// @brief Template specialization of Extrema on scalar value types, i.e. rank = 0
+template<typename ValueT>
+class Extrema<ValueT, 0>
+{
+protected:
+    ValueT mMin, mMax;
+
+public:
+    using ValueType = ValueT;
+    __hostdev__ Extrema()
+#if defined(__CUDACC__)
+        // note "::cuda" is needed since we also define a cuda namespace
+        : mMin(::cuda::std::numeric_limits<ValueT>::max())
+        , mMax(::cuda::std::numeric_limits<ValueT>::lowest())
+#else
+        : mMin(std::numeric_limits<ValueT>::max())
+        , mMax(std::numeric_limits<ValueT>::lowest())
+#endif
+    {
+    }
+    __hostdev__ Extrema(const ValueT& v)
+        : mMin(v)
+        , mMax(v)
+    {
+    }
+    __hostdev__ Extrema(const ValueT& a, const ValueT& b)
+        : mMin(a)
+        , mMax(b)
+    {
+    }
+    __hostdev__ Extrema& min(const ValueT& v)
+    {
+        if (v < mMin) mMin = v;
+        return *this;
+    }
+    __hostdev__ Extrema& max(const ValueT& v)
+    {
+        if (v > mMax) mMax = v;
+        return *this;
+    }
+    __hostdev__ Extrema& add(const ValueT& v)
+    {
+        this->min(v);
+        this->max(v);
+        return *this;
+    }
+    __hostdev__ Extrema& add(const ValueT& v, uint64_t) { return this->add(v); }
+    __hostdev__ Extrema& add(const Extrema& other)
+    {
+        this->min(other.mMin);
+        this->max(other.mMax);
+        return *this;
+    }
+    __hostdev__ const ValueT& min() const { return mMin; }
+    __hostdev__ const ValueT& max() const { return mMax; }
+    __hostdev__ operator bool() const { return mMin <= mMax; }
+    __hostdev__ static constexpr bool hasMinMax() { return !util::is_same<bool, ValueT>::value; }
+    __hostdev__ static constexpr bool hasAverage() { return false; }
+    __hostdev__ static constexpr bool hasStdDeviation() { return false; }
+    __hostdev__ static constexpr bool hasStats() { return !util::is_same<bool, ValueT>::value; }
+    __hostdev__ static constexpr size_t size() { return 0; }
+
+    template<typename NodeT>
+    __hostdev__ void setStats(NodeT &node) const
+    {
+        node.setMin(this->min());
+        node.setMax(this->max());
+    }
+}; // Extrema<ValueT, 0>
+
+/// @brief Template specialization of Extrema on vector value types, i.e.
rank = 1 +template +class Extrema +{ +protected: + using Real = typename VecT::ValueType; // this works with both nanovdb and openvdb vectors + struct Pair + { + Real scalar; + VecT vector; + + __hostdev__ Pair(Real s)// is only used by Extrema() default c-tor + : scalar(s) + , vector(s) + { + } + __hostdev__ Pair(const VecT& v) + : scalar(v.lengthSqr()) + , vector(v) + { + } + __hostdev__ bool operator<(const Pair& rhs) const { return scalar < rhs.scalar; } + } mMin, mMax; + __hostdev__ Extrema& add(const Pair& p) + { + if (p < mMin) mMin = p; + if (mMax < p) mMax = p; + return *this; + } + +public: + using ValueType = VecT; + __hostdev__ Extrema() +#if defined(__CUDACC__) + // note "::cuda" is needed since we also define a cuda namespace + : mMin(::cuda::std::numeric_limits::max()) + , mMax(::cuda::std::numeric_limits::lowest()) +#else + : mMin(std::numeric_limits::max()) + , mMax(std::numeric_limits::lowest()) +#endif + { + } + __hostdev__ Extrema(const VecT& v) + : mMin(v) + , mMax(v) + { + } + __hostdev__ Extrema(const VecT& a, const VecT& b) + : mMin(a) + , mMax(b) + { + } + __hostdev__ Extrema& min(const VecT& v) + { + Pair tmp(v); + if (tmp < mMin) mMin = tmp; + return *this; + } + __hostdev__ Extrema& max(const VecT& v) + { + Pair tmp(v); + if (mMax < tmp) mMax = tmp; + return *this; + } + __hostdev__ Extrema& add(const VecT& v) { return this->add(Pair(v)); } + __hostdev__ Extrema& add(const VecT& v, uint64_t) { return this->add(Pair(v)); } + __hostdev__ Extrema& add(const Extrema& other) + { + if (other.mMin < mMin) mMin = other.mMin; + if (mMax < other.mMax) mMax = other.mMax; + return *this; + } + __hostdev__ const VecT& min() const { return mMin.vector; } + __hostdev__ const VecT& max() const { return mMax.vector; } + __hostdev__ operator bool() const { return !(mMax < mMin); } + __hostdev__ static constexpr bool hasMinMax() { return !util::is_same::value; } + __hostdev__ static constexpr bool hasAverage() { return false; } + __hostdev__ static constexpr bool hasStdDeviation() { return false; } + __hostdev__ static constexpr bool hasStats() { return !util::is_same::value; } + __hostdev__ static constexpr size_t size() { return 0; } + + template + __hostdev__ void setStats(NodeT &node) const + { + node.setMin(this->min()); + node.setMax(this->max()); + } +}; // Extrema + +//================================================================================================ + +template::Rank> +class Stats; + +/// @brief This class computes statistics (minimum value, maximum +/// value, mean, variance and standard deviation) of a population +/// of floating-point values. +/// +/// @details variance = Mean[ (X-Mean[X])^2 ] = Mean[X^2] - Mean[X]^2, +/// standard deviation = sqrt(variance) +/// +/// @note This class employs incremental computation and double precision. 
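+///
+/// @details Example of incremental computation (illustrative only):
+/// @code
+/// Stats<float> s;
+/// for (float v : {1.0f, 2.0f, 3.0f}) s.add(v);
+/// // s.mean() == 2.0, s.var() == 2.0/3.0 (population variance), s.std() == sqrt(s.var())
+/// @endcode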
+template +class Stats : public Extrema +{ +protected: + using BaseT = Extrema; + using RealT = double; // for accuracy the internal precission must be 64 bit floats + size_t mSize; + double mAvg, mAux; + +public: + using ValueType = ValueT; + __hostdev__ Stats() + : BaseT() + , mSize(0) + , mAvg(0.0) + , mAux(0.0) + { + } + __hostdev__ Stats(const ValueT& val) + : BaseT(val) + , mSize(1) + , mAvg(RealT(val)) + , mAux(0.0) + { + } + /// @brief Add a single sample + __hostdev__ Stats& add(const ValueT& val) + { + BaseT::add(val); + mSize += 1; + const double delta = double(val) - mAvg; + mAvg += delta / double(mSize); + mAux += delta * (double(val) - mAvg); + return *this; + } + /// @brief Add @a n samples with constant value @a val. + __hostdev__ Stats& add(const ValueT& val, uint64_t n) + { + const double denom = 1.0 / double(mSize + n); + const double delta = double(val) - mAvg; + mAvg += denom * delta * double(n); + mAux += denom * delta * delta * double(mSize) * double(n); + BaseT::add(val); + mSize += n; + return *this; + } + + /// Add the samples from the other Stats instance. + __hostdev__ Stats& add(const Stats& other) + { + if (other.mSize > 0) { + const double denom = 1.0 / double(mSize + other.mSize); + const double delta = other.mAvg - mAvg; + mAvg += denom * delta * double(other.mSize); + mAux += other.mAux + denom * delta * delta * double(mSize) * double(other.mSize); + BaseT::add(other); + mSize += other.mSize; + } + return *this; + } + + __hostdev__ static constexpr bool hasMinMax() { return !util::is_same::value; } + __hostdev__ static constexpr bool hasAverage() { return !util::is_same::value; } + __hostdev__ static constexpr bool hasStdDeviation() { return !util::is_same::value; } + __hostdev__ static constexpr bool hasStats() { return !util::is_same::value; } + + __hostdev__ size_t size() const { return mSize; } + + //@{ + /// Return the arithmetic mean, i.e. average, value. + __hostdev__ double avg() const { return mAvg; } + __hostdev__ double mean() const { return mAvg; } + //@} + + //@{ + /// @brief Return the population variance. + /// + /// @note The unbiased sample variance = population variance * num/(num-1) + __hostdev__ double var() const { return mSize < 2 ? 0.0 : mAux / double(mSize); } + __hostdev__ double variance() const { return this->var(); } + //@} + + //@{ + /// @brief Return the standard deviation (=Sqrt(variance)) as + /// defined from the (biased) population variance. + __hostdev__ double std() const { return sqrt(this->var()); } + __hostdev__ double stdDev() const { return this->std(); } + //@} + + template + __hostdev__ void setStats(NodeT &node) const + { + node.setMin(this->min()); + node.setMax(this->max()); + node.setAvg(this->avg()); + node.setDev(this->std()); + } +}; // end Stats + +/// @brief This class computes statistics (minimum value, maximum +/// value, mean, variance and standard deviation) of a population +/// of floating-point values. +/// +/// @details variance = Mean[ (X-Mean[X])^2 ] = Mean[X^2] - Mean[X]^2, +/// standard deviation = sqrt(variance) +/// +/// @note This class employs incremental computation and double precision. 
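+///
+/// @details Partial statistics can be computed independently, e.g. per thread, and
+/// merged with add(const Stats&), as in the parallel reductions later in this file
+/// (illustrative sketch; Vec3f is assumed to provide a scalar constructor):
+/// @code
+/// Stats<Vec3f> a, b;
+/// a.add(Vec3f(1.0f));
+/// b.add(Vec3f(2.0f));
+/// a.add(b);// 'a' now reflects both samples, as if they had been added serially
+/// @endcode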
+template +class Stats : public Extrema +{ +protected: + using BaseT = Extrema; + using RealT = double; // for accuracy the internal precision must be 64 bit floats + size_t mSize; + double mAvg, mAux; + +public: + using ValueType = ValueT; + __hostdev__ Stats() + : BaseT() + , mSize(0) + , mAvg(0.0) + , mAux(0.0) + { + } + /// @brief Add a single sample + __hostdev__ Stats& add(const ValueT& val) + { + typename BaseT::Pair tmp(val); + BaseT::add(tmp); + mSize += 1; + const double delta = tmp.scalar - mAvg; + mAvg += delta / double(mSize); + mAux += delta * (tmp.scalar - mAvg); + return *this; + } + /// @brief Add @a n samples with constant value @a val. + __hostdev__ Stats& add(const ValueT& val, uint64_t n) + { + typename BaseT::Pair tmp(val); + const double denom = 1.0 / double(mSize + n); + const double delta = tmp.scalar - mAvg; + mAvg += denom * delta * double(n); + mAux += denom * delta * delta * double(mSize) * double(n); + BaseT::add(tmp); + mSize += n; + return *this; + } + + /// Add the samples from the other Stats instance. + __hostdev__ Stats& add(const Stats& other) + { + if (other.mSize > 0) { + const double denom = 1.0 / double(mSize + other.mSize); + const double delta = other.mAvg - mAvg; + mAvg += denom * delta * double(other.mSize); + mAux += other.mAux + denom * delta * delta * double(mSize) * double(other.mSize); + BaseT::add(other); + mSize += other.mSize; + } + return *this; + } + + __hostdev__ static constexpr bool hasMinMax() { return !util::is_same::value; } + __hostdev__ static constexpr bool hasAverage() { return !util::is_same::value; } + __hostdev__ static constexpr bool hasStdDeviation() { return !util::is_same::value; } + __hostdev__ static constexpr bool hasStats() { return !util::is_same::value; } + + __hostdev__ size_t size() const { return mSize; } + + //@{ + /// Return the arithmetic mean, i.e. average, value. + __hostdev__ double avg() const { return mAvg; } + __hostdev__ double mean() const { return mAvg; } + //@} + + //@{ + /// @brief Return the population variance. + /// + /// @note The unbiased sample variance = population variance * num/(num-1) + __hostdev__ double var() const { return mSize < 2 ? 0.0 : mAux / double(mSize); } + __hostdev__ double variance() const { return this->var(); } + //@} + + //@{ + /// @brief Return the standard deviation (=Sqrt(variance)) as + /// defined from the (biased) population variance. 
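+    /// @code
+    /// // e.g. for scalar samples {1, 2, 3}: mean = 2, population variance
+    /// // = ((-1)^2 + 0^2 + 1^2)/3 = 2/3, so std() = sqrt(2/3) ~ 0.8165
+    /// @endcode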
+ __hostdev__ double std() const { return sqrt(this->var()); } + __hostdev__ double stdDev() const { return this->std(); } + //@} + + template + __hostdev__ void setStats(NodeT &node) const + { + node.setMin(this->min()); + node.setMax(this->max()); + node.setAvg(this->avg()); + node.setDev(this->std()); + } +}; // end Stats + +/// @brief No-op Stats class +template +struct NoopStats +{ + using ValueType = ValueT; + __hostdev__ NoopStats() {} + __hostdev__ NoopStats(const ValueT&) {} + __hostdev__ NoopStats& add(const ValueT&) { return *this; } + __hostdev__ NoopStats& add(const ValueT&, uint64_t) { return *this; } + __hostdev__ NoopStats& add(const NoopStats&) { return *this; } + __hostdev__ static constexpr size_t size() { return 0; } + __hostdev__ static constexpr bool hasMinMax() { return false; } + __hostdev__ static constexpr bool hasAverage() { return false; } + __hostdev__ static constexpr bool hasStdDeviation() { return false; } + __hostdev__ static constexpr bool hasStats() { return false; } + template + __hostdev__ void setStats(NodeT&) const{} +}; // end NoopStats + +//================================================================================================ + +/// @brief Allows for the construction of NanoVDB grids without any dependency +template> +class GridStats +{ + struct NodeStats; + using TreeT = typename GridT::TreeType; + using ValueT = typename TreeT::ValueType; + using BuildT = typename TreeT::BuildType; + using Node0 = typename TreeT::Node0; // leaf + using Node1 = typename TreeT::Node1; // lower + using Node2 = typename TreeT::Node2; // upper + using RootT = typename TreeT::Node3; // root + static_assert(util::is_same::value, "Mismatching type"); + + ValueT mDelta; // skip rendering of node if: node.max < -mDelta || node.min > mDelta + + void process( GridT& );// process grid and all tree nodes + void process( TreeT& );// process Tree, root node and child nodes + void process( RootT& );// process root node and child nodes + NodeStats process( Node0& );// process leaf node + + template + NodeStats process( NodeT& );// process internal node and child nodes + + template + void setStats(DataT*, const Extrema&); + template + void setStats(DataT*, const Stats&); + template + void setStats(DataT*, const NoopStats&) {} + + template + typename std::enable_if::value>::type + setFlag(const T&, const T&, FlagT& flag) const { flag &= ~FlagT(1); } // unset 1st bit to enable rendering + + template + typename std::enable_if::value>::type + setFlag(const T& min, const T& max, FlagT& flag) const; + +public: + GridStats() = default; + + void update(GridT& grid, ValueT delta = ValueT(0)); + +}; // GridStats + +template +struct GridStats::NodeStats +{ + StatsT stats; + CoordBBox bbox; + + NodeStats(): stats(), bbox() {}//activeCount(0), bbox() {}; + + NodeStats& add(const NodeStats &other) + { + stats.add( other.stats );// no-op for NoopStats?! 
+ bbox[0].minComponent(other.bbox[0]); + bbox[1].maxComponent(other.bbox[1]); + return *this; + } +};// GridStats::NodeStats + +//================================================================================================ + +template +void GridStats::update(GridT& grid, ValueT delta) +{ + mDelta = delta; // delta = voxel size for level sets, else 0 + this->process( grid ); +} + +//================================================================================================ + +template +template +inline void GridStats:: + setStats(DataT* data, const Extrema& e) +{ + data->setMin(e.min()); + data->setMax(e.max()); +} + +template +template +inline void GridStats:: + setStats(DataT* data, const Stats& s) +{ + data->setMin(s.min()); + data->setMax(s.max()); + data->setAvg(s.avg()); + data->setDev(s.std()); +} + +//================================================================================================ + +template +template +inline typename std::enable_if::value>::type +GridStats:: + setFlag(const T& min, const T& max, FlagT& flag) const +{ + if (mDelta > 0 && (min > mDelta || max < -mDelta)) {// LS: min > dx || max < -dx + flag |= FlagT(1u);// set 1st bit to disable rendering + } else { + flag &= ~FlagT(1u);// unset 1st bit to enable rendering + } +} + +//================================================================================================ + +template +void GridStats::process( GridT &grid ) +{ + this->process( grid.tree() );// this processes tree, root and all nodes + + // set world space AABB + auto& data = *grid.data(); + const auto& indexBBox = grid.tree().root().bbox(); + if (indexBBox.empty()) { + data.mWorldBBox = Vec3dBBox(); + data.setBBoxOn(false); + } else { + // Note that below max is offset by one since CoordBBox.max is inclusive + // while bbox.max is exclusive. However, min is inclusive in both + // CoordBBox and Vec3dBBox. This also guarantees that a grid with a single + // active voxel, does not have an empty world bbox! E.g. if a grid with a + // unit index-to-world transformation only contains the active voxel (0,0,0) + // then indeBBox = (0,0,0) -> (0,0,0) and then worldBBox = (0.0, 0.0, 0.0) + // -> (1.0, 1.0, 1.0). This is a consequence of the different definitions + // of index and world bounding boxes inherited from OpenVDB! 
+ grid.mWorldBBox = CoordBBox(indexBBox[0], indexBBox[1].offsetBy(1)).transform(grid.map()); + grid.setBBoxOn(true); + } + + // set bit flags + data.setMinMaxOn(StatsT::hasMinMax()); + data.setAverageOn(StatsT::hasAverage()); + data.setStdDeviationOn(StatsT::hasStdDeviation()); +} // GridStats::process( Grid ) + +//================================================================================================ + +template +inline void GridStats::process( typename GridT::TreeType &tree ) +{ + this->process( tree.root() ); +} + +//================================================================================================ + +template +void GridStats::process(RootT &root) +{ + using ChildT = Node2; + auto &data = *root.data(); + if (data.mTableSize == 0) { // empty root node + data.mMinimum = data.mMaximum = data.mBackground; + data.mAverage = data.mStdDevi = 0; + data.mBBox = CoordBBox(); + } else { + NodeStats total; + for (uint32_t i = 0; i < data.mTableSize; ++i) { + auto* tile = data.tile(i); + if (tile->isChild()) { // process child node + total.add( this->process( *data.getChild(tile) ) ); + } else if (tile->state) { // active tile + const Coord ijk = tile->origin(); + total.bbox[0].minComponent(ijk); + total.bbox[1].maxComponent(ijk + Coord(ChildT::DIM - 1)); + if (StatsT::hasStats()) { // resolved at compile time + total.stats.add(tile->value, ChildT::NUM_VALUES); + } + } + } + this->setStats(&data, total.stats); + if (total.bbox.empty()) { + std::cerr << "\nWarning in GridStats: input tree only contained inactive root tiles!" + << "\nWhile not strictly an error it's rather suspicious!\n"; + } + data.mBBox = total.bbox; + } +} // GridStats::process( RootNode ) + +//================================================================================================ + +template +template +typename GridStats::NodeStats +GridStats::process(NodeT &node) +{ + static_assert(util::is_same::value || util::is_same::value, "Incorrect node type"); + using ChildT = typename NodeT::ChildNodeType; + + NodeStats total; + auto* data = node.data(); + + // Serial processing of active tiles + if (const auto tileCount = data->mValueMask.countOn()) { + //total.activeCount = tileCount * ChildT::NUM_VALUES; // active tiles + for (auto it = data->mValueMask.beginOn(); it; ++it) { + if (StatsT::hasStats()) { // resolved at compile time + total.stats.add( data->mTable[*it].value, ChildT::NUM_VALUES ); + } + const Coord ijk = node.offsetToGlobalCoord(*it); + total.bbox[0].minComponent(ijk); + total.bbox[1].maxComponent(ijk + Coord(int32_t(ChildT::DIM) - 1)); + } + } + + // Serial or parallel processing of child nodes + if (const size_t childCount = data->mChildMask.countOn()) { +#ifndef NANOVDB_USE_TBB + for (auto it = data->mChildMask.beginOn(); it; ++it) { + total.add( this->process( *data->getChild(*it) ) ); + } +#else + std::unique_ptr childNodes(new ChildT*[childCount]); + ChildT **ptr = childNodes.get(); + for (auto it = data->mChildMask.beginOn(); it; ++it) { + *ptr++ = data->getChild( *it ); + } + using RangeT = tbb::blocked_range; + total.add( tbb::parallel_reduce(RangeT(0, childCount), NodeStats(), + [&](const RangeT &r, NodeStats local)->NodeStats { + for(size_t i=r.begin(); i!=r.end(); ++i){ + local.add( this->process( *childNodes[i] ) ); + } + return local;}, + [](NodeStats a, const NodeStats &b)->NodeStats { return a.add( b ); } + )); +#endif + } + + data->mBBox = total.bbox; + if (total.bbox.empty()) { + data->mFlags |= uint32_t(1); // set 1st bit on to disable rendering of node + data->mFlags &= 
~uint32_t(2); // set 2nd bit off since node does not contain active values + } else { + data->mFlags |= uint32_t(2); // set 2nd bit on since node contains active values + if (StatsT::hasStats()) { // resolved at compile time + this->setStats(data, total.stats); + this->setFlag(data->mMinimum, data->mMaximum, data->mFlags); + } + } + return total; +} // GridStats::process( InternalNode ) + +//================================================================================================ + +template +typename GridStats::NodeStats +GridStats::process(Node0 &leaf) +{ + NodeStats local; + if (leaf.updateBBox()) {// optionally update active bounding box (updates data->mFlags) + local.bbox[0] = local.bbox[1] = leaf.mBBoxMin; + local.bbox[1] += Coord(leaf.mBBoxDif[0], leaf.mBBoxDif[1], leaf.mBBoxDif[2]); + if (StatsT::hasStats()) {// resolved at compile time + for (auto it = leaf.cbeginValueOn(); it; ++it) local.stats.add(*it); + this->setStats(&leaf, local.stats); + this->setFlag(leaf.getMin(), leaf.getMax(), leaf.mFlags); + } + } + return local; +} // GridStats::process( LeafNode ) + +//================================================================================================ + +template +void updateGridStats(NanoGrid* grid, StatsMode mode) +{ + NANOVDB_ASSERT(grid); + using GridT = NanoGrid; + using ValueT = typename GridT::ValueType; + if (mode == StatsMode::Disable) { + return; + } else if (mode == StatsMode::BBox || util::is_same::value) { + GridStats > stats; + stats.update(*grid); + } else if (mode == StatsMode::MinMax) { + GridStats > stats; + stats.update(*grid); + } else if (mode == StatsMode::All) { + GridStats > stats; + stats.update(*grid); + } else { + throw std::runtime_error("gridStats: Unsupported statistics mode."); + } +}// updateGridStats + +template +[[deprecated("Use nanovdb::tools::updateGridStats(NanoGrid*, StatsMode) instead")]] +void gridStats(NanoGrid& grid, StatsMode mode = StatsMode::Default) +{ + updateGridStats(&grid, mode); +} + +//================================================================================================ + +namespace { + +// returns a bitmask (of size 32^3 or 16^3) that marks all the entries +// in a node table that intersects with the specified bounding box. +template +Mask getBBoxMask(const CoordBBox &bbox, const NodeT* node) +{ + Mask mask;// typically 32^3 or 16^3 bit mask + auto b = CoordBBox::createCube(node->origin(), node->dim()); + assert( bbox.hasOverlap(b) ); + if ( bbox.isInside(b) ) { + mask.setOn();//node is completely inside the bbox so early out + } else { + b.intersect(bbox);// trim bounding box + // transform bounding box from global to local coordinates + b.min() &= NodeT::DIM-1u; + b.min() >>= NodeT::ChildNodeType::TOTAL; + b.max() &= NodeT::DIM-1u; + b.max() >>= NodeT::ChildNodeType::TOTAL; + assert( !b.empty() ); + auto it = b.begin();// iterates over all the child nodes or tiles that intersects bbox + for (const Coord& ijk = *it; it; ++it) { + mask.setOn(ijk[2] + (ijk[1] << NodeT::LOG2DIM) + (ijk[0] << 2*NodeT::LOG2DIM)); + } + } + return mask; +}// getBBoxMask + +}// end of unnamed namespace + +/// @brief return the extrema of all the values in a grid that +/// intersects the specified bounding box. 
+template +Extrema::ValueType> +getExtrema(const NanoGrid& grid, const CoordBBox &bbox) +{ + using GridT = NanoGrid; + using ValueT = typename GridT::ValueType; + using TreeT = typename GridTree::type; + using RootT = typename NodeTrait::type;// root node + using Node2 = typename NodeTrait::type;// upper internal node + using Node1 = typename NodeTrait::type;// lower internal node + using Node0 = typename NodeTrait::type;// leaf node + + Extrema extrema; + const RootT &root = grid.tree().root(); + const auto &bbox3 = root.bbox(); + if (bbox.isInside(bbox3)) {// bbox3 is contained inside bbox + extrema.min(root.minimum()); + extrema.max(root.maximum()); + extrema.add(root.background()); + } else if (bbox.hasOverlap(bbox3)) { + const auto *data3 = root.data(); + for (uint32_t i=0; imTableSize; ++i) { + const auto *tile = data3->tile(i); + CoordBBox bbox2 = CoordBBox::createCube(tile->origin(), Node2::dim()); + if (!bbox.hasOverlap(bbox2)) continue; + if (tile->isChild()) { + const Node2 *node2 = data3->getChild(tile); + if (bbox.isInside(bbox2)) { + extrema.min(node2->minimum()); + extrema.max(node2->maximum()); + } else {// partial intersections at level 2 + auto *data2 = node2->data(); + const auto bboxMask2 = getBBoxMask(bbox, node2); + for (auto it2 = bboxMask2.beginOn(); it2; ++it2) { + if (data2->mChildMask.isOn(*it2)) { + const Node1* node1 = data2->getChild(*it2); + CoordBBox bbox1 = CoordBBox::createCube(node1->origin(), Node1::dim()); + if (bbox.isInside(bbox1)) { + extrema.min(node1->minimum()); + extrema.max(node1->maximum()); + } else {// partial intersection at level 1 + auto *data1 = node1->data(); + const auto bboxMask1 = getBBoxMask(bbox, node1); + for (auto it1 = bboxMask1.beginOn(); it1; ++it1) { + if (data1->mChildMask.isOn(*it1)) { + const Node0* node0 = data1->getChild(*it1); + CoordBBox bbox0 = CoordBBox::createCube(node0->origin(), Node0::dim()); + if (bbox.isInside(bbox0)) { + extrema.min(node0->minimum()); + extrema.max(node0->maximum()); + } else {// partial intersection at level 0 + auto *data0 = node0->data(); + const auto bboxMask0 = getBBoxMask(bbox, node0); + for (auto it0 = bboxMask0.beginOn(); it0; ++it0) { + extrema.add(data0->getValue(*it0)); + } + }// end partial intersection at level 0 + } else {// tile at level 1 + extrema.add(data1->mTable[*it1].value); + } + } + }// end of partial intersection at level 1 + } else {// tile at level 2 + extrema.add(data2->mTable[*it2].value); + } + }// loop over tiles and nodes at level 2 + }// end of partial intersection at level 1 + } else {// tile at root level + extrema.add(tile->value); + } + }// loop over root table + } else {// bbox does not overlap the grid + extrema.add(root.background()); + } + return extrema; +}// getExtrema + +}// namespace tools + +} // namespace nanovdb + +#endif // NANOVDB_TOOLS_GRIDSTATS_H_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/tools/GridValidator.h b/nanovdb/nanovdb/tools/GridValidator.h new file mode 100644 index 0000000000..fbc4e14ded --- /dev/null +++ b/nanovdb/nanovdb/tools/GridValidator.h @@ -0,0 +1,244 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/*! + \file nanovdb/tools/GridValidator.h + + \author Ken Museth + + \date August 30, 2020 + + \brief Checks the validity of an existing NanoVDB grid. 
+
+    \note before v32.6.0: checksum[0] = Grid+Tree+Root, checksum[1] = nodes
+          after  v32.6.0: checksum[0] = Grid+Tree, checksum[1] = nodes + blind data in 4K blocks
+
+    When serialized:
+    [Grid,Tree][Root][ROOT TILES...][Node<5>...][Node<4>...][Leaf<3>...][BlindMeta...][BlindData...]
+*/
+
+#ifndef NANOVDB_TOOLS_GRID_VALIDATOR_H_HAS_BEEN_INCLUDED
+#define NANOVDB_TOOLS_GRID_VALIDATOR_H_HAS_BEEN_INCLUDED
+
+#include <iostream>// for std::cerr
+
+#include <nanovdb/NanoVDB.h>
+#include <nanovdb/tools/GridChecksum.h>
+
+namespace nanovdb {
+
+namespace tools {
+
+/// @brief Performs several validation tests on a grid pointer.
+/// @tparam ValueT Build type of the input grid
+/// @param grid const pointer to the grid that needs validation
+/// @param mode Mode of the validation check (defined in GridChecksum.h)
+/// @param verbose If true information about the first failed test is printed to std::cerr
+/// @return Return true if the specified grid passes several validation tests.
+template<typename ValueT>
+bool isValid(const NanoGrid<ValueT> *grid, CheckMode mode, bool verbose = false);
+
+/// @brief Return true if the specified grid passes several validation tests.
+/// @tparam ValueT Build type of the input grid
+/// @param grid Grid to validate
+/// @param detailed If true the validation test is detailed and relatively slow.
+/// @param verbose If true information about the first failed test is printed to std::cerr
+/// @note This method has been deprecated by the one defined above
+template<typename ValueT>
+[[deprecated("Use isValid(const NanoGrid<ValueT>*, CheckMode, bool) instead.")]]
+bool isValid(const NanoGrid<ValueT> &grid, bool detailed = true, bool verbose = false)
+{
+    return isValid(&grid, detailed ? CheckMode::Full : CheckMode::Half, verbose);
+}
+
+//================================================================================================
+
+/// @brief validate grid
+template<typename ValueT>
+__hostdev__ char* checkGrid(const NanoGrid<ValueT> *grid, char *error, CheckMode mode = CheckMode::Full)
+{
+    *error = '\0';// reset error string
+    char str[32];// temporary buffer for toStr
+
+    // check Grid
+    if (grid == nullptr) {
+        return util::sprint(error, "Invalid pointer: Grid is NULL");
+    } else if (!isAligned(grid)) {
+        return util::sprint(error, "Invalid pointer: Grid is misaligned");
+    } else if (grid->mMagic != NANOVDB_MAGIC_NUMB && grid->mMagic != NANOVDB_MAGIC_GRID) {
+        return util::sprint(error, "Invalid magic number: ", toStr(str, toMagic(grid->mMagic)));
+    } else if (!grid->mVersion.isCompatible()) {
+        return util::sprint(error, "Incompatible version number: ", toStr(str, grid->mVersion));
+    } else if (grid->mGridCount == 0) {
+        return util::sprint(error, "Zero grid count");
+    } else if (grid->mGridIndex >= grid->mGridCount) {
+        return util::sprint(error, "grid index(", int(grid->mGridIndex), ") >= grid count(", int(grid->mGridCount), ")");
+    } else if (grid->mGridClass >= GridClass::End) {
+        return util::sprint(error, "Invalid GridClass(", toStr(str, grid->mGridClass), ")");
+    } else if (grid->mGridType >= GridType::End) {
+        return util::sprint(error, "Invalid GridType(", toStr(str, grid->mGridType), ")");
+    } else if (grid->mGridType != toGridType<ValueT>()) {
+        return util::sprint(error, "Invalid combination of BuildType(", toStr(str, toGridType<ValueT>()), ") and GridType(", toStr(str+16, grid->mGridType), ")");
+    } else if (!isValid(grid->mGridType, grid->mGridClass)) {
+        return util::sprint(error, "Invalid combination of GridType(", toStr(str, grid->mGridType), ") and GridClass(", toStr(str+16,grid->mGridClass), ")");
+    }
+
+    // check Tree
+    auto &tree = grid->tree();
+    if (auto *p = tree.getRoot()) {
(!isAligned(p)) return util::strcpy(error, "Invalid pointer: Root is misaligned"); + } else { + return util::strcpy(error, "Invalid pointer: Root is NULL"); + } + + // check Root + auto &root = tree.root(); + auto *rootData = root.data(); + if (rootData == nullptr) { + return util::strcpy(error, "Invalid pointer: Root is NULL"); + } else if (!isAligned((const void*)rootData)) { + return util::strcpy(error, "Invalid pointer: Root is misaligned"); + } else if ( (const uint8_t*)(rootData) < (const uint8_t*)(&tree+1)) { + return util::strcpy(error, "Invalid root pointer (should be located after the Grid and Tree)"); + } else if ( (const void*)(rootData) > util::PtrAdd(rootData, root.memUsage())) { + return util::strcpy(error, "Invalid root pointer (appears to be located after the end of the buffer)"); + } else {// check root tiles + const void *bounds[2] = {rootData + 1, util::PtrAdd(rootData, root.memUsage())}; + for (uint32_t i = 0; imTableSize; ++i) { + const void *tile = rootData->tile(i); + if ( tile < bounds[0] ) { + return util::strcpy(error, "Invalid root tile pointer (below lower bound"); + } else if (tile >= bounds[1]) { + return util::strcpy(error, "Invalid root tile pointer (above higher bound"); + } + } + } + if (mode == CheckMode::Half) return error; + + // check nodes + const bool test = grid->isBreadthFirst(); + auto *n0 = tree.template getFirstNode<0>(); + auto *n1 = tree.template getFirstNode<1>(); + auto *n2 = tree.template getFirstNode<2>(); + const void *bounds[3][2] = {{n0, util::PtrAdd(n0, grid->gridSize())}, {n1, n0}, {n2, n1}}; + + auto check = [&](const void *ptr, int level) -> bool { + if (ptr==nullptr) { + util::strcpy(error, "Invalid node pointer: node is NULL"); + } else if (!isAligned(ptr)) { + util::strcpy(error, "Invalid node pointer: node is misaligned"); + } else if (test && level == 0 && (const void*)(n0++) != ptr) { + util::strcpy(error, "Leaf node is not stored breadth-first"); + } else if (test && level == 1 && (const void*)(n1++) != ptr) { + util::strcpy(error, "Lower node is not stored breadth-first"); + } else if (test && level == 2 && (const void*)(n2++) != ptr) { + util::strcpy(error, "Upper node is not stored breadth-first"); + } else if ( ptr < bounds[level][0] ) { + util::strcpy(error, "Invalid node pointer: below lower bound"); + } else if ( ptr >= bounds[level][1] ) { + util::strcpy(error, "Invalid node pointer: above higher bound"); + } + return !util::empty(error); + }; + + for (auto it2 = root.cbeginChild(); it2; ++it2) { + if (check(&*it2, 2)) return error; + for (auto it1 = it2->cbeginChild(); it1; ++it1) { + if (check(&*it1, 1)) return error; + for (auto it0 = it1->cbeginChild(); it0; ++it0) if (check(&*it0, 0)) return error; + }// loop over child nodes of the upper internal node + }// loop over child nodes of the root node + + return error; +} // checkGrid + +//================================================================================================ + +template +bool isValid(const NanoGrid *grid, CheckMode mode, bool verbose) +{ + std::unique_ptr strUP(new char[100]); + char *str = strUP.get(); + + tools::checkGrid(grid, str, mode); + + if (util::empty(str) && !validateChecksum(grid, mode)) util::strcpy(str, "Mis-matching checksum"); + if (verbose && !util::empty(str)) std::cerr << "Validation failed: " << str << std::endl; + + return util::empty(str); +}// isValid + +//================================================================================================ + +struct IsNanoGridValid { + template + static bool known(const 
GridData *gridData, CheckMode mode, bool verbose)
+    {
+        return tools::isValid((const NanoGrid<BuildT>*)gridData, mode, verbose);
+    }
+    static bool unknown(const GridData *gridData, CheckMode, bool verbose)
+    {
+        if (verbose) {
+            char str[16];
+            std::cerr << "Unsupported GridType: \"" << toStr(str, gridData->mGridType) << "\"\n" << std::endl;
+        }
+        return false;
+    }
+};// IsNanoGridValid
+
+/// @brief Validate a specific grid in a GridHandle
+/// @tparam GridHandleT Type of GridHandle
+/// @param handle GridHandle containing host grids
+/// @param gridID linear index of the grid to be validated
+/// @param mode mode of the validation tests
+/// @param verbose if true information is printed if the grid fails a validation test
+/// @return true if grid @c gridID passes all the validation tests
+template <typename GridHandleT>
+bool validateGrid(const GridHandleT &handle, uint32_t gridID, CheckMode mode, bool verbose)
+{
+    if (mode == CheckMode::Disable) {
+        return true;
+    } else if (gridID >= handle.gridCount()) {
+        if (verbose) std::cerr << "grid index " << gridID << " exceeds available grid count " << handle.gridCount() << std::endl;
+        return false;
+    }
+    return callNanoGrid<IsNanoGridValid>(handle.gridData(gridID), mode, verbose);
+}// validateGrid
+
+//================================================================================================
+
+/// @brief Validate all the grids in a GridHandle
+/// @tparam GridHandleT Type of GridHandle
+/// @param handle GridHandle containing host grids (0,1...,N)
+/// @param mode mode of the validation tests
+/// @param verbose if true information is printed if a grid fails a validation test
+/// @return true if all grids pass all the validation tests
+template <typename GridHandleT>
+bool validateGrids(const GridHandleT &handle, CheckMode mode, bool verbose)
+{
+    if (mode == CheckMode::Disable) return true;
+    for (uint32_t gridID = 0; gridID < handle.gridCount(); ++gridID) {
+        if (!validateGrid(handle, gridID, mode, verbose)) return false;
+    }
+    return true;
+}// validateGrids
+
+}// namespace tools
+
+template <typename ValueT>
+[[deprecated("Use nanovdb::tools::checkGrid instead.")]]
+__hostdev__ char* checkGrid(const NanoGrid<ValueT> *grid, char *error, CheckMode mode = CheckMode::Full)
+{
+    return tools::checkGrid(grid, error, mode);
+}
+
+template <typename ValueT>
+[[deprecated("Use nanovdb::tools::isValid instead.")]]
+bool isValid(const NanoGrid<ValueT> *grid, CheckMode mode, bool verbose = false)
+{
+    return tools::isValid(grid, mode, verbose);
+}
+
+}// namespace nanovdb
+
+#endif // NANOVDB_TOOLS_GRID_VALIDATOR_H_HAS_BEEN_INCLUDED
diff --git a/nanovdb/nanovdb/tools/NanoToOpenVDB.h b/nanovdb/nanovdb/tools/NanoToOpenVDB.h
new file mode 100644
index 0000000000..3723a4ecb1
--- /dev/null
+++ b/nanovdb/nanovdb/tools/NanoToOpenVDB.h
@@ -0,0 +1,366 @@
+// Copyright Contributors to the OpenVDB Project
+// SPDX-License-Identifier: MPL-2.0
+
+/*!
+    \file nanovdb/tools/NanoToOpenVDB.h
+
+    \author Ken Museth
+
+    \date May 6, 2020
+
+    \brief This class will deserialize a NanoVDB grid into an OpenVDB grid.
+
+    \todo Add support for PointIndexGrid and PointDataGrid
+*/
+
+#include <nanovdb/GridHandle.h> // manages and streams the raw memory buffer of a NanoVDB grid.
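+// Example (hypothetical usage sketch; the file name and the nanovdb/io/IO.h reader are
+// illustrative assumptions, not part of this header): converting a NanoVDB file back
+// into an OpenVDB grid on the host.
+//
+//   #include <openvdb/openvdb.h>
+//   #include <nanovdb/io/IO.h>
+//   #include <nanovdb/tools/NanoToOpenVDB.h>
+//
+//   openvdb::initialize();
+//   auto handle = nanovdb::io::readGrid("sphere.nvdb");// host-side GridHandle
+//   openvdb::GridBase::Ptr base = nanovdb::tools::nanoToOpenVDB(handle);// deep copy
+//   auto grid = openvdb::gridPtrCast<openvdb::FloatGrid>(base);// typed access, if float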
+#include +#include + +#include + +#ifndef NANOVDB_TOOLS_NANOTOOPENVDB_H_HAS_BEEN_INCLUDED +#define NANOVDB_TOOLS_NANOTOOPENVDB_H_HAS_BEEN_INCLUDED + +template +struct ConvertTrait {using Type = T;}; + +template +struct ConvertTrait> {using Type = openvdb::math::Vec3;}; + +template +struct ConvertTrait> {using Type = openvdb::math::Vec4;}; + +template<> +struct ConvertTrait {using Type = float;}; + +template<> +struct ConvertTrait {using Type = float;}; + +template<> +struct ConvertTrait {using Type = float;}; + +template<> +struct ConvertTrait {using Type = float;}; + +template<> +struct ConvertTrait {using Type = openvdb::ValueMask;}; + +namespace nanovdb { + +namespace tools { + +/// @brief Forward declaration of free-standing function that de-serializes a typed NanoVDB grid into an OpenVDB Grid +template +typename openvdb::Grid::Type>::Type>::Ptr +nanoToOpenVDB(const NanoGrid& grid, int verbose = 0); + +/// @brief Forward declaration of free-standing function that de-serializes a NanoVDB GridHandle into an OpenVDB GridBase +template +openvdb::GridBase::Ptr +nanoToOpenVDB(const GridHandle& handle, int verbose = 0, uint32_t n = 0); + +/// @brief This class will serialize an OpenVDB grid into a NanoVDB grid managed by a GridHandle. +template +class NanoToOpenVDB +{ + using NanoNode0 = nanovdb::LeafNode; // note that it's using openvdb coord nd mask types! + using NanoNode1 = nanovdb::InternalNode; + using NanoNode2 = nanovdb::InternalNode; + using NanoRootT = nanovdb::RootNode; + using NanoTreeT = nanovdb::Tree; + using NanoGridT = nanovdb::Grid; + using NanoValueT = typename NanoGridT::ValueType; + + using OpenBuildT = typename ConvertTrait::Type; // e.g. float -> float but nanovdb::math::Vec3 -> openvdb::Vec3 + using OpenNode0 = openvdb::tree::LeafNode; // leaf + using OpenNode1 = openvdb::tree::InternalNode; // lower + using OpenNode2 = openvdb::tree::InternalNode; // upper + using OpenRootT = openvdb::tree::RootNode; + using OpenTreeT = openvdb::tree::Tree; + using OpenGridT = openvdb::Grid; + using OpenValueT = typename OpenGridT::ValueType; + +public: + /// @brief Construction from an existing const OpenVDB Grid. + NanoToOpenVDB(){}; + + /// @brief Return a shared pointer to a NanoVDB grid constructed from the specified OpenVDB grid + typename OpenGridT::Ptr operator()(const NanoGrid& grid, int verbose = 0); + +private: + + template + OpenNodeT* processNode(const NanoNodeT*); + + OpenNode2* process(const NanoNode2* node) {return this->template processNode(node);} + OpenNode1* process(const NanoNode1* node) {return this->template processNode(node);} + + template + typename std::enable_if::value && + !std::is_same::value && + !std::is_same::value && + !std::is_same::value && + !std::is_same::value && + !std::is_same::value, + OpenNode0*>::type + process(const NanoLeafT* node); + + template + typename std::enable_if::value || + std::is_same::value || + std::is_same::value || + std::is_same::value, + OpenNode0*>::type + process(const NanoLeafT* node); + + template + typename std::enable_if::value, + OpenNode0*>::type + process(const NanoLeafT* node); + + template + typename std::enable_if::value, + OpenNode0*>::type + process(const NanoLeafT* node); + + /// converts nanovdb value types to openvdb value types, e.g. 
nanovdb::Vec3f& -> openvdb::Vec3f& + static const OpenValueT& Convert(const NanoValueT &v) {return reinterpret_cast(v);} + static const OpenValueT* Convert(const NanoValueT *v) {return reinterpret_cast(v);} + +}; // NanoToOpenVDB class + +template +typename NanoToOpenVDB::OpenGridT::Ptr +NanoToOpenVDB::operator()(const NanoGrid& grid, int /*verbose*/) +{ + // since the input nanovdb grid might use nanovdb types (Coord, Mask, Vec3) we cast to use openvdb types + const NanoGridT *srcGrid = reinterpret_cast(&grid); + + auto dstGrid = openvdb::createGrid(Convert(srcGrid->tree().background())); + dstGrid->setName(srcGrid->gridName()); // set grid name + switch (srcGrid->gridClass()) { // set grid class + case nanovdb::GridClass::LevelSet: + dstGrid->setGridClass(openvdb::GRID_LEVEL_SET); + break; + case nanovdb::GridClass::FogVolume: + dstGrid->setGridClass(openvdb::GRID_FOG_VOLUME); + break; + case nanovdb::GridClass::Staggered: + dstGrid->setGridClass(openvdb::GRID_STAGGERED); + break; + case nanovdb::GridClass::PointIndex: + throw std::runtime_error("NanoToOpenVDB does not yet support PointIndexGrids"); + case nanovdb::GridClass::PointData: + throw std::runtime_error("NanoToOpenVDB does not yet support PointDataGrids"); + default: + dstGrid->setGridClass(openvdb::GRID_UNKNOWN); + } + // set transform + const nanovdb::Map& nanoMap = reinterpret_cast(srcGrid)->mMap; + auto mat = openvdb::math::Mat4::identity(); + mat.setMat3(openvdb::math::Mat3(nanoMap.mMatD)); + mat.transpose(); // the 3x3 in nanovdb is transposed relative to openvdb's 3x3 + mat.setTranslation(openvdb::math::Vec3(nanoMap.mVecD)); + dstGrid->setTransform(openvdb::math::Transform::createLinearTransform(mat)); // calls simplify! + + // process root node + auto &root = dstGrid->tree().root(); + auto *data = srcGrid->tree().root().data(); + for (uint32_t i=0; imTableSize; ++i) { + auto *tile = data->tile(i); + if (tile->isChild()) { + root.addChild( this->process( data->getChild(tile)) ); + } else { + root.addTile(tile->origin(), Convert(tile->value), tile->state); + } + } + + return dstGrid; +} + +template +template +DstNodeT* +NanoToOpenVDB::processNode(const SrcNodeT *srcNode) +{ + DstNodeT *dstNode = new DstNodeT(); // un-initialized for fast construction + dstNode->setOrigin(srcNode->origin()); + const auto& childMask = srcNode->childMask(); + const_cast(dstNode->getValueMask()) = srcNode->valueMask(); + const_cast(dstNode->getChildMask()) = childMask; + auto* dstTable = const_cast(dstNode->getTable()); + auto* srcData = srcNode->data(); + std::vector> childNodes; + const auto childCount = childMask.countOn(); + childNodes.reserve(childCount); + for (uint32_t n = 0; n < DstNodeT::NUM_VALUES; ++n) { + if (childMask.isOn(n)) { + childNodes.emplace_back(n, srcData->getChild(n)); + } else { + dstTable[n].setValue(Convert(srcData->mTable[n].value)); + } + } + auto kernel = [&](const auto& r) { + for (auto i = r.begin(); i != r.end(); ++i) { + auto &p = childNodes[i]; + dstTable[p.first].setChild( this->process(p.second) ); + } + }; + +#if 0 + kernel(Range1D(0, childCount)); +#else + util::forEach(0, childCount, 1, kernel); +#endif + return dstNode; +} // processNode + +template +template +inline typename std::enable_if::value && + !std::is_same::value && + !std::is_same::value && + !std::is_same::value && + !std::is_same::value && + !std::is_same::value, + typename NanoToOpenVDB::OpenNode0*>::type +NanoToOpenVDB::process(const NanoLeafT *srcNode) +{ + static_assert(std::is_same::value, "NanoToOpenVDB::process assert failed"); + 
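+    // This overload covers value types whose in-memory layout matches between NanoVDB
+    // and OpenVDB (e.g. float or Vec3f), so the 512 leaf values are copied directly out
+    // of mValues below; bool, ValueMask and the compressed Fp4/Fp8/Fp16/FpN types are
+    // handled by the dedicated overloads that follow.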
OpenNode0* dstNode = new OpenNode0(); // un-initialized for fast construction + dstNode->setOrigin(srcNode->origin()); + dstNode->setValueMask(srcNode->valueMask()); + + const auto* src = Convert(srcNode->data()->mValues);// doesn't work for compressed data, bool or ValueMask + for (auto *dst = dstNode->buffer().data(), *end = dst + OpenNode0::SIZE; dst != end; dst += 4, src += 4) { + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = src[3]; + } + + return dstNode; +} // process(NanoNode0) + +template +template +inline typename std::enable_if::value || + std::is_same::value || + std::is_same::value || + std::is_same::value, + typename NanoToOpenVDB::OpenNode0*>::type +NanoToOpenVDB::process(const NanoLeafT *srcNode) +{ + static_assert(std::is_same::value, "NanoToOpenVDB::process assert failed"); + OpenNode0* dstNode = new OpenNode0(); // un-initialized for fast construction + dstNode->setOrigin(srcNode->origin()); + dstNode->setValueMask(srcNode->valueMask()); + float *dst = dstNode->buffer().data(); + for (int i=0; i!=512; i+=4) { + *dst++ = srcNode->getValue(i); + *dst++ = srcNode->getValue(i+1); + *dst++ = srcNode->getValue(i+2); + *dst++ = srcNode->getValue(i+3); + } + + return dstNode; +} // process(NanoNode0) + +template +template +inline typename std::enable_if::value, + typename NanoToOpenVDB::OpenNode0*>::type +NanoToOpenVDB::process(const NanoLeafT *srcNode) +{ + static_assert(std::is_same::value, "NanoToOpenVDB::process assert failed"); + OpenNode0* dstNode = new OpenNode0(); // un-initialized for fast construction + dstNode->setOrigin(srcNode->origin()); + dstNode->setValueMask(srcNode->valueMask()); + + return dstNode; +} // process(NanoNode0) + +template +template +inline typename std::enable_if::value, + typename NanoToOpenVDB::OpenNode0*>::type +NanoToOpenVDB::process(const NanoLeafT *srcNode) +{ + static_assert(std::is_same::value, "NanoToOpenVDB::process assert failed"); + OpenNode0* dstNode = new OpenNode0(); // un-initialized for fast construction + dstNode->setOrigin(srcNode->origin()); + dstNode->setValueMask(srcNode->valueMask()); + reinterpret_cast&>(dstNode->buffer()) = srcNode->data()->mValues; + + return dstNode; +} // process(NanoNode0) + +template +inline typename openvdb::Grid::Type>::Type>::Ptr +nanoToOpenVDB(const NanoGrid& grid, int verbose) +{ + NanoToOpenVDB tmp; + return tmp(grid, verbose); +} + +template +openvdb::GridBase::Ptr +nanoToOpenVDB(const GridHandle& handle, int verbose, uint32_t n) +{ + if (auto grid = handle.template grid(n)) { + return tools::nanoToOpenVDB(*grid, verbose); + } else if (auto grid = handle.template grid(n)) { + return tools::nanoToOpenVDB(*grid, verbose); + } else if (auto grid = handle.template grid(n)) { + return tools::nanoToOpenVDB(*grid, verbose); + } else if (auto grid = handle.template grid(n)) { + return tools::nanoToOpenVDB(*grid, verbose); + } else if (auto grid = handle.template grid(n)) { + return tools::nanoToOpenVDB(*grid, verbose); + } else if (auto grid = handle.template grid(n)) { + return tools::nanoToOpenVDB(*grid, verbose); + } else if (auto grid = handle.template grid(n)) { + return tools::nanoToOpenVDB(*grid, verbose); + } else if (auto grid = handle.template grid(n)) { + return tools::nanoToOpenVDB(*grid, verbose); + } else if (auto grid = handle.template grid(n)) { + return tools::nanoToOpenVDB(*grid, verbose); + } else if (auto grid = handle.template grid(n)) { + return tools::nanoToOpenVDB(*grid, verbose); + } else if (auto grid = handle.template grid(n)) { + return 
tools::nanoToOpenVDB(*grid, verbose);
+    } else if (auto grid = handle.template grid(n)) {
+        return tools::nanoToOpenVDB(*grid, verbose);
+    } else if (auto grid = handle.template grid(n)) {
+        return tools::nanoToOpenVDB(*grid, verbose);
+    } else if (auto grid = handle.template grid(n)) {
+        return tools::nanoToOpenVDB(*grid, verbose);
+    } else {
+        OPENVDB_THROW(openvdb::RuntimeError, "Unsupported NanoVDB grid type!");
+    }
+}// tools::nanoToOpenVDB
+
+}// namespace tools
+
+/// @brief Deprecated free-standing function that de-serializes a typed NanoVDB grid into an OpenVDB Grid
+template<typename NanoBuildT>
+[[deprecated("Use nanovdb::tools::nanoToOpenVDB instead.")]]
+typename openvdb::Grid<typename openvdb::tree::Tree4<typename ConvertTrait<NanoBuildT>::Type>::Type>::Ptr
+nanoToOpenVDB(const NanoGrid<NanoBuildT>& grid, int verbose = 0)
+{
+    return tools::nanoToOpenVDB(grid, verbose);
+}
+
+/// @brief Deprecated free-standing function that de-serializes a NanoVDB GridHandle into an OpenVDB GridBase
+template<typename BufferT>
+[[deprecated("Use nanovdb::tools::nanoToOpenVDB instead.")]]
+openvdb::GridBase::Ptr
+nanoToOpenVDB(const GridHandle<BufferT>& handle, int verbose = 0, uint32_t n = 0)
+{
+    return tools::nanoToOpenVDB(handle, verbose, n);
+}
+
+} // namespace nanovdb
+
+#endif // NANOVDB_TOOLS_NANOTOOPENVDB_H_HAS_BEEN_INCLUDED
diff --git a/nanovdb/nanovdb/tools/cuda/AddBlindData.cuh b/nanovdb/nanovdb/tools/cuda/AddBlindData.cuh
new file mode 100644
index 0000000000..a7dcfcd6f5
--- /dev/null
+++ b/nanovdb/nanovdb/tools/cuda/AddBlindData.cuh
@@ -0,0 +1,146 @@
+// Copyright Contributors to the OpenVDB Project
+// SPDX-License-Identifier: MPL-2.0
+
+/*!
+    \file nanovdb/tools/cuda/AddBlindData.cuh
+
+    \author Ken Museth
+
+    \date August 3, 2023
+
+    \brief Defines a function that appends blind device data to an existing device NanoGrid
+
+    \warning The header file contains cuda device code so be sure
+             to only include it in .cu files (or other .cuh files)
+*/
+
+#ifndef NVIDIA_TOOLS_CUDA_ADDBLINDDATA_CUH_HAS_BEEN_INCLUDED
+#define NVIDIA_TOOLS_CUDA_ADDBLINDDATA_CUH_HAS_BEEN_INCLUDED
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include <cstring> // for std::strcpy
+
+namespace nanovdb {// ================================================
+
+namespace tools::cuda {// ============================================
+
+/// @brief This function appends blind data to an existing NanoGrid
+/// @tparam BuildT Build type of the grid
+/// @tparam BlindDataT Type of the blind data
+/// @tparam BufferT Type of the buffer used for allocation
+/// @param d_grid Pointer to device grid
+/// @param d_blindData Pointer to device blind data
+/// @param valueCount number of values in the blind data
+/// @param blindClass class of the blind data
+/// @param semantics semantics of the blind data
+/// @param name optional name of the blind data
+/// @param pool optional pool used for allocation
+/// @param stream optional CUDA stream (defaults to CUDA stream 0)
+/// @return GridHandle with blind data appended
+template<typename BuildT, typename BlindDataT, typename BufferT = nanovdb::cuda::DeviceBuffer>
+GridHandle<BufferT>
+addBlindData(const NanoGrid<BuildT> *d_grid,
+             const BlindDataT *d_blindData,
+             uint64_t valueCount,
+             GridBlindDataClass blindClass = GridBlindDataClass::Unknown,
+             GridBlindDataSemantic semantics = GridBlindDataSemantic::Unknown,
+             const char *name = "",
+             const BufferT &pool = BufferT(),
+             cudaStream_t stream = 0)
+{
+    // In:  |-----------|----------|-----------|
+    //        old grid    old meta   old data
+    // Out: |-----------|----------|----------|-----------|------------|
+    //        old grid    old meta   new meta   old data     new data
+
+    static_assert(BufferTraits<BufferT>::hasDeviceDual, "Expected BufferT to
support device allocation"); + + // extract byte sizes of the grid, blind meta data and blind data + enum {GRID=0, META=1, DATA=2, CHECKSUM=3}; + uint64_t tmp[4], *d_tmp; + cudaCheck(util::cuda::mallocAsync((void**)&d_tmp, 4*sizeof(uint64_t), stream)); + util::cuda::lambdaKernel<<<1, 1, 0, stream>>>(1, [=] __device__(size_t) { + if (auto count = d_grid->blindDataCount()) { + d_tmp[GRID] = util::PtrDiff(&d_grid->blindMetaData(0), d_grid); + d_tmp[META] = count*sizeof(GridBlindMetaData); + d_tmp[DATA] = d_grid->gridSize() - d_tmp[GRID] - d_tmp[META]; + } else { + d_tmp[GRID] = d_grid->gridSize(); + d_tmp[META] = d_tmp[DATA] = 0u; + } + d_tmp[CHECKSUM] = d_grid->checksum().full(); + }); cudaCheckError(); + cudaCheck(cudaMemcpyAsync(&tmp, d_tmp, 4*sizeof(uint64_t), cudaMemcpyDeviceToHost, stream)); + + GridBlindMetaData metaData{int64_t(sizeof(GridBlindMetaData) + tmp[DATA]), valueCount, + sizeof(BlindDataT), semantics, blindClass, toGridType()}; + if (!metaData.isValid()) throw std::runtime_error("cudaAddBlindData: invalid combination of blind meta data"); + std::strcpy(metaData.mName, name); + auto buffer = BufferT::create(tmp[GRID] + tmp[META] + sizeof(GridBlindMetaData) + tmp[DATA] + metaData.blindDataSize(), &pool, false); + void *d_data = buffer.deviceData(); + + // 1: |-----------|----------| + // old grid old meta + cudaCheck(cudaMemcpyAsync(d_data, d_grid, tmp[GRID] + tmp[META], cudaMemcpyDeviceToDevice, stream)); + + // 2: |-----------|----------|----------| + // old grid old meta new meta + cudaCheck(cudaMemcpyAsync((char*)d_data + tmp[GRID] + tmp[META], &metaData, sizeof(GridBlindMetaData), cudaMemcpyHostToDevice, stream)); + + // 3: |-----------|----------|----------|-----------| + // old grid old meta new meta old data + cudaCheck(cudaMemcpyAsync((char*)d_data + tmp[GRID] + tmp[META] + sizeof(GridBlindMetaData), + (const char*)d_grid + tmp[GRID] + tmp[META], tmp[DATA], cudaMemcpyDeviceToDevice, stream)); + + // 4: |-----------|----------|----------|-----------|------------| + // old grid old meta new meta old data new data + const size_t dataSize = valueCount*sizeof(BlindDataT);// no padding + cudaCheck(cudaMemcpyAsync((char*)d_data + tmp[GRID] + tmp[META] + sizeof(GridBlindMetaData) + tmp[DATA], + d_blindData, dataSize, cudaMemcpyDeviceToDevice, stream)); + if (auto padding = metaData.blindDataSize() - dataSize) {// zero out possible padding + cudaCheck(cudaMemsetAsync((char*)d_data + tmp[GRID] + tmp[META] + sizeof(GridBlindMetaData) + tmp[DATA] + dataSize, 0, padding, stream)); + } + + // increment grid size and blind data counter in output grid + util::cuda::lambdaKernel<<<1, 1, 0, stream>>>(1, [=] __device__(size_t) { + auto &grid = *reinterpret_cast*>(d_data); + grid.mBlindMetadataCount += 1; + grid.mBlindMetadataOffset = d_tmp[GRID]; + auto *meta = util::PtrAdd(d_data, grid.mBlindMetadataOffset);// points to first blind meta data + for (uint32_t i=0, n=grid.mBlindMetadataCount-1; imDataOffset += sizeof(GridBlindMetaData); + grid.mGridSize += sizeof(GridBlindMetaData) + meta->blindDataSize();// expansion with 32 byte alignment + }); cudaCheckError(); + cudaCheck(util::cuda::freeAsync(d_tmp, stream)); + + Checksum cs(tmp[CHECKSUM]); + cuda::updateChecksum(reinterpret_cast(d_data), cs.mode(), stream); + + return GridHandle(std::move(buffer)); +}// cudaAddBlindData + +}// namespace tools::cuda + +template +[[deprecated("Use nanovdb::cuda::addBlindData instead")]] +GridHandle +cudaAddBlindData(const NanoGrid *d_grid, + const BlindDataT *d_blindData, + uint64_t valueCount, + 
GridBlindDataClass blindClass = GridBlindDataClass::Unknown, + GridBlindDataSemantic semantics = GridBlindDataSemantic::Unknown, + const char *name = "", + const BufferT &pool = BufferT(), + cudaStream_t stream = 0) +{ + return tools::cuda::addBlindData(d_grid, d_blindData, valueCount, blindClass, semantics, name, pool, stream); +} + +}// namespace nanovdb + +#endif // NVIDIA_TOOLS_CUDA_ADDBLINDDATA_CUH_HAS_BEEN_INCLUDED \ No newline at end of file diff --git a/nanovdb/nanovdb/tools/cuda/GridChecksum.cuh b/nanovdb/nanovdb/tools/cuda/GridChecksum.cuh new file mode 100644 index 0000000000..b1f61e2fe7 --- /dev/null +++ b/nanovdb/nanovdb/tools/cuda/GridChecksum.cuh @@ -0,0 +1,441 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/*! + \file nanovdb/tools/cuda/GridChecksum.cuh + + \author Ken Museth + + \date September 28, 2023 + + \brief Compute CRC32 checksum of NanoVDB grids + + \note before v32.6.0: checksum[0] = Grid+Tree+Root, checksum[1] = nodes + after v32.6.0: checksum[0] = Grid+Tree, checksum[1] = nodes + blind data in 4K blocks + + When serialized: + [Grid,Tree][Root][ROOT TILES...][Node<5>...][Node<4>...][Leaf<3>...][BlindMeta...][BlindData...] + checksum[2] before v32.6.0: <------------- [0] ------------><-------------- [1] ---------------> + checksum[]2 after v32.6.0: <---[0]---><----------------------------------------[1]----------------------------------------> +*/ + +#ifndef NANOVDB_TOOLS_CUDA_GRIDCHECKSUM_CUH_HAS_BEEN_INCLUDED +#define NANOVDB_TOOLS_CUDA_GRIDCHECKSUM_CUH_HAS_BEEN_INCLUDED + +#include +#include // required for instantiation of move c-tor of GridHandle +#include +#include +#include + +namespace nanovdb {// ======================================================================= + +namespace tools::cuda {// =================================================================== + +/// @brief Compute the (2 x CRC32) checksum of the specified @c d_gridData on the device +/// @param d_gridData Device base pointer to the grid from which the checksum is computed. +/// @param mode Defines the mode of computation for the checksum. +/// @param stream optional cuda stream (defaults to zero) +/// @return Return the (2 x CRC32) checksum of the specified @c d_gridData +Checksum evalChecksum(const GridData *d_gridData, CheckMode mode = CheckMode::Default, cudaStream_t stream = 0); + +/// @brief Extract the checksum of a device grid +/// @param d_gridData Device basepointer to grid with a checksum +/// @param stream optional cuda stream (defaults to zero) +/// @return Checksum encoded in the specified grid +Checksum getChecksum(const GridData *d_gridData, cudaStream_t stream = 0); + +/// @brief Return true if the checksum of @c d_gridData matches the expected +/// value already encoded into the grid's meta data. +/// @tparam BuildT Template parameter used to build NanoVDB grid. +/// @param d_gridData Grid whose checksum is validated. +/// @param mode Defines the mode of computation for the checksum. +/// @param stream optional cuda stream (defaults to zero) +bool validateChecksum(const GridData *d_gridData, CheckMode mode = CheckMode::Default, cudaStream_t stream = 0); + +/// @brief Update the checksum of a device grid +/// @param d_gridData device pointer to GridData +/// @param mode Mode of computation for the checksum. 
+/// @param stream optional cuda stream (defaults to zero)
+void updateChecksum(GridData *d_gridData, CheckMode mode, cudaStream_t stream = 0);
+
+/// @brief Updates the checksum of a device grid by preserving its mode
+/// @param d_gridData Device base pointer to grid
+/// @param stream optional cuda stream (defaults to zero)
+inline void updateChecksum(GridData *d_gridData, cudaStream_t stream = 0)
+{
+    updateChecksum(d_gridData, getChecksum(d_gridData, stream).mode(), stream);
+}
+
+}// namespace tools::cuda
+
+namespace util::cuda {
+
+/// @brief Cuda kernel that computes CRC32 checksums of blocks of data using a look-up-table
+/// @param d_data device pointer to raw data from which to compute the CRC32 checksums
+/// @param d_blockCRC device pointer to array of @c blockCount checksums for each block
+/// @param blockCount number of blocks and checksums
+/// @param blockSize size of each block in bytes
+/// @param d_lut device pointer to CRC32 Lookup Table
+template <typename T>
+__global__ void crc32Kernel(const T *d_data, uint32_t* d_blockCRC, uint32_t blockCount, uint32_t blockSize, const uint32_t *d_lut)
+{
+    const uint32_t tid = blockIdx.x * blockDim.x + threadIdx.x;
+    if (tid < blockCount) d_blockCRC[tid] = crc32((const uint8_t*)d_data + tid * blockSize, blockSize, d_lut);
+}
+
+/// @brief Cuda kernel that computes CRC32 checksums of blocks of data (without using a look-up-table)
+/// @param d_data device pointer to raw data from which to compute the CRC32 checksums
+/// @param d_blockCRC device pointer to array of @c blockCount checksums for each block
+/// @param blockCount number of blocks and checksums
+/// @param blockSize size of each block in bytes
+template <typename T>
+__global__ void crc32Kernel(const T *d_data, uint32_t* d_blockCRC, uint32_t blockCount, uint32_t blockSize)
+{
+    const uint32_t tid = blockIdx.x * blockDim.x + threadIdx.x;
+    if (tid < blockCount) d_blockCRC[tid] = crc32((const uint8_t*)d_data + tid * blockSize, blockSize);
+}
+
+/// @brief Host function to allocate and initialize a Look-Up-Table of size 256 for subsequent CRC32 computation on the device
+/// @param extra number of extra elements in the LUT
+/// @param stream optional cuda stream (defaults to zero)
+/// @return returns a nanovdb::util::cuda::unique_ptr pointing to a lookup-table for CRC32 computation
+inline unique_ptr<uint32_t> createCrc32Lut(size_t extra = 0, cudaStream_t stream = 0)
+{
+    unique_ptr<uint32_t> lut(256 + extra, stream);
+    uint32_t *d_lut = lut.get();
+    lambdaKernel<<<1, 256, 0, stream>>>(256, [=] __device__(size_t tid) {initCrc32Lut(d_lut, tid);});
+    cudaCheckError();
+    return lut;
+}
+
+/// @brief Compute the blocked CRC32 checksum of a range of device data, in 4K blocks
+/// @param d_data device pointer to start of data
+/// @param size number of bytes
+/// @param d_lut Look-Up-Table for CRC32 computation
+/// @param d_crc device pointer where the final CRC32 checksum is written
+/// @param stream optional cuda stream (defaults to zero)
+inline void blockedCRC32(const void *d_data, size_t size, const uint32_t *d_lut, uint32_t *d_crc, cudaStream_t stream)
+{
+    NANOVDB_ASSERT(d_data && d_lut && d_crc);
+    static constexpr unsigned int threadsPerBlock = 128;// seems faster than the old value of 256!
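+    // Two-pass scheme: a parallel kernel first computes one CRC32 per 4 KB block (the
+    // last block absorbs any remainder), then a single-thread kernel hashes the array
+    // of block checksums. E.g. (hypothetical numbers): for size = 10000 bytes,
+    // checksumCount = 10000 >> 12 = 2, so thread 0 hashes bytes [0,4096) and the last
+    // thread hashes bytes [4096,10000).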
+ const uint64_t checksumCount = size >> NANOVDB_CRC32_LOG2_BLOCK_SIZE;// 4 KB (4096 byte) + unique_ptr buffer(checksumCount, stream);// for checksums of 4 KB blocks + uint32_t *d_checksums = buffer.get(); + lambdaKernel<<>>(checksumCount, [=] __device__(size_t tid) { + uint32_t blockSize = 1 << NANOVDB_CRC32_LOG2_BLOCK_SIZE; + if (tid+1 == checksumCount) blockSize += size - (checksumCount<>>(1, [=] __device__(size_t) {// Compute CRC32 of all the 4K blocks + *d_crc = crc32((const uint8_t*)d_checksums, checksumCount*sizeof(uint32_t), d_lut); + }); cudaCheckError(); +}// void cudaBlockedCRC32(const void *d_data, size_t size, const uint32_t *d_lut, uint32_t *d_crc, cudaStream_t stream) + +/// @brief Compute CRC32 checksum of 4K block +/// @param d_begin device pointer to start of data (inclusive) +/// @param d_end device pointer to end of data (exclusive) +/// @param d_lut pointer to Look-Up-Table for accelerated CRC32 computation +/// @param stream optional cuda stream (defaults to zero) +inline void blockedCRC32(const void *d_begin, const void *d_end, const uint32_t *d_lut, uint32_t *d_crc, cudaStream_t stream) +{ + blockedCRC32(d_begin, PtrDiff(d_end, d_begin), d_lut, d_crc, stream); +} + +}// namespace util::cuda + +namespace tools::cuda { + +/// @brief +/// @param d_gridData +/// @param d_lut pointer to Look-Up-Table for accelerated CRC32 computation +/// @param d_crc +/// @param stream optional cuda stream (defaults to zero) +inline void crc32Head(const GridData *d_gridData, const uint32_t *d_lut, uint32_t *d_crc, cudaStream_t stream) +{ + NANOVDB_ASSERT(d_gridData && d_lut && d_crc); + util::cuda::lambdaKernel<<<1, 1, 0, stream>>>(1, [=] __device__(size_t){*d_crc = tools::crc32Head(d_gridData, d_lut);}); +}// void cudaCrc32Head(const GridData *d_gridData, const uint32_t *d_lut, uint32_t *d_crc, cudaStream_t stream) + +/// @brief +/// @param d_gridData +/// @param gridData +/// @param d_lut pointer to Look-Up-Table for accelerated CRC32 computation +/// @param stream optional cuda stream (defaults to zero) +inline void crc32Tail(const GridData *d_gridData, const GridData *gridData, const uint32_t *d_lut, uint32_t *d_crc, cudaStream_t stream) +{ + NANOVDB_ASSERT(d_gridData && gridData && d_lut && d_crc); + NANOVDB_ASSERT(gridData->mVersion > Version(32,6,0)); + const uint8_t *d_begin = (const uint8_t*)d_gridData; + util::cuda::blockedCRC32(d_begin + sizeof(GridData) + sizeof(TreeData), d_begin + gridData->mGridSize, d_lut, d_crc, stream); +} + +/// @brief +/// @tparam ValueT +/// @param d_grid +/// @param gridData +/// @param d_lut pointer to Look-Up-Table for accelerated CRC32 computation +/// @param d_crc +/// @param stream +template +void crc32TailOld(const NanoGrid *d_grid, const GridData *gridData, const uint32_t *d_lut, uint32_t *d_crc, cudaStream_t stream) +{ + static constexpr unsigned int threadsPerBlock = 128;// seems faster than the old value of 256! 
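+    // Legacy (pre v32.6.0) tail checksum: one CRC32 per tree node, computed level by
+    // level (upper, then lower, then leaf) through a device NodeManager, followed by a
+    // single CRC32 over the array of per-node checksums.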
+ auto nodeMgrHandle = nanovdb::cuda::createNodeManager(d_grid, nanovdb::cuda::DeviceBuffer(), stream); + auto *d_nodeMgr = nodeMgrHandle.template deviceMgr(); + NANOVDB_ASSERT(isAligned(d_nodeMgr)); + const uint32_t nodeCount[3]={gridData->template nodeCount<0>(), gridData->template nodeCount<1>(), gridData->template nodeCount<2>()}; + util::cuda::unique_ptr d_checksumsUP(nodeCount[0]+nodeCount[1]+nodeCount[2]); + uint32_t *d_checksums = d_checksumsUP.get(), *d_ptr = d_checksums; + + util::cuda::lambdaKernel<<>>(nodeCount[2], [=] __device__(size_t tid) { + auto &node = d_nodeMgr->upper(uint32_t(tid)); + d_ptr[tid] = util::crc32((const uint8_t*)&node, node.memUsage(), d_lut); + }); cudaCheckError(); + + d_ptr += nodeCount[2]; + util::cuda::lambdaKernel<<>>(nodeCount[1], [=] __device__(size_t tid) { + auto &node = d_nodeMgr->lower(uint32_t(tid)); + d_ptr[tid] = util::crc32((const uint8_t*)&node, node.memUsage(), d_lut); + }); cudaCheckError(); + + d_ptr += nodeCount[1]; + util::cuda::lambdaKernel<<>>(nodeCount[0], [=] __device__(size_t tid) { + auto &node = d_nodeMgr->leaf(uint32_t(tid)); + d_ptr[tid] = util::crc32((const uint8_t*)&node, node.memUsage(), d_lut); + }); cudaCheckError(); + + util::cuda::lambdaKernel<<<1, 1, 0, stream>>>(1, [=] __device__(size_t) { + *d_crc = util::crc32(d_checksums, d_nodeMgr->tree().totalNodeCount()*sizeof(uint32_t), d_lut); + }); cudaCheckError(); +}// void cudaCrc32TailOld(const NanoGrid *d_grid, const GridData *gridData, uint32_t *d_lut, cudaStream_t stream) + +struct Crc32TailOld { + template + static void known(const GridData *d_gridData, const GridData *gridData, const uint32_t *d_lut, uint32_t *d_crc, cudaStream_t stream) + { + crc32TailOld((const NanoGrid*)d_gridData, gridData, d_lut, d_crc, stream); + } + static void unknown(const GridData*, const GridData*, const uint32_t*, uint32_t*, cudaStream_t) + { + throw std::runtime_error("Cannot call cudaCrc32TailOld with grid of unknown type"); + } +};// Crc32TailOld + +/// @brief +/// @param d_gridData +/// @param mode +/// @param stream +/// @return +inline Checksum evalChecksum(const GridData *d_gridData, CheckMode mode, cudaStream_t stream) +{ + static const int headSize = sizeof(GridData) + sizeof(TreeData); + NANOVDB_ASSERT(d_gridData); + Checksum cs; + if (mode != CheckMode::Empty) { + auto d_lut = util::cuda::createCrc32Lut(1, stream); + crc32Head(d_gridData, d_lut.get(), d_lut.get() + 256, stream); + cudaCheck(cudaMemcpyAsync(&(cs.head()), d_lut.get() + 256, headSize, cudaMemcpyDeviceToHost, stream)); + if (mode == CheckMode::Full) { + std::unique_ptr buffer(new char[headSize]); + auto *gridData = (GridData*)(buffer.get()); + cudaCheck(cudaMemcpyAsync(gridData, d_gridData, headSize, cudaMemcpyDeviceToHost, stream)); + if (gridData->mVersion > Version(32,6,0)) { + crc32Tail(d_gridData, gridData, d_lut.get(), d_lut.get() + 256, stream); + } else { + callNanoGrid(d_gridData, gridData, d_lut.get(), d_lut.get() + 256, stream); + } + cudaCheck(cudaMemcpyAsync(&(cs.tail()), d_lut.get() + 256, headSize, cudaMemcpyDeviceToHost, stream)); + } + } + return cs; +} + +/// @brief +/// @tparam BuildT +/// @param d_grid +/// @param mode +/// @param stream +/// @return +template +Checksum evalChecksum(const NanoGrid *d_grid, CheckMode mode, cudaStream_t stream = 0) +{ + static const int headSize = sizeof(GridData) + sizeof(TreeData); + NANOVDB_ASSERT(d_grid); + Checksum cs; + if (mode != CheckMode::Empty) { + auto d_lut = util::cuda::createCrc32Lut(1, stream); + crc32Head(d_grid, d_lut.get(), d_lut.get() + 256, 
stream); + cudaCheck(cudaMemcpyAsync(&(cs.head()), d_lut.get() + 256, headSize, cudaMemcpyDeviceToHost, stream)); + if (mode == CheckMode::Full) { + std::unique_ptr buffer(new char[headSize]); + auto *gridData = (GridData*)(buffer.get()); + cudaCheck(cudaMemcpyAsync(gridData, d_grid, headSize, cudaMemcpyDeviceToHost, stream)); + if (gridData->mVersion > Version(32,6,0)) { + crc32Tail(d_grid, gridData, d_lut.get(), d_lut.get() + 256, stream); + } else { + crc32TailOld(d_grid, gridData, d_lut.get(), d_lut.get() + 256, stream); + } + cudaCheck(cudaMemcpyAsync(&(cs.tail()), d_lut.get() + 256, headSize, cudaMemcpyDeviceToHost, stream)); + } + } + return cs; +} + +/// @brief +/// @param d_gridData +/// @param mode +/// @param stream +/// @return +inline bool validateChecksum(const GridData *d_gridData, CheckMode mode, cudaStream_t stream) +{ + static const int headSize = sizeof(GridData) + sizeof(TreeData); + NANOVDB_ASSERT(d_gridData); + if (mode == CheckMode::Empty) return true; + + // Copy just the GridData from the device to the host + std::unique_ptr buffer(new char[headSize]); + auto *gridData = (GridData*)(buffer.get()); + cudaCheck(cudaMemcpyAsync(gridData, d_gridData, headSize, cudaMemcpyDeviceToHost, stream)); + if (gridData->mChecksum.isEmpty()) return true;// checksum is empty so nothing to check + + // Allocate device LUT for CRC32 computation + auto d_lut = util::cuda::createCrc32Lut(1, stream);// unique pointer + uint32_t crc = 0, *d_crc = d_lut.get() + 256; + + // Check head checksum + crc32Head(d_gridData, d_lut.get(), d_crc, stream); + cudaCheck(cudaMemcpyAsync(&crc, d_crc, sizeof(uint32_t), cudaMemcpyDeviceToHost, stream)); + const bool checkHead = (crc == gridData->mChecksum.head()); + if (gridData->mChecksum.isHalf() || mode == CheckMode::Half || !checkHead) return checkHead; + + // Check tail checksum + if (gridData->mVersion > Version(32,6,0)) { + crc32Tail(d_gridData, gridData, d_lut.get(), d_crc, stream); + } else { + callNanoGrid(d_gridData, gridData, d_lut.get(), d_crc, stream); + } + cudaCheck(cudaMemcpyAsync(&crc, d_crc, sizeof(uint32_t), cudaMemcpyDeviceToHost, stream)); + return crc == gridData->mChecksum.tail(); +}// bool cudaValidateChecksum(const GridData *d_gridData, CheckMode mode, cudaStream_t stream = 0) + +/// @brief +/// @tparam BuildT +/// @param d_grid +/// @param mode +/// @param stream +/// @return +template +bool validateChecksum(const NanoGrid *d_grid, CheckMode mode, cudaStream_t stream = 0) +{ + static const int headSize = sizeof(GridData) + sizeof(TreeData); + NANOVDB_ASSERT(d_grid); + if (mode == CheckMode::Empty) return true; + + // Copy just the GridData from the device to the host + std::unique_ptr buffer(new char[headSize]); + auto *gridData = (GridData*)(buffer.get()); + cudaCheck(cudaMemcpyAsync(gridData, d_grid, headSize, cudaMemcpyDeviceToHost, stream)); + if (gridData->mChecksum.isEmpty()) return true;// checksum is empty so nothing to check + + // Allocate device LUT for CRC32 computation + auto d_lut = util::cuda::createCrc32Lut(1, stream);// unique pointer + uint32_t crc = 0, *d_crc = d_lut.get() + 256; + + // Check head checksum + crc32Head(d_grid, d_lut.get(), d_crc, stream); + cudaCheck(cudaMemcpyAsync(&crc, d_crc, sizeof(uint32_t), cudaMemcpyDeviceToHost, stream)); + const bool checkHead = (crc == gridData->mChecksum.head()); + if (gridData->mChecksum.isHalf() || mode == CheckMode::Half || !checkHead) return checkHead; + + // Check tail checksum + if (gridData->mVersion > Version(32,6,0)) { + crc32Tail(d_grid, gridData, 
d_lut.get(), d_crc, stream); + } else { + crc32TailOld(d_grid, gridData, d_lut.get(), d_crc, stream); + } + cudaCheck(cudaMemcpyAsync(&crc, d_crc, sizeof(uint32_t), cudaMemcpyDeviceToHost, stream)); + return crc == gridData->mChecksum.tail(); +}// bool cudaValidateChecksum(const GridData *d_gridData, CheckMode mode, cudaStream_t stream = 0) + +/// @brief Extract the checksum of a device grid +/// @param d_gridData Device pointer to grid with a checksum +/// @param stream optional cuda stream (defaults to zero) +inline Checksum getChecksum(const GridData *d_gridData, cudaStream_t stream) +{ + NANOVDB_ASSERT(d_gridData); + Checksum cs; + cudaCheck(cudaMemcpyAsync(&cs, (const uint8_t*)d_gridData + 8, sizeof(cs), cudaMemcpyDeviceToHost, stream)); + return cs; +} + +/// @brief Update the checksum of a device grid +/// @param d_gridData device pointer to GridData +/// @param mode Mode of computation for the checksum. +/// @param stream optional cuda stream (defaults to zero) +/// @return The actual mode used for checksum computation. Eg. if @c d_gridData is NULL (or @c mode = CheckMode::Empty) +/// then CheckMode::Empty is always returned. Else if the grid has no nodes or blind data CheckMode::Partial +/// is always returnd (even if @c mode = CheckMode::Full). +inline void updateChecksum(GridData *d_gridData, CheckMode mode, cudaStream_t stream) +{ + NANOVDB_ASSERT(d_gridData); + if (mode == CheckMode::Empty) return; + + // Allocate device LUT for CRC32 computation + auto d_lut = util::cuda::createCrc32Lut(0, stream);// unique pointers + + // Update head checksum + crc32Head(d_gridData, d_lut.get(), (uint32_t*)d_gridData + 2, stream); + + if (mode == CheckMode::Half) return; + + // Copy just the GridData from the device to the host + std::unique_ptr buffer(new char[sizeof(GridData) + sizeof(TreeData)]); + auto *gridData = (GridData*)(buffer.get()); + cudaCheck(cudaMemcpyAsync(gridData, d_gridData, sizeof(GridData) + sizeof(TreeData), cudaMemcpyDeviceToHost, stream)); + + // Update tail checksum + uint32_t *d_tail = (uint32_t*)d_gridData + 3; + if (gridData->mVersion > Version(32,6,0)) { + crc32Tail(d_gridData, gridData, d_lut.get(), d_tail, stream); + } else { + callNanoGrid(d_gridData, gridData, d_lut.get(), d_tail, stream); + } +}// cudaUpdateChecksum + +/// @brief +/// @tparam ValueT +/// @param d_grid +/// @param mode +/// @param stream +template +void updateChecksum(NanoGrid *d_grid, CheckMode mode, cudaStream_t stream = 0) +{ + NANOVDB_ASSERT(d_grid); + if (mode == CheckMode::Empty) return; + + // Allocate device LUT for CRC32 computation + auto d_lut = util::cuda::createCrc32Lut(0, stream);// unique pointers + + // Update head checksum + cuda::crc32Head(d_grid, d_lut.get(), (uint32_t*)d_grid + 2, stream); + if (mode == CheckMode::Half) return; + + // Copy just the GridData from the device to the host + std::unique_ptr buffer(new char[sizeof(GridData) + sizeof(TreeData)]); + auto *gridData = (GridData*)(buffer.get()); + cudaCheck(cudaMemcpyAsync(gridData, d_grid, sizeof(GridData) + sizeof(TreeData), cudaMemcpyDeviceToHost, stream)); + + // Update tail checksum + uint32_t *d_tail = (uint32_t*)d_grid + 3; + if (gridData->mVersion > Version(32,6,0)) { + crc32Tail(d_grid->data(), gridData, d_lut.get(), d_tail, stream); + } else { + crc32TailOld(d_grid, gridData, d_lut.get(), d_tail, stream); + } +} + +}// namespace tools::cuda // ================================================ + +}// namespace nanovdb // ==================================================== + +#endif // 
NANOVDB_TOOLS_CUDA_GRIDCHECKSUM_CUH_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/tools/cuda/GridStats.cuh b/nanovdb/nanovdb/tools/cuda/GridStats.cuh new file mode 100644 index 0000000000..34c615f6d2 --- /dev/null +++ b/nanovdb/nanovdb/tools/cuda/GridStats.cuh @@ -0,0 +1,249 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/*! + \file nanovdb/tools/cuda/GridStats.cuh + + \author Ken Museth + + \date October 9, 2023 + + \brief Re-computes min/max/avg/var/bbox information for each node in a + pre-existing NanoVDB grid on the device. +*/ + +#ifndef NANOVDB_TOOLS_CUDA_GRIDSTATS_CUH_HAS_BEEN_INCLUDED +#define NANOVDB_TOOLS_CUDA_GRIDSTATS_CUH_HAS_BEEN_INCLUDED + +#include +#include + +namespace nanovdb { + +namespace tools::cuda { + +/// @brief Update, i.e. re-compute, grid statistics like min/max, stats and bbox +/// information for an existing NanoVDB Grid. +/// @param grid Grid whose stats to update +/// @param mode Mode of computation for the statistics. +/// @param stream Optional cuda stream (defaults to zero) +template +void updateGridStats(NanoGrid *d_grid, StatsMode mode = StatsMode::Default, cudaStream_t stream = 0); + +//================================================================================================ + +/// @brief Allows for the construction of NanoVDB grids without any dependecy +template::ValueType>> +class GridStats +{ + using GridT = NanoGrid; + using TreeT = typename GridT::TreeType; + using ValueT = typename TreeT::ValueType; + using Node0 = typename TreeT::Node0; // leaf + using Node1 = typename TreeT::Node1; // lower + using Node2 = typename TreeT::Node2; // upper + using RootT = typename TreeT::Node3; // root + static_assert(util::is_same::value, "Mismatching type"); + + ValueT mDelta; // skip rendering of node if: node.max < -mDelta || node.min > mDelta + +public: + GridStats(ValueT delta = ValueT(0)) : mDelta(delta) {} + + void update(GridT *d_grid, cudaStream_t stream = 0); + +}; // cuda::GridStats + +//================================================================================================ + +namespace {// define cuda kernels in an unnamed namespace + +template +__global__ void processLeaf(NodeManager *d_nodeMgr, StatsT *d_stats) +{ + const uint32_t tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= d_nodeMgr->leafCount()) return; + auto &d_leaf = d_nodeMgr->leaf(tid); + + if (d_leaf.updateBBox()) {// updates active bounding box (also updates data->mFlags) and return true if non-empty + if constexpr(StatsT::hasStats()) { + StatsT stats; + for (auto it = d_leaf.cbeginValueOn(); it; ++it) stats.add(*it); + if constexpr(StatsT::hasAverage()) { + d_stats[tid] = stats; + *reinterpret_cast(&d_leaf.mMinimum) = tid; + } else { + stats.setStats(d_leaf); + } + } + } + d_leaf.mFlags &= ~uint8_t(1u);// enable rendering +}// processLeaf + +template +__global__ void processInternal(NodeManager *d_nodeMgr, StatsT *d_stats) +{ + using ChildT = typename NanoNode::type; + const uint32_t tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= d_nodeMgr->nodeCount(LEVEL)) return; + auto &d_node = d_nodeMgr->template node(tid); + auto &bbox = d_node.mBBox; + bbox = CoordBBox();// empty bbox + StatsT stats; + uint32_t childID = 0u; + + for (auto it = d_node.beginChild(); it; ++it) { + auto &child = *it; + bbox.expand( child.bbox() ); + if constexpr(StatsT::hasAverage()) { + childID = *reinterpret_cast(&child.mMinimum); + StatsT &s = d_stats[childID]; + s.setStats(child); + stats.add(s); + } else if 
constexpr(StatsT::hasMinMax()) {
+            stats.add(child.minimum());
+            stats.add(child.maximum());
+        }
+    }
+    for (auto it = d_node.cbeginValueOn(); it; ++it) {
+        const Coord ijk = it.getCoord();
+        bbox[0].minComponent(ijk);
+        bbox[1].maxComponent(ijk + Coord(ChildT::DIM - 1));
+        if constexpr(StatsT::hasStats()) stats.add(*it, ChildT::NUM_VALUES);
+    }
+    if constexpr(StatsT::hasAverage()) {
+        d_stats[childID] = stats;
+        *reinterpret_cast<uint32_t*>(&d_node.mMinimum) = childID;
+    } else if constexpr(StatsT::hasMinMax()) {
+        stats.setStats(d_node);
+    }
+    d_node.mFlags &= ~uint64_t(1u);// enable rendering
+}// processInternal
+
+template <typename BuildT, typename StatsT>
+__global__ void processRootAndGrid(NodeManager<BuildT> *d_nodeMgr, StatsT *d_stats)
+{
+    using ChildT = NanoUpper<BuildT>;
+    using ValueT = typename ChildT::ValueType;
+
+    // process root
+    auto &root = d_nodeMgr->root();
+    root.mBBox = CoordBBox();
+    if (root.isEmpty()) {
+        root.mMinimum = root.mMaximum = root.mBackground;
+        root.mAverage = root.mStdDevi = 0;
+    } else {
+        ValueT v;
+        StatsT s;
+        for (auto it = root.beginDense(); it; ++it) {
+            if (auto *child = it.probeChild(v)) {
+                root.mBBox.expand( child->bbox() );
+                if constexpr(StatsT::hasAverage()) {
+                    StatsT &stats = d_stats[*reinterpret_cast<uint32_t*>(&child->mMinimum)];
+                    stats.setStats(*child);
+                    s.add(stats);
+                } else if constexpr(StatsT::hasMinMax()){
+                    s.add(child->minimum());
+                    s.add(child->maximum());
+                }
+            } else if (it.isValueOn()) {
+                const Coord ijk = it.getCoord();
+                root.mBBox[0].minComponent(ijk);
+                root.mBBox[1].maxComponent(ijk + Coord(ChildT::DIM - 1));
+                if constexpr(StatsT::hasStats()) s.add(v, ChildT::NUM_VALUES);
+            }
+        }
+        s.setStats(root);
+    }
+
+    // process Grid
+    auto& grid = d_nodeMgr->grid();
+    const auto& indexBBox = root.bbox();
+    if (indexBBox.empty()) {
+        grid.mWorldBBox = Vec3dBBox();
+        grid.setBBoxOn(false);
+    } else {
+        // Note that below max is offset by one since CoordBBox.max is inclusive
+        // while bbox.max is exclusive. However, min is inclusive in both
+        // CoordBBox and Vec3dBBox. This also guarantees that a grid with a single
+        // active voxel, does not have an empty world bbox! E.g. if a grid with a
+        // unit index-to-world transformation only contains the active voxel (0,0,0)
+        // then indexBBox = (0,0,0) -> (0,0,0) and then worldBBox = (0.0, 0.0, 0.0)
+        // -> (1.0, 1.0, 1.0). This is a consequence of the different definitions
+        // of index and world bounding boxes inherited from OpenVDB!
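+        // E.g. (hypothetical numbers): for indexBBox = (0,0,0) -> (7,7,7) and a uniform
+        // voxel size of 0.5, the exclusive upper corner (8,8,8) is mapped below, so
+        // mWorldBBox becomes (0.0, 0.0, 0.0) -> (4.0, 4.0, 4.0).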
+ grid.mWorldBBox = CoordBBox(indexBBox[0], indexBBox[1].offsetBy(1)).transform(grid.map()); + grid.setBBoxOn(true); + } + + // set bit flags + grid.setMinMaxOn(StatsT::hasMinMax()); + grid.setAverageOn(StatsT::hasAverage()); + grid.setStdDeviationOn(StatsT::hasStdDeviation()); +}// processRootAndGrid + +}// cuda kernels are defined in an unnamed namespace + +//================================================================================================ + +template +void GridStats::update(NanoGrid *d_grid, cudaStream_t stream) +{ + static const uint32_t threadsPerBlock = 128; + auto blocksPerGrid = [&](uint32_t count)->uint32_t{return (count + (threadsPerBlock - 1)) / threadsPerBlock;}; + + auto nodeMgrHandle = nanovdb::cuda::createNodeManager(d_grid, CudaDeviceBuffer(), stream); + auto *d_nodeMgr = nodeMgrHandle.template deviceMgr(); + + uint32_t nodeCount[3];// {leaf, lower, upper} + cudaCheck(cudaMemcpyAsync(nodeCount, (char*)d_grid + sizeof(GridData) + 4*sizeof(uint64_t), 3*sizeof(uint32_t), cudaMemcpyDeviceToHost, stream)); + //cudaStreamSynchronize(stream);// finish all device tasks in stream + + StatsT *d_stats = nullptr; + + if constexpr(StatsT::hasAverage()) cudaCheck(util::cuda::mallocAsync((void**)&d_stats, nodeCount[0]*sizeof(StatsT), stream)); + + processLeaf<<>>(d_nodeMgr, d_stats); + + processInternal<<>>(d_nodeMgr, d_stats); + + processInternal<<>>(d_nodeMgr, d_stats); + + processRootAndGrid<<<1, 1, 0, stream>>>(d_nodeMgr, d_stats); + + if constexpr(StatsT::hasAverage()) cudaCheck(util::cuda::freeAsync(d_stats, stream)); + +} // cuda::GridStats::update( Grid ) + +//================================================================================================ + +template +void updateGridStats(NanoGrid *d_grid, StatsMode mode, cudaStream_t stream) +{ + if (d_grid == nullptr && mode == StatsMode::Disable) { + return; + } else if (mode == StatsMode::BBox || util::is_same::value) { + GridStats > stats; + stats.update(d_grid, stream); + } else if (mode == StatsMode::MinMax) { + GridStats > stats; + stats.update(d_grid, stream); + } else if (mode == StatsMode::All) { + GridStats > stats; + stats.update(d_grid, stream); + } else { + throw std::runtime_error("GridStats: Unsupported statistics mode."); + } +}// cuda::updateGridStats + +}// namespace tools::cuda + +template +[[deprecated("Use nanovdb::cuda::updateGridStats instead")]] +void cudaGridStats(NanoGrid *d_grid, tools::StatsMode mode = tools::StatsMode::Default, cudaStream_t stream = 0) +{ + tools::cuda::updateGridStats(d_grid, mode, stream); +} + +} // namespace nanovdb + +#endif // NANOVDB_TOOLS_CUDA_GRIDSTATS_CUH_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/tools/cuda/GridValidator.cuh b/nanovdb/nanovdb/tools/cuda/GridValidator.cuh new file mode 100644 index 0000000000..2edfc0bdb1 --- /dev/null +++ b/nanovdb/nanovdb/tools/cuda/GridValidator.cuh @@ -0,0 +1,59 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/*! + \file nanovdb/tools/cuda/GridValidator.cuh + + \author Ken Museth + + \date November 3, 2023 + + \brief Checks the validity of an existing NanoVDB device grid. +*/ + +#ifndef NANOVDB_TOOLS_CUDA_GRIDVALIDATOR_CUH_HAS_BEEN_INCLUDED +#define NANOVDB_TOOLS_CUDA_GRIDVALIDATOR_CUH_HAS_BEEN_INCLUDED + +#include +#include +#include +#include + +namespace nanovdb { + +namespace tools::cuda { + +/// @brief Return true if the specified grid passes several validation tests. 
+/// +/// @param grid Grid to validate +/// @param detailed If true the validation test is detailed and relatively slow. +/// @param verbose If true information about the first failed test is printed to std::cerr +template +bool isValid(const NanoGrid *d_grid, CheckMode mode, bool verbose = false, cudaStream_t stream = 0) +{ + static const int size = 100; + std::unique_ptr strUP(new char[size]); + util::cuda::unique_ptr d_strUP(size); + char *str = strUP.get(), *d_str = d_strUP.get(); + + util::cuda::lambdaKernel<<<1, 1, 0, stream>>>(1, [=] __device__(size_t) {nanovdb::tools::checkGrid(d_grid, d_str, mode);}); + cudaMemcpyAsync(str, d_str, size, cudaMemcpyDeviceToHost, stream); + + if (util::empty(str) && !cuda::validateChecksum(d_grid, mode)) util::strcpy(str, "Mis-matching checksum"); + if (verbose && !util::empty(str)) std::cerr << "Validation failed: " << str << std::endl; + + return util::empty(str); +}// tools::cuda::isValid + +}// namespace tools::cuda + +template +[[deprecated("Use cuda::isValid() instead.")]] +bool cudaIsValid(const NanoGrid *d_grid, CheckMode mode, bool verbose = false, cudaStream_t stream = 0) +{ + return tools::cuda::isValid(d_grid, mode, verbose, stream); +} + +} // namespace nanovdb + +#endif // NANOVDB_TOOLS_CUDA_GRIDVALIDATOR_CUH_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/tools/cuda/IndexToGrid.cuh b/nanovdb/nanovdb/tools/cuda/IndexToGrid.cuh new file mode 100644 index 0000000000..756a4fde63 --- /dev/null +++ b/nanovdb/nanovdb/tools/cuda/IndexToGrid.cuh @@ -0,0 +1,407 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/*! + \file nanovdb/tools/cuda/IndexToGrid.cuh + + \author Ken Museth + + \date April 17, 2023 + + \brief Combines an IndexGrid and values into a regular Grid on the device + + \warning The header file contains cuda device code so be sure + to only include it in .cu files (or other .cuh files) +*/ + +#ifndef NVIDIA_TOOLS_CUDA_INDEXTOGRID_CUH_HAS_BEEN_INCLUDED +#define NVIDIA_TOOLS_CUDA_INDEXTOGRID_CUH_HAS_BEEN_INCLUDED + +#include +#include +#include +#include +#include + +namespace nanovdb {// ================================================================ + +namespace tools::cuda {// ============================================================ + +/// @brief Freestanding function that combines an IndexGrid and values into a regular Grid +/// @tparam DstBuildT Build time of the destination/output Grid +/// @tparam SrcBuildT Build type of the source/input IndexGrid +/// @tparam BufferT Type of the buffer used for allocation of the destination Grid +/// @param d_srcGrid Device pointer to source/input IndexGrid, i.e. SrcBuildT={ValueIndex,ValueOnIndex,ValueIndexMask,ValueOnIndexMask} +/// @param d_srcValues Device pointer to an array of values +/// @param pool Memory pool used to create a buffer for the destination/output Grid +/// @param stream optional CUDA stream (defaults to CUDA stream 0 +/// @note If d_srcGrid has stats (min,max,avg,std-div), the d_srcValues is also assumed +/// to have the same information, all of which are then copied to the destination/output grid. +/// An exception to this rule is if the type of d_srcValues is different from the stats type +/// NanoRoot::FloatType, e.g. if DstBuildT=Vec3f then NanoRoot::FloatType=float, +/// in which case average and standard-deviation is undefined in the output grid. 
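+/// @note Hypothetical usage sketch (the variable names are illustrative):
+/// @code
+///     const NanoGrid<ValueOnIndex> *d_idxGrid = ...;// device IndexGrid
+///     const float *d_values = ...;// one value per index in d_idxGrid
+///     auto handle = nanovdb::tools::cuda::indexToGrid<float>(d_idxGrid, d_values);
+///     auto *d_grid = handle.template deviceGrid<float>();// device grid of type NanoGrid<float>
+/// @endcode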
+/// @return returns handle to grid that combined IndexGrid and values +template +typename util::enable_if::is_index, GridHandle>::type +indexToGrid(const NanoGrid *d_srcGrid, const typename BuildToValueMap::type *d_srcValues, const BufferT &pool = BufferT(), cudaStream_t stream = 0); + + +template +typename util::enable_if::is_index, GridHandle>::type +createNanoGrid(const NanoGrid *d_srcGrid, const typename BuildToValueMap::type *d_srcValues, const BufferT &pool = BufferT(), cudaStream_t stream = 0) +{ + return indexToGrid(d_srcGrid, d_srcValues, pool, stream); +} + +namespace {// anonymous namespace + +template +class IndexToGrid +{ + using SrcGridT = NanoGrid; +public: + struct NodeAccessor; + + /// @brief Constructor from a source IndeGrid + /// @param srcGrid Device pointer to IndexGrid used as the source + IndexToGrid(const SrcGridT *d_srcGrid, cudaStream_t stream = 0); + + ~IndexToGrid() {cudaCheck(util::cuda::freeAsync(mDevNodeAcc, mStream));} + + /// @brief Toggle on and off verbose mode + /// @param on if true verbose is turned on + void setVerbose(bool on = true) {mVerbose = on; } + + /// @brief Set the name of the destination/output grid + /// @param name Name used for the destination grid + void setGridName(const std::string &name) {mGridName = name;} + + /// @brief Combines the IndexGrid with values to produce a regular Grid + /// @tparam DstBuildT Template parameter of the destination grid and value type + /// @tparam BufferT Template parameter of the memory allocator + /// @param srcValues pointer to values that will be inserted into the output grid + /// @param buffer optional buffer used for memory allocation + /// @return A new GridHandle with the grid of type @c DstBuildT + template + GridHandle getHandle(const typename BuildToValueMap::type *srcValues, const BufferT &buffer = BufferT()); + +private: + cudaStream_t mStream{0}; + util::cuda::Timer mTimer; + std::string mGridName; + bool mVerbose{false}; + NodeAccessor mNodeAcc, *mDevNodeAcc; + + template + BufferT getBuffer(const BufferT &pool); +};// IndexToGrid + +//================================================================================================ + +template +struct IndexToGrid::NodeAccessor +{ + uint64_t grid, tree, root, node[3], meta, blind, size;// byte offsets, node: 0=leaf,1=lower, 2=upper + const SrcGridT *d_srcGrid;// device point to source IndexGrid + void *d_dstPtr;// device pointer to buffer with destination Grid + char *d_gridName; + uint32_t nodeCount[4];// 0=leaf, 1=lower, 2=upper, 3=root tiles + + __device__ const NanoGrid& srcGrid() const {return *d_srcGrid;} + __device__ const NanoTree& srcTree() const {return d_srcGrid->tree();} + __device__ const NanoRoot& srcRoot() const {return d_srcGrid->tree().root();} + template + __device__ const typename NanoNode::type& srcNode(int i) const { + return *(this->srcTree().template getFirstNode() + i); + } + + template + __device__ NanoGrid& dstGrid() const {return *util::PtrAdd>(d_dstPtr, grid);} + template + __device__ NanoTree& dstTree() const {return *util::PtrAdd>(d_dstPtr, tree);} + template + __device__ NanoRoot& dstRoot() const {return *util::PtrAdd>(d_dstPtr, root);} + template + __device__ typename NanoNode::type& dstNode(int i) const { + return *(util::PtrAdd::type>(d_dstPtr, node[LEVEL])+i); + } +};// IndexToGrid::NodeAccessor + +//================================================================================================ + +template +__global__ void processGridTreeRootKernel(typename IndexToGrid::NodeAccessor *nodeAcc, + const 
typename BuildToValueMap::type *srcValues) +{ + using SrcValueT = typename BuildToValueMap::type; + using DstStatsT = typename NanoRoot::FloatType; + + auto &srcGrid = nodeAcc->srcGrid(); + auto &dstGrid = nodeAcc->template dstGrid(); + auto &srcTree = srcGrid.tree(); + auto &dstTree = nodeAcc->template dstTree(); + auto &srcRoot = srcTree.root(); + auto &dstRoot = nodeAcc->template dstRoot(); + + // process Grid + *dstGrid.data() = *srcGrid.data(); + dstGrid.mGridType = toGridType(); + dstGrid.mData1 = 0u; + // we will recompute GridData::mChecksum later + + // process Tree + *dstTree.data() = *srcTree.data(); + dstTree.setRoot(&dstRoot); + dstTree.setFirstNode(&nodeAcc->template dstNode(0)); + dstTree.setFirstNode(&nodeAcc->template dstNode(0)); + dstTree.setFirstNode(&nodeAcc->template dstNode(0)); + + // process Root + dstRoot.mBBox = srcRoot.mBBox; + dstRoot.mTableSize = srcRoot.mTableSize; + dstRoot.mBackground = srcValues[srcRoot.mBackground]; + if (srcGrid.hasMinMax()) { + dstRoot.mMinimum = srcValues[srcRoot.mMinimum]; + dstRoot.mMaximum = srcValues[srcRoot.mMaximum]; + } + if constexpr(util::is_same::value) {// e.g. {float,float} or {Vec3f,float} + if (srcGrid.hasAverage()) dstRoot.mAverage = srcValues[srcRoot.mAverage]; + if (srcGrid.hasStdDeviation()) dstRoot.mStdDevi = srcValues[srcRoot.mStdDevi]; + } +}// processGridTreeRootKernel + +//================================================================================================ + +template +__global__ void processRootTilesKernel(typename IndexToGrid::NodeAccessor *nodeAcc, + const typename BuildToValueMap::type *srcValues) +{ + const auto tid = blockIdx.x; + + // Process children and tiles + const auto &srcTile = *nodeAcc->srcRoot().tile(tid); + auto &dstTile = *nodeAcc->template dstRoot().tile(tid); + dstTile.key = srcTile.key; + if (srcTile.child) { + dstTile.child = sizeof(NanoRoot) + sizeof(NanoRoot::Tile)*((srcTile.child - sizeof(NanoRoot))/sizeof(NanoRoot::Tile)); + dstTile.value = srcValues[0];// set to background + dstTile.state = false; + } else { + dstTile.child = 0;// i.e. no child node + dstTile.value = srcValues[srcTile.value]; + dstTile.state = srcTile.state; + } +}// processRootTilesKernel + +//================================================================================================ + +template +__global__ void processNodesKernel(typename IndexToGrid::NodeAccessor *nodeAcc, + const typename BuildToValueMap::type *srcValues) +{ + using SrcNodeT = typename NanoNode::type; + using DstNodeT = typename NanoNode::type; + using SrcChildT = typename SrcNodeT::ChildNodeType; + using DstChildT = typename DstNodeT::ChildNodeType; + using SrcValueT = typename BuildToValueMap::type; + using DstStatsT = typename NanoRoot::FloatType; + + auto &srcNode = nodeAcc->template srcNode(blockIdx.x); + auto &dstNode = nodeAcc->template dstNode(blockIdx.x); + + if (threadIdx.x == 0 && threadIdx.y == 0) { + dstNode.mBBox = srcNode.mBBox; + dstNode.mFlags = srcNode.mFlags; + dstNode.mValueMask = srcNode.mValueMask; + dstNode.mChildMask = srcNode.mChildMask; + auto &srcGrid = nodeAcc->srcGrid(); + if (srcGrid.hasMinMax()) { + dstNode.mMinimum = srcValues[srcNode.mMinimum]; + dstNode.mMaximum = srcValues[srcNode.mMaximum]; + } + if constexpr(util::is_same::value) {// e.g. 
{float,float} or {Vec3f,float} + if (srcGrid.hasAverage()) dstNode.mAverage = srcValues[srcNode.mAverage]; + if (srcGrid.hasStdDeviation()) dstNode.mStdDevi = srcValues[srcNode.mStdDevi]; + } + } + const uint64_t nodeSkip = nodeAcc->nodeCount[LEVEL] - blockIdx.x, srcOff = sizeof(SrcNodeT)*nodeSkip, dstOff = sizeof(DstNodeT)*nodeSkip;// offset to first node of child type + const int off = blockDim.x*blockDim.y*threadIdx.x + blockDim.x*threadIdx.y; + for (int threadIdx_z=0; threadIdx_z +__global__ void processLeafsKernel(typename IndexToGrid::NodeAccessor *nodeAcc, + const typename BuildToValueMap::type *srcValues) +{ + using SrcValueT = typename BuildToValueMap::type; + using DstStatsT = typename NanoRoot::FloatType; + static_assert(!BuildTraits::is_special, "Invalid destination type!"); + auto &srcLeaf = nodeAcc->template srcNode<0>(blockIdx.x); + auto &dstLeaf = nodeAcc->template dstNode(blockIdx.x); + if (threadIdx.x == 0 && threadIdx.y == 0) { + dstLeaf.mBBoxMin = srcLeaf.mBBoxMin; + for (int i=0; i<3; ++i) dstLeaf.mBBoxDif[i] = srcLeaf.mBBoxDif[i]; + dstLeaf.mFlags = srcLeaf.mFlags; + dstLeaf.mValueMask = srcLeaf.mValueMask; + /// + auto &srcGrid = nodeAcc->srcGrid(); + if (srcGrid.hasMinMax()) { + dstLeaf.mMinimum = srcValues[srcLeaf.getMin()]; + dstLeaf.mMaximum = srcValues[srcLeaf.getMax()]; + } + if constexpr(util::is_same::value) {// e.g. {float,float} or {Vec3f,float} + if (srcGrid.hasAverage()) dstLeaf.mAverage = srcValues[srcLeaf.getAvg()]; + if (srcGrid.hasStdDeviation()) dstLeaf.mStdDevi = srcValues[srcLeaf.getDev()]; + } + } + const int off = blockDim.x*blockDim.y*threadIdx.x + blockDim.x*threadIdx.y; + auto *dst = dstLeaf.mValues + off; + for (int threadIdx_z=0; threadIdx_z +__global__ void cpyNodeCountKernel(const NanoGrid *srcGrid, + typename IndexToGrid::NodeAccessor *nodeAcc) +{ + assert(srcGrid->isSequential()); + nodeAcc->d_srcGrid = srcGrid; + for (int i=0; i<3; ++i) nodeAcc->nodeCount[i] = srcGrid->tree().nodeCount(i); + nodeAcc->nodeCount[3] = srcGrid->tree().root().tileCount(); +} + +}// anonymous namespace + +//================================================================================================ + +template +IndexToGrid::IndexToGrid(const SrcGridT *d_srcGrid, cudaStream_t stream) + : mStream(stream), mTimer(stream) +{ + NANOVDB_ASSERT(d_srcGrid); + cudaCheck(util::cuda::mallocAsync((void**)&mDevNodeAcc, sizeof(NodeAccessor), mStream)); + cpyNodeCountKernel<<<1, 1, 0, mStream>>>(d_srcGrid, mDevNodeAcc); + cudaCheckError(); + cudaCheck(cudaMemcpyAsync(&mNodeAcc, mDevNodeAcc, sizeof(NodeAccessor), cudaMemcpyDeviceToHost, mStream));// mNodeAcc = *mDevNodeAcc +} + +//================================================================================================ + +template +template +GridHandle IndexToGrid::getHandle(const typename BuildToValueMap::type *srcValues, + const BufferT &pool) +{ + if (mVerbose) mTimer.start("Initiate buffer"); + auto buffer = this->template getBuffer(pool); + + if (mVerbose) mTimer.restart("Process grid,tree,root"); + processGridTreeRootKernel<<<1, 1, 0, mStream>>>(mDevNodeAcc, srcValues); + cudaCheckError(); + + if (mVerbose) mTimer.restart("Process root children and tiles"); + processRootTilesKernel<<>>(mDevNodeAcc, srcValues); + cudaCheckError(); + + cudaCheck(util::cuda::freeAsync(mNodeAcc.d_gridName, mStream)); + + if (mVerbose) mTimer.restart("Process upper internal nodes"); + processNodesKernel<<>>(mDevNodeAcc, srcValues); + cudaCheckError(); + + if (mVerbose) mTimer.restart("Process lower internal nodes"); + 
processNodesKernel<<>>(mDevNodeAcc, srcValues); + cudaCheckError(); + + if (mVerbose) mTimer.restart("Process leaf nodes"); + processLeafsKernel<<>>(mDevNodeAcc, srcValues); + if (mVerbose) mTimer.stop(); + cudaCheckError(); + + if (mVerbose) mTimer.restart("Compute checksums"); + updateChecksum((GridData*)mNodeAcc.d_dstPtr, mStream); + if (mVerbose) mTimer.stop(); + + //cudaStreamSynchronize(mStream);// finish all device tasks in mStream + return GridHandle(std::move(buffer)); +}// IndexToGrid::getHandle + +//================================================================================================ + +template +template +inline BufferT IndexToGrid::getBuffer(const BufferT &pool) +{ + mNodeAcc.grid = 0;// grid is always stored at the start of the buffer! + mNodeAcc.tree = NanoGrid::memUsage(); // grid ends and tree begins + mNodeAcc.root = mNodeAcc.tree + NanoTree::memUsage(); // tree ends and root node begins + mNodeAcc.node[2] = mNodeAcc.root + NanoRoot::memUsage(mNodeAcc.nodeCount[3]); // root node ends and upper internal nodes begin + mNodeAcc.node[1] = mNodeAcc.node[2] + NanoUpper::memUsage()*mNodeAcc.nodeCount[2]; // upper internal nodes ends and lower internal nodes begin + mNodeAcc.node[0] = mNodeAcc.node[1] + NanoLower::memUsage()*mNodeAcc.nodeCount[1]; // lower internal nodes ends and leaf nodes begin + mNodeAcc.meta = mNodeAcc.node[0] + NanoLeaf::DataType::memUsage()*mNodeAcc.nodeCount[0];// leaf nodes end and blind meta data begins + mNodeAcc.blind = mNodeAcc.meta + 0*sizeof(GridBlindMetaData); // meta data ends and blind data begins + mNodeAcc.size = mNodeAcc.blind;// end of buffer + auto buffer = BufferT::create(mNodeAcc.size, &pool, false, mStream); + mNodeAcc.d_dstPtr = buffer.deviceData(); + if (mNodeAcc.d_dstPtr == nullptr) throw std::runtime_error("Failed memory allocation on the device"); + + if (size_t size = mGridName.size()) { + cudaCheck(util::cuda::mallocAsync((void**)&mNodeAcc.d_gridName, size, mStream)); + cudaCheck(cudaMemcpyAsync(mNodeAcc.d_gridName, mGridName.data(), size, cudaMemcpyHostToDevice, mStream)); + } else { + mNodeAcc.d_gridName = nullptr; + } + cudaCheck(cudaMemcpyAsync(mDevNodeAcc, &mNodeAcc, sizeof(NodeAccessor), cudaMemcpyHostToDevice, mStream));// copy NodeAccessor CPU -> GPU + return buffer; +} + +//================================================================================================ + +template +typename util::enable_if::is_index, GridHandle>::type +indexToGrid(const NanoGrid *d_srcGrid, const typename BuildToValueMap::type *d_srcValues, const BufferT &pool, cudaStream_t stream) +{ + IndexToGrid converter(d_srcGrid, stream); + return converter.template getHandle(d_srcValues, pool); +} + +}// namespace tools::cuda ============================================================= + +template +[[deprecated("Use nanovdb::cuda::indexToGrid instead")]] +typename util::enable_if::is_index, GridHandle>::type +cudaIndexToGrid(const NanoGrid *d_srcGrid, const typename BuildToValueMap::type *d_srcValues, const BufferT &pool = BufferT(), cudaStream_t stream = 0) +{ + return tools::cuda::indexToGrid(d_srcGrid, d_srcValues, pool, stream); +} + + +template +[[deprecated("Use nanovdb::cuda::indexToGrid instead")]] +typename util::enable_if::is_index, GridHandle>::type +cudaCreateNanoGrid(const NanoGrid *d_srcGrid, const typename BuildToValueMap::type *d_srcValues, const BufferT &pool = BufferT(), cudaStream_t stream = 0) +{ + return tools::cuda::indexToGrid(d_srcGrid, d_srcValues, pool, stream); +} + +}// nanovdb namespace 
===================================================================
+
+#endif // NVIDIA_TOOLS_CUDA_INDEXTOGRID_CUH_HAS_BEEN_INCLUDED
diff --git a/nanovdb/nanovdb/tools/cuda/PointsToGrid.cuh b/nanovdb/nanovdb/tools/cuda/PointsToGrid.cuh
new file mode 100644
index 0000000000..70134b5e7b
--- /dev/null
+++ b/nanovdb/nanovdb/tools/cuda/PointsToGrid.cuh
@@ -0,0 +1,1293 @@
+// Copyright Contributors to the OpenVDB Project
+// SPDX-License-Identifier: MPL-2.0
+
+/*!
+    \file nanovdb/tools/cuda/PointsToGrid.cuh
+
+    \authors Greg Klar (initial version) and Ken Museth (final version)
+
+    \brief Generates NanoVDB grids from a list of voxels or points on the device
+
+    \warning The header file contains cuda device code so be sure
+             to only include it in .cu files (or other .cuh files)
+*/
+
+#ifndef NVIDIA_TOOLS_CUDA_POINTSTOGRID_CUH_HAS_BEEN_INCLUDED
+#define NVIDIA_TOOLS_CUDA_POINTSTOGRID_CUH_HAS_BEEN_INCLUDED
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+    Note: 4.29 billion (=2^32) coordinates of type Vec3f have a memory footprint of 48 GB!
+*/
+
+namespace nanovdb {// ================================================================================
+
+namespace tools::cuda {// ============================================================================
+
+/// @brief Generates a NanoGrid from a list of point coordinates on the device. This method is
+///        mainly used as a means to build a BVH acceleration structure for points, e.g. for efficient rendering.
+/// @tparam PtrT Template type to a raw or fancy-pointer of point coordinates in world space. Dereferencing should return Vec3f or Vec3d.
+/// @tparam BufferT Template type of buffer used for memory allocation on the device
+/// @tparam AllocT Template type of optional device allocator for internal temporary memory
+/// @param dWorldPoints Raw or fancy pointer to list of point coordinates in world space on the device
+/// @param pointCount number of points in the list @c dWorldPoints
+/// @param voxelSize Size of a voxel in world units used for the output grid
+/// @param type Defines the way point information is represented in the output grid (see PointType enum in NanoVDB.h)
+///             Should not be PointType::Disable!
+/// @param buffer Instance of the device buffer used for memory allocation
+/// @param stream optional CUDA stream (defaults to CUDA stream 0)
+/// @return Returns a handle with a grid of type NanoGrid where point information, e.g. coordinates,
+///         are represented as blind data defined by @c type.
+template
+GridHandle
+pointsToGrid(const PtrT dWorldPoints,
+             int pointCount,
+             double voxelSize,
+             PointType type = PointType::Default,
+             const BufferT &buffer = BufferT(),
+             cudaStream_t stream = 0);
+
+//-----------------------------------------------------------------------------------------------------
+
+/// @brief Generates a NanoGrid from a list of point coordinates on the device. This method is
+///        mainly used as a means to build a BVH acceleration structure for points, e.g. for efficient rendering.
+/// @tparam PtrT Template type to a raw or fancy-pointer of point coordinates in world space. Dereferencing should return Vec3f or Vec3d.
+/// @tparam BufferT Template type of buffer used for memory allocation on the device
+/// @tparam AllocT Template type of optional device allocator for internal temporary memory
+/// @param dWorldPoints Raw or fancy pointer to list of point coordinates in world space on the device
+/// @param pointCount total number of points in the list @c dWorldPoints
+/// @param maxPointsPerVoxel Max density of points per voxel, i.e. maximum number of points in any voxel
+/// @param tolerance allows the point density to vary by the specified amount (defaults to 1). That is, the voxel size
+///                  is selected such that the max density is within +/- the tolerance.
+/// @param maxIterations Maximum number of iterations used to search for a voxel size that produces a point density
+///                      within the specified tolerance.
+/// @param type Defines the way point information is represented in the output grid (see PointType enum in NanoVDB.h)
+///             Should not be PointType::Disable!
+/// @param buffer Instance of the device buffer used for memory allocation
+/// @param stream optional CUDA stream (defaults to CUDA stream 0)
+/// @return Returns a handle with a grid of type NanoGrid where point information, e.g. coordinates,
+///         are represented as blind data defined by @c type.
+template
+GridHandle
+pointsToGrid(const PtrT dWorldPoints,
+             int pointCount,
+             int maxPointsPerVoxel,
+             int tolerance = 1,
+             int maxIterations = 10,
+             PointType type = PointType::Default,
+             const BufferT &buffer = BufferT(),
+             cudaStream_t stream = 0);
+
+//-----------------------------------------------------------------------------------------------------
+
+template
+GridHandle
+pointsToGrid(std::vector> pointSet,
+             const BufferT &buffer = BufferT(),
+             cudaStream_t stream = 0);
+
+//-----------------------------------------------------------------------------------------------------
+
+/// @brief Generates a NanoGrid of any type from a list of voxel coordinates on the device. Unlike @c cudaPointsToGrid
+///        this method only builds the grid but does not encode the coordinates as blind data. It is mainly useful as a
+///        means to generate a grid that is known to contain the voxels given in the list.
+/// @tparam BuildT Template type of the return grid
+/// @tparam PtrT Template type to a raw or fancy-pointer of voxel coordinates in grid (or index) space. Dereferencing should return Coord, Vec3f or Vec3d.
+/// @tparam BufferT Template type of buffer used for memory allocation on the device
+/// @tparam AllocT Template type of optional device allocator for internal temporary memory
+/// @param dGridVoxels Raw or fancy pointer to list of voxel coordinates in grid (or index) space on the device
+/// @param voxelCount number of voxels in the list @c dGridVoxels
+/// @param voxelSize Size of a voxel in world units used for the output grid
+/// @param buffer Instance of the device buffer used for memory allocation
+/// @return Returns a handle with the grid of type NanoGrid
+template
+GridHandle
+voxelsToGrid(const PtrT dGridVoxels,
+             size_t voxelCount,
+             double voxelSize = 1.0,
+             const BufferT &buffer = BufferT(),
+             cudaStream_t stream = 0);
+
+//-------------------------------------------------------------------------------------------------------
+
+template
+GridHandle
+voxelsToGrid(std::vector> pointSet,
+             const BufferT &buffer = BufferT(),
+             cudaStream_t stream = 0);
+
+}// namespace tools::cuda ========================================================================
+
+/// @brief Example class of a fancy pointer that can optionally be used as a template for writing
+///        a custom fancy pointer that allows for particle coordinates to be arranged non-linearly
+///        in memory. For instance, when coordinates are interleaved with other data, i.e. an array
+///        of structs, a custom implementation of fancy_ptr::operator[](size_t i) can account for
+///        strides that skip the other interleaved data.
+/// @tparam T Template type that specifies the type used for the coordinates of the points
+template
+class fancy_ptr
+{
+    const T* mPtr;
+public:
+    /// @brief Default constructor.
+    /// @note This method is actually not required by cuda::PointsToGrid
+    /// @param ptr Pointer to array of elements
+    __hostdev__ explicit fancy_ptr(const T* ptr = nullptr) : mPtr(ptr) {}
+    /// @brief Index access into the array pointed to by the stored pointer.
+    /// @note This method is required by cuda::PointsToGrid!
+    /// @param i Unsigned index of the element to be returned
+    /// @return Const reference to the element at the i'th position
+    __hostdev__ inline const T& operator[](size_t i) const {return mPtr[i];}
+    /// @brief Dummy implementation required by pointer_traits.
+    /// @note Note that only the return type matters!
+    /// @details Unlike operator[] it is safe to assume that all pointer types have operator*,
+    ///          which is why pointer_traits makes use of it to determine the element_type that
+    ///          a pointer class is pointing to. E.g. operator[] is not always defined for std::shared_ptr!
+ __hostdev__ inline const T& operator*() const {return *mPtr;} +};// fancy_ptr + +/// @brief Simple stand-alone function that can be used to conveniently construct a fancy_ptr +/// @tparam T Template type that specifies the type use for the coordinates of the points +/// @param ptr Raw pointer to data +/// @return a new instance of a fancy_ptr +template +fancy_ptr make_fancy(const T* ptr = nullptr) {return fancy_ptr(ptr);} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +/// @brief Trait of points, like type of pointer and size of the pointer type +template +struct pointer_traits; + +template +struct pointer_traits { + using element_type = T; + static constexpr size_t element_size = sizeof(T); +}; + +template +struct pointer_traits { + using element_type = typename util::remove_reference())>::type;// assumes T::operator*() exists! + static constexpr size_t element_size = sizeof(element_type); +}; + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +/// @brief computes the relative 8-bit voxel offsets from a world coordinate +/// @tparam Vec3T Type of the world coordinate +/// @param voxel 8-bit output coordinates that are relative to a voxel +/// @param world input world coordinates +/// @param indexToWorld Transform from index to world space +template +__hostdev__ inline static void worldToVoxel(Vec3u8 &voxel, const Vec3T &world, const Map &indexToWorld) +{ + const Vec3d ijk = indexToWorld.applyInverseMap(world);// world -> index + static constexpr double encode = double((1<<8) - 1); + voxel[0] = uint8_t( encode*(ijk[0] - math::Floor(ijk[0] + 0.5) + 0.5) ); + voxel[1] = uint8_t( encode*(ijk[1] - math::Floor(ijk[1] + 0.5) + 0.5) ); + voxel[2] = uint8_t( encode*(ijk[2] - math::Floor(ijk[2] + 0.5) + 0.5) ); +} + +/// @brief computes the relative 16-bit voxel offsets from a world coordinate +/// @tparam Vec3T Type of the world coordinate +/// @param voxel 16-bit output coordinates that are relative to a voxel +/// @param world input world coordinates +/// @param indexToWorld Transform from index to world space +template +__hostdev__ inline static void worldToVoxel(Vec3u16 &voxel, const Vec3T &world, const Map &indexToWorld) +{ + const Vec3d ijk = indexToWorld.applyInverseMap(world);// world -> index + static constexpr double encode = double((1<<16) - 1); + voxel[0] = uint16_t( encode*(ijk[0] - math::Floor(ijk[0] + 0.5) + 0.5) ); + voxel[1] = uint16_t( encode*(ijk[1] - math::Floor(ijk[1] + 0.5) + 0.5) ); + voxel[2] = uint16_t( encode*(ijk[2] - math::Floor(ijk[2] + 0.5) + 0.5) ); +} + +/// @brief computes the relative float voxel offsets from a world coordinate +/// @tparam Vec3T Type of the world coordinate +/// @param voxel float output coordinates that are relative to a voxel +/// @param world input world coordinates +/// @param indexToWorld Transform from index to world space +template +__hostdev__ inline static void worldToVoxel(Vec3f &voxel, const Vec3T &world, const Map &indexToWorld) +{ + const Vec3d ijk = indexToWorld.applyInverseMap(world);// world -> index + voxel[0] = float( ijk[0] - math::Floor(ijk[0] + 0.5) ); + voxel[1] = float( ijk[1] - math::Floor(ijk[1] + 0.5) ); + voxel[2] = float( ijk[2] - math::Floor(ijk[2] + 0.5) ); +} + 
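+// Example (an illustrative sketch, assuming an identity Map with voxelSize = 1): the 8- and
+// 16-bit overloads above quantize the fractional position within a voxel, i.e. a value in
+// [-0.5,0.5) per axis, to 255 or 65535 steps:
+//
+//   nanovdb::Vec3u8 v;
+//   const nanovdb::Map map(1.0);
+//   worldToVoxel(v, nanovdb::Vec3d(1.25, 2.5, -0.75), map);
+//   // with ijk being the Coord of the voxel containing the point, the matching
+//   // voxelToWorld<nanovdb::Vec3d>(v, ijk, map) defined below reconstructs the world
+//   // position to within ~0.5/255 of a voxel per axis.
+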
+//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +__hostdev__ inline static Vec3T voxelToWorld(const Vec3u8 &voxel, const Coord &ijk, const Map &map) +{ + static constexpr double decode = 1.0/double((1<<8) - 1); + if constexpr(util::is_same::value) { + return map.applyMap( Vec3d(ijk[0] + decode*voxel[0] - 0.5, ijk[1] + decode*voxel[1] - 0.5, ijk[2] + decode*voxel[2] - 0.5)); + } else { + return map.applyMapF(Vec3f(ijk[0] + decode*voxel[0] - 0.5f, ijk[1] + decode*voxel[1] - 0.5f, ijk[2] + decode*voxel[2] - 0.5f)); + } +} + +template +__hostdev__ inline static Vec3T voxelToWorld(const Vec3u16 &voxel, const Coord &ijk, const Map &map) +{ + static constexpr double decode = 1.0/double((1<<16) - 1); + if constexpr(util::is_same::value) { + return map.applyMap( Vec3d(ijk[0] + decode*voxel[0] - 0.5, ijk[1] + decode*voxel[1] - 0.5, ijk[2] + decode*voxel[2] - 0.5)); + } else { + return map.applyMapF(Vec3f(ijk[0] + decode*voxel[0] - 0.5f, ijk[1] + decode*voxel[1] - 0.5f, ijk[2] + decode*voxel[2] - 0.5f)); + } +} + +template +__hostdev__ inline static Vec3T voxelToWorld(const Vec3f &voxel, const Coord &ijk, const Map &map) +{ + if constexpr(util::is_same::value) { + return map.applyMap( Vec3d(ijk[0] + voxel[0], ijk[1] + voxel[1], ijk[2] + voxel[2])); + } else { + return map.applyMapF(Vec3f(ijk[0] + voxel[0], ijk[1] + voxel[1], ijk[2] + voxel[2])); + } +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +namespace tools::cuda { + +template +class PointsToGrid +{ +public: + + struct Data { + Map map; + void *d_bufferPtr; + uint64_t *d_keys, *d_tile_keys, *d_lower_keys, *d_leaf_keys;// device pointer to 64 bit keys + uint64_t grid, tree, root, upper, lower, leaf, meta, blind, size;// byte offsets to nodes in buffer + uint32_t *d_indx;// device pointer to point indices (or IDs) + uint32_t nodeCount[3], *pointsPerLeafPrefix, *pointsPerLeaf;// 0=leaf,1=lower, 2=upper + uint32_t voxelCount, *pointsPerVoxelPrefix, *pointsPerVoxel; + BitFlags<16> flags; + __hostdev__ NanoGrid& getGrid() const {return *util::PtrAdd>(d_bufferPtr, grid);} + __hostdev__ NanoTree& getTree() const {return *util::PtrAdd>(d_bufferPtr, tree);} + __hostdev__ NanoRoot& getRoot() const {return *util::PtrAdd>(d_bufferPtr, root);} + __hostdev__ NanoUpper& getUpper(int i) const {return *(util::PtrAdd>(d_bufferPtr, upper)+i);} + __hostdev__ NanoLower& getLower(int i) const {return *(util::PtrAdd>(d_bufferPtr, lower)+i);} + __hostdev__ NanoLeaf& getLeaf(int i) const {return *(util::PtrAdd>(d_bufferPtr, leaf)+i);} + __hostdev__ GridBlindMetaData& getMeta() const { return *util::PtrAdd(d_bufferPtr, meta);}; + template + __hostdev__ Vec3T& getPoint(int i) const {return *(util::PtrAdd(d_bufferPtr, blind)+i);} + };// Data + + /// @brief Map constructor, which other constructors might call + /// @param map Map to be used for the output device grid + /// @param stream optional CUDA stream (defaults to CUDA stream 0) + PointsToGrid(const Map &map, cudaStream_t stream = 0) + : mStream(stream) + , mPointType(util::is_same::value ? 
PointType::Default : PointType::Disable) + { + mData.map = map; + mData.flags.initMask({GridFlags::HasBBox, GridFlags::IsBreadthFirst}); + mDeviceData = mMemPool.template alloc(mStream); + } + + /// @brief Default constructor that calls the Map constructor defined above + /// @param scale Voxel size in world units + /// @param trans Translation of origin in world units + /// @param stream optional CUDA stream (defaults to CUDA stream 0) + PointsToGrid(const double scale = 1.0, const Vec3d &trans = Vec3d(0.0), cudaStream_t stream = 0) + : PointsToGrid(Map(scale, trans), stream){} + + /// @brief Constructor from a target maximum number of particles per voxel. Calls the Map constructor defined above + /// @param maxPointsPerVoxel Maximum number of points oer voxel + /// @param stream optional CUDA stream (defaults to CUDA stream 0) + PointsToGrid(int maxPointsPerVoxel, int tolerance = 1, int maxIterations = 10, cudaStream_t stream = 0) + : PointsToGrid(Map(1.0), stream) + { + mMaxPointsPerVoxel = maxPointsPerVoxel; + mTolerance = tolerance; + mMaxIterations = maxIterations; + } + + /// @brief Toggle on and off verbose mode + /// @param level Verbose level: 0=quiet, 1=timing, 2=benchmarking + void setVerbose(int level = 1) {mVerbose = level; mData.flags.setBit(7u, level); } + + /// @brief Set the mode for checksum computation, which is disabled by default + /// @param mode Mode of checksum computation + void setChecksum(CheckMode mode = CheckMode::Disable){mChecksum = mode;} + + /// @brief Toggle on and off the computation of a bounding-box + /// @param on If true bbox will be computed + void includeBBox(bool on = true) { mData.flags.setMask(GridFlags::HasBBox, on); } + + /// @brief Set the name of the output grid + /// @param name name of the output grid + void setGridName(const std::string &name) {mGridName = name;} + + // only available when BuildT == Point + template typename util::enable_if::value>::type + setPointType(PointType type) { mPointType = type; } + + /// @brief Creates a handle to a grid with the specified build type from a list of points in index or world space + /// @tparam BuildT Build type of the output grid, i.e NanoGrid + /// @tparam PtrT Template type to a raw or fancy-pointer of point coordinates in world or index space. + /// @tparam BufferT Buffer type used for allocation of the grid handle + /// @param points device point to an array of points in world space + /// @param pointCount number of input points or voxels + /// @param buffer optional buffer (currently ignored) + /// @return returns a handle with a grid of type NanoGrid + template + GridHandle getHandle(const PtrT points, + size_t pointCount, + const BufferT &buffer = BufferT()); + + template + void countNodes(const PtrT points, size_t pointCount); + + template + void processGridTreeRoot(const PtrT points, size_t pointCount); + + void processUpperNodes(); + + void processLowerNodes(); + + template + void processLeafNodes(const PtrT points); + + template + void processPoints(const PtrT points, size_t pointCount); + + void processBBox(); + + // the following methods are only defined when BuildT == Point + template typename util::enable_if::value, uint32_t>::type + maxPointsPerVoxel() const {return mMaxPointsPerVoxel;} + template typename util::enable_if::value, uint32_t>::type + maxPointsPerLeaf() const {return mMaxPointsPerLeaf;} + +private: + static constexpr unsigned int mNumThreads = 128;// seems faster than the old value of 256! 
+ static unsigned int numBlocks(unsigned int n) {return (n + mNumThreads - 1) / mNumThreads;} + + cudaStream_t mStream{0}; + util::cuda::Timer mTimer; + PointType mPointType; + std::string mGridName; + int mVerbose{0}; + Data mData, *mDeviceData; + uint32_t mMaxPointsPerVoxel{0u}, mMaxPointsPerLeaf{0u}; + int mTolerance{1}, mMaxIterations{1}; + CheckMode mChecksum{CheckMode::Disable}; + + // wrapper of AllocT, defaulting to cub::CachingDeviceAllocator, which offers a shared scratch space + struct Allocator { + AllocT mAllocator; + void* d_scratch; + size_t scratchSize, actualScratchSize; + Allocator() : d_scratch(nullptr), scratchSize(0), actualScratchSize(0) {} + ~Allocator() { + if (scratchSize > 0) this->free(d_scratch);// a bug in cub makes this necessary + mAllocator.FreeAllCached(); + } + template + T* alloc(size_t count, cudaStream_t stream) { + T* d_ptr = nullptr; + cudaCheck(mAllocator.DeviceAllocate((void**)&d_ptr, sizeof(T)*count, stream)); + return d_ptr; + } + template + T* alloc(cudaStream_t stream) {return this->template alloc(1, stream);} + void free(void *d_ptr) {if (d_ptr) cudaCheck(mAllocator.DeviceFree(d_ptr));} + template + void free(void *d_ptr, T... other) { + if (d_ptr) cudaCheck(mAllocator.DeviceFree(d_ptr)); + this->free(other...); + } + void adjustScratch(cudaStream_t stream){ + if (scratchSize > actualScratchSize) { + if (actualScratchSize>0) cudaCheck(mAllocator.DeviceFree(d_scratch)); + cudaCheck(mAllocator.DeviceAllocate((void**)&d_scratch, scratchSize, stream)); + actualScratchSize = scratchSize; + } + } + } mMemPool; + + template + BufferT getBuffer(const PtrT points, size_t pointCount, const BufferT &buffer); +};// tools::cuda::PointsToGrid + +namespace kernels { +/// @details Used by cuda::PointsToGrid::processLeafNodes before the computation +/// of prefix-sum for index grid. +/// Moving this away from an implementation using the lambdaKernel wrapper +/// to fix the following on Windows platform: +/// error : For this host platform/dialect, an extended lambda cannot be defined inside the 'if' +/// or 'else' block of a constexpr if statement. +/// function in a lambda through lambdaKernel wrapper defined in CudaUtils.h. +template +__global__ void fillValueIndexKernel(const size_t numItems, uint64_t* devValueIndex, typename PointsToGrid::Data* d_data) { + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= numItems) return; + devValueIndex[tid] = static_cast(d_data->getLeaf(tid).mValueMask.countOn()); +} + +/// @details Used by PointsToGrid::processLeafNodes for the computation +/// of prefix-sum for index grid. +/// Moving this away from an implementation using the lambdaKernel wrapper +/// to fix the following on Windows platform: +/// error : For this host platform/dialect, an extended lambda cannot be defined inside the 'if' +/// or 'else' block of a constexpr if statement. 
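+/// @details Decoding sketch (inferred from the kernel below; illustrative only): mPrefixSum
+/// packs seven 9-bit cumulative on-counts, one per 64-bit word of the value mask, so the
+/// linear offset of the n'th voxel in a leaf can be recovered without a full mask scan:
+/// @code
+/// // uint32_t w = n >> 6;// index of the 64-bit mask word containing voxel n
+/// // uint64_t pre = (w == 0) ? 0 : (leaf.mPrefixSum >> (9*(w - 1))) & 511u;
+/// // offset = leaf.mOffset + pre + util::countOn(leaf.mValueMask.words()[w] & ((1ul << (n & 63u)) - 1u));
+/// @endcode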
+template +__global__ void leafPrefixSumKernel(const size_t numItems, uint64_t* devValueIndexPrefix, typename PointsToGrid::Data* d_data) { + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= numItems) return; + + auto &leaf = d_data->getLeaf(tid); + leaf.mOffset = 1u;// will be re-set below + const uint64_t *w = leaf.mValueMask.words(); + uint64_t &prefixSum = leaf.mPrefixSum, sum = util::countOn(*w++); + prefixSum = sum; + for (int n = 9; n < 55; n += 9) {// n=i*9 where i=1,2,..6 + sum += util::countOn(*w++); + prefixSum |= sum << n;// each pre-fixed sum is encoded in 9 bits + } + if (tid==0) { + d_data->getGrid().mData1 = 1u + devValueIndexPrefix[d_data->nodeCount[0]-1];// set total count + d_data->getTree().mVoxelCount = devValueIndexPrefix[d_data->nodeCount[0]-1]; + } else { + leaf.mOffset = 1u + devValueIndexPrefix[tid-1];// background is index 0 + } +} + +/// @details Used by PointsToGrid::processLeafNodes to make sure leaf.mMask - leaf.mValueMask. +/// Moving this away from an implementation using the lambdaKernel wrapper +/// to fix the following on Windows platform: +/// error : For this host platform/dialect, an extended lambda cannot be defined inside the 'if' +/// or 'else' block of a constexpr if statement. +template +__global__ void setMaskEqValMaskKernel(const size_t numItems, typename PointsToGrid::Data* d_data) { + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= numItems) return; + auto &leaf = d_data->getLeaf(tid); + leaf.mMask = leaf.mValueMask; +} +} // namespace kernels + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +// Define utility macro used to call cub functions that use dynamic temporary storage +#ifndef CALL_CUBS +#ifdef _WIN32 +#define CALL_CUBS(func, ...) \ + cudaCheck(cub::func(nullptr, mMemPool.scratchSize, __VA_ARGS__, mStream)); \ + mMemPool.adjustScratch(mStream); \ + cudaCheck(cub::func(mMemPool.d_scratch, mMemPool.scratchSize, __VA_ARGS__, mStream)); +#else// fdef _WIN32 +#define CALL_CUBS(func, args...) 
\ + cudaCheck(cub::func(nullptr, mMemPool.scratchSize, args, mStream)); \ + mMemPool.adjustScratch(mStream); \ + cudaCheck(cub::func(mMemPool.d_scratch, mMemPool.scratchSize, args, mStream)); +#endif// ifdef _WIN32 +#endif// ifndef CALL_CUBS + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +template +inline GridHandle +PointsToGrid::getHandle(const PtrT points, + size_t pointCount, + const BufferT &pool) +{ + if (mVerbose==1) mTimer.start("\nCounting nodes"); + this->countNodes(points, pointCount); + + if (mVerbose==1) mTimer.restart("Initiate buffer"); + auto buffer = this->getBuffer(points, pointCount, pool); + + if (mVerbose==1) mTimer.restart("Process grid,tree,root"); + this->processGridTreeRoot(points, pointCount); + + if (mVerbose==1) mTimer.restart("Process upper nodes"); + this->processUpperNodes(); + + if (mVerbose==1) mTimer.restart("Process lower nodes"); + this->processLowerNodes(); + + if (mVerbose==1) mTimer.restart("Process leaf nodes"); + this->processLeafNodes(points); + + if (mVerbose==1) mTimer.restart("Process points"); + this->processPoints(points, pointCount); + + if (mVerbose==1) mTimer.restart("Process bbox"); + this->processBBox(); + if (mVerbose==1) mTimer.stop(); + + if (mVerbose==1) mTimer.restart("Computation of checksum"); + tools::cuda::updateChecksum((GridData*)buffer.deviceData(), mChecksum); + if (mVerbose==1) mTimer.stop(); + + return GridHandle(std::move(buffer)); +}// PointsToGrid::getHandle + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +// --- CUB helpers --- +template +struct ShiftRight +{ + __hostdev__ inline OutT operator()(const InT& v) const {return static_cast(v >> BitCount);} +}; + +template +struct ShiftRightIterator : public cub::TransformInputIterator, InT*> +{ + using BASE = cub::TransformInputIterator, InT*>; + __hostdev__ inline ShiftRightIterator(uint64_t* input_itr) : BASE(input_itr, ShiftRight()) {} +}; + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +template +void PointsToGrid::countNodes(const PtrT points, size_t pointCount) +{ + using Vec3T = typename util::remove_const::element_type>::type; + if constexpr(util::is_same::value) { + static_assert(util::is_same::value, "Point (vs voxels) coordinates should be represented as Vec3f or Vec3d"); + } else { + static_assert(util::is_same::value, "Voxel coordinates should be represented as Coord, Vec3f or Vec3d"); + } + + mMaxPointsPerVoxel = math::Min(mMaxPointsPerVoxel, pointCount); + int iterCounter = 0; + struct Foo {// pairs current voxel size, dx, with the corresponding particle density, i.e. maximum number of points per voxel + double dx; + uint32_t density; + bool operator<(const Foo &rhs) const {return density < rhs.density || (density == rhs.density && dx < rhs.dx);} + } min{0.0, 1}, max{0.0, 0};// min: as dx -> 0 density -> 1 point per voxel, max: density is 0 i.e. 
undefined + +jump:// this marks the beginning of the actual algorithm + + mData.d_keys = mMemPool.template alloc(pointCount, mStream); + mData.d_indx = mMemPool.template alloc(pointCount, mStream);// uint32_t can index 4.29 billion Coords, corresponding to 48 GB + cudaCheck(cudaMemcpyAsync(mDeviceData, &mData, sizeof(Data), cudaMemcpyHostToDevice, mStream));// copy mData from CPU -> GPU + + if (mVerbose==2) mTimer.start("\nAllocating arrays for keys and indices"); + auto *d_keys = mMemPool.template alloc(pointCount, mStream); + auto *d_indx = mMemPool.template alloc(pointCount, mStream); + + if (mVerbose==2) mTimer.restart("Generate tile keys"); + util::cuda::lambdaKernel<<>>(pointCount, [=] __device__(size_t tid, const Data *d_data, const PtrT points) { + auto coordToKey = [](const Coord &ijk)->uint64_t{ + // Note: int32_t has a range of -2^31 to 2^31 - 1 whereas uint32_t has a range of 0 to 2^32 - 1 + static constexpr int64_t offset = 1 << 31; + return (uint64_t(uint32_t(int64_t(ijk[2]) + offset) >> 12) ) | // z is the lower 21 bits + (uint64_t(uint32_t(int64_t(ijk[1]) + offset) >> 12) << 21) | // y is the middle 21 bits + (uint64_t(uint32_t(int64_t(ijk[0]) + offset) >> 12) << 42); // x is the upper 21 bits + };// coordToKey lambda functor + d_indx[tid] = uint32_t(tid); + uint64_t &key = d_keys[tid]; + if constexpr(util::is_same::value) {// points are in world space + if constexpr(util::is_same::value) { + key = coordToKey(d_data->map.applyInverseMapF(points[tid]).round()); + } else {// points are Vec3d + key = coordToKey(d_data->map.applyInverseMap(points[tid]).round()); + } + } else if constexpr(util::is_same::value) {// points Coord are in index space + key = coordToKey(points[tid]); + } else {// points are Vec3f or Vec3d in index space + key = coordToKey(points[tid].round()); + } + }, mDeviceData, points); + cudaCheckError(); + if (mVerbose==2) mTimer.restart("DeviceRadixSort of "+std::to_string(pointCount)+" tile keys"); + CALL_CUBS(DeviceRadixSort::SortPairs, d_keys, mData.d_keys, d_indx, mData.d_indx, pointCount, 0, 62);// 21 bits per coord + std::swap(d_indx, mData.d_indx);// sorted indices are now in d_indx + + if (mVerbose==2) mTimer.restart("Allocate runs"); + auto *d_points_per_tile = mMemPool.template alloc(pointCount, mStream); + uint32_t *d_node_count = mMemPool.template alloc(3, mStream); + + if (mVerbose==2) mTimer.restart("DeviceRunLengthEncode tile keys"); + CALL_CUBS(DeviceRunLengthEncode::Encode, mData.d_keys, d_keys, d_points_per_tile, d_node_count+2, pointCount); + cudaCheck(cudaMemcpyAsync(mData.nodeCount+2, d_node_count+2, sizeof(uint32_t), cudaMemcpyDeviceToHost, mStream)); + mData.d_tile_keys = mMemPool.template alloc(mData.nodeCount[2], mStream); + cudaCheck(cudaMemcpyAsync(mData.d_tile_keys, d_keys, mData.nodeCount[2]*sizeof(uint64_t), cudaMemcpyDeviceToDevice, mStream)); + + if (mVerbose) mTimer.restart("DeviceRadixSort of " + std::to_string(pointCount) + " voxel keys in " + std::to_string(mData.nodeCount[2]) + " tiles"); + uint32_t *points_per_tile = new uint32_t[mData.nodeCount[2]]; + cudaCheck(cudaMemcpyAsync(points_per_tile, d_points_per_tile, mData.nodeCount[2]*sizeof(uint32_t), cudaMemcpyDeviceToHost, mStream)); + mMemPool.free(d_points_per_tile); + + for (uint32_t id = 0, offset = 0; id < mData.nodeCount[2]; ++id) { + const uint32_t count = points_per_tile[id]; + util::cuda::lambdaKernel<<>>(count, [=] __device__(size_t tid, const Data *d_data) { + auto voxelKey = [] __device__ (uint64_t tileID, const Coord &ijk){ + return tileID << 36 | // upper 
offset: 64-15-12-9=28, i.e. last 28 bits + uint64_t(NanoUpper::CoordToOffset(ijk)) << 21 | // lower offset: 32^3 = 2^15, i.e. next 15 bits + uint64_t(NanoLower::CoordToOffset(ijk)) << 9 | // leaf offset: 16^3 = 2^12, i.e. next 12 bits + uint64_t(NanoLeaf< BuildT>::CoordToOffset(ijk)); // voxel offset: 8^3 = 2^9, i.e. first 9 bits + };// voxelKey lambda functor + tid += offset; + Vec3T p = points[d_indx[tid]]; + if constexpr(util::is_same::value) p = util::is_same::value ? d_data->map.applyInverseMapF(p) : d_data->map.applyInverseMap(p); + d_keys[tid] = voxelKey(id, p.round()); + }, mDeviceData); cudaCheckError(); + CALL_CUBS(DeviceRadixSort::SortPairs, d_keys + offset, mData.d_keys + offset, d_indx + offset, mData.d_indx + offset, count, 0, 36);// 9+12+15=36 + offset += count; + } + mMemPool.free(d_indx); + delete [] points_per_tile; + + if (mVerbose==2) mTimer.restart("Count points per voxel"); + + mData.pointsPerVoxel = mMemPool.template alloc(pointCount, mStream); + uint32_t *d_voxel_count = mMemPool.template alloc(mStream); + CALL_CUBS(DeviceRunLengthEncode::Encode, mData.d_keys, d_keys, mData.pointsPerVoxel, d_voxel_count, pointCount); + cudaCheck(cudaMemcpyAsync(&mData.voxelCount, d_voxel_count, sizeof(uint32_t), cudaMemcpyDeviceToHost, mStream)); + mMemPool.free(d_voxel_count); + + if (util::is_same::value) { + if (mVerbose==2) mTimer.restart("Count max points per voxel"); + uint32_t *d_maxPointsPerVoxel = mMemPool.template alloc(mStream), maxPointsPerVoxel; + CALL_CUBS(DeviceReduce::Max, mData.pointsPerVoxel, d_maxPointsPerVoxel, mData.voxelCount); + cudaCheck(cudaMemcpyAsync(&maxPointsPerVoxel, d_maxPointsPerVoxel, sizeof(uint32_t), cudaMemcpyDeviceToHost, mStream)); + mMemPool.free(d_maxPointsPerVoxel); + double dx = mData.map.getVoxelSize()[0]; + if (++iterCounter >= mMaxIterations || pointCount == 1u || math::Abs((int)maxPointsPerVoxel - (int)mMaxPointsPerVoxel) <= mTolerance) { + mMaxPointsPerVoxel = maxPointsPerVoxel; + } else { + const Foo tmp{dx, maxPointsPerVoxel}; + if (maxPointsPerVoxel < mMaxPointsPerVoxel) { + if (min < tmp) min = tmp; + } else if (max.density == 0 || tmp < max) { + max = tmp; + } + if (max.density) { + dx = (min.dx*(max.density - mMaxPointsPerVoxel) + max.dx*(mMaxPointsPerVoxel-min.density))/double(max.density-min.density); + } else if (maxPointsPerVoxel > 1u) { + dx *= (mMaxPointsPerVoxel-1.0)/(maxPointsPerVoxel-1.0); + } else {// maxPointsPerVoxel = 1 so increase dx significantly + dx *= 10.0; + } + if (mVerbose==2) printf("\ntarget density = %u, current density = %u current dx = %f, next dx = %f\n", mMaxPointsPerVoxel, maxPointsPerVoxel, tmp.dx, dx); + mData.map = Map(dx); + mMemPool.free(mData.d_keys, mData.d_indx, d_keys, mData.d_tile_keys, d_node_count, mData.pointsPerVoxel); + goto jump; + } + } + if (iterCounter>1 && mVerbose) std::cerr << "Used " << iterCounter << " attempts to determine dx that produces a target dpoint denisty\n\n"; + + if (mVerbose==2) mTimer.restart("Compute prefix sum of points per voxel"); + mData.pointsPerVoxelPrefix = mMemPool.template alloc(mData.voxelCount, mStream); + CALL_CUBS(DeviceScan::ExclusiveSum, mData.pointsPerVoxel, mData.pointsPerVoxelPrefix, mData.voxelCount); + + mData.pointsPerLeaf = mMemPool.template alloc(pointCount, mStream); + CALL_CUBS(DeviceRunLengthEncode::Encode, ShiftRightIterator<9>(mData.d_keys), d_keys, mData.pointsPerLeaf, d_node_count, pointCount); + cudaCheck(cudaMemcpyAsync(mData.nodeCount, d_node_count, sizeof(uint32_t), cudaMemcpyDeviceToHost, mStream)); + + if 
constexpr(util::is_same::value) { + uint32_t *d_maxPointsPerLeaf = mMemPool.template alloc(mStream); + CALL_CUBS(DeviceReduce::Max, mData.pointsPerLeaf, d_maxPointsPerLeaf, mData.nodeCount[0]); + cudaCheck(cudaMemcpyAsync(&mMaxPointsPerLeaf, d_maxPointsPerLeaf, sizeof(uint32_t), cudaMemcpyDeviceToHost, mStream)); + //printf("\n Leaf count = %u, max points per leaf = %u\n", mData.nodeCount[0], mMaxPointsPerLeaf); + if (mMaxPointsPerLeaf > std::numeric_limits::max()) { + throw std::runtime_error("Too many points per leaf: "+std::to_string(mMaxPointsPerLeaf)); + } + mMemPool.free(d_maxPointsPerLeaf); + } + + mData.pointsPerLeafPrefix = mMemPool.template alloc(mData.nodeCount[0], mStream); + CALL_CUBS(DeviceScan::ExclusiveSum, mData.pointsPerLeaf, mData.pointsPerLeafPrefix, mData.nodeCount[0]); + + mData.d_leaf_keys = mMemPool.template alloc(mData.nodeCount[0], mStream); + cudaCheck(cudaMemcpyAsync(mData.d_leaf_keys, d_keys, mData.nodeCount[0]*sizeof(uint64_t), cudaMemcpyDeviceToDevice, mStream)); + + CALL_CUBS(DeviceSelect::Unique, ShiftRightIterator<12>(mData.d_leaf_keys), d_keys, d_node_count+1, mData.nodeCount[0]);// count lower nodes + cudaCheck(cudaMemcpyAsync(mData.nodeCount+1, d_node_count+1, sizeof(uint32_t), cudaMemcpyDeviceToHost, mStream)); + mData.d_lower_keys = mMemPool.template alloc(mData.nodeCount[1], mStream); + cudaCheck(cudaMemcpyAsync(mData.d_lower_keys, d_keys, mData.nodeCount[1]*sizeof(uint64_t), cudaMemcpyDeviceToDevice, mStream)); + + mMemPool.free(d_keys, d_node_count); + if (mVerbose==2) mTimer.stop(); + + //printf("Leaf count = %u, lower count = %u, upper count = %u\n", mData.nodeCount[0], mData.nodeCount[1], mData.nodeCount[2]); +}// PointsToGrid::countNodes + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +template +inline BufferT PointsToGrid::getBuffer(const PtrT, size_t pointCount, const BufferT &pool) +{ + auto sizeofPoint = [&]()->size_t{ + switch (mPointType){ + case PointType::PointID: return sizeof(uint32_t); + case PointType::World64: return sizeof(Vec3d); + case PointType::World32: return sizeof(Vec3f); + case PointType::Grid64: return sizeof(Vec3d); + case PointType::Grid32: return sizeof(Vec3f); + case PointType::Voxel32: return sizeof(Vec3f); + case PointType::Voxel16: return sizeof(Vec3u16); + case PointType::Voxel8: return sizeof(Vec3u8); + case PointType::Default: return pointer_traits::element_size; + default: return size_t(0);// PointType::Disable + } + }; + + mData.grid = 0;// grid is always stored at the start of the buffer! 
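+    // Buffer layout sketch (offsets computed below):
+    // [GridData][TreeData][RootData + tiles][upper nodes][lower nodes][leaf nodes][GridBlindMetaData][blind point data]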
+ mData.tree = NanoGrid::memUsage(); // grid ends and tree begins + mData.root = mData.tree + NanoTree::memUsage(); // tree ends and root node begins + mData.upper = mData.root + NanoRoot::memUsage(mData.nodeCount[2]); // root node ends and upper internal nodes begin + mData.lower = mData.upper + NanoUpper::memUsage()*mData.nodeCount[2]; // upper internal nodes ends and lower internal nodes begin + mData.leaf = mData.lower + NanoLower::memUsage()*mData.nodeCount[1]; // lower internal nodes ends and leaf nodes begin + mData.meta = mData.leaf + NanoLeaf::DataType::memUsage()*mData.nodeCount[0];// leaf nodes end and blind meta data begins + mData.blind = mData.meta + sizeof(GridBlindMetaData)*int( mPointType!=PointType::Disable ); // meta data ends and blind data begins + mData.size = mData.blind + pointCount*sizeofPoint();// end of buffer + + auto buffer = BufferT::create(mData.size, &pool, false);// only allocate buffer on the device + mData.d_bufferPtr = buffer.deviceData(); + if (mData.d_bufferPtr == nullptr) throw std::runtime_error("Failed to allocate grid buffer on the device"); + cudaCheck(cudaMemcpyAsync(mDeviceData, &mData, sizeof(Data), cudaMemcpyHostToDevice, mStream));// copy Data CPU -> GPU + return buffer; +}// PointsToGrid::getBuffer + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +template +inline void PointsToGrid::processGridTreeRoot(const PtrT points, size_t pointCount) +{ + using Vec3T = typename util::remove_const::element_type>::type; + util::cuda::lambdaKernel<<<1, 1, 0, mStream>>>(1, [=] __device__(size_t, Data *d_data, PointType pointType) { + // process Root + auto &root = d_data->getRoot(); + root.mBBox = CoordBBox(); // init to empty + root.mTableSize = d_data->nodeCount[2]; + root.mBackground = NanoRoot::ValueType(0);// background_value + root.mMinimum = root.mMaximum = NanoRoot::ValueType(0); + root.mAverage = root.mStdDevi = NanoRoot::FloatType(0); + + // process Tree + auto &tree = d_data->getTree(); + tree.setRoot(&root); + tree.setFirstNode(&d_data->getUpper(0)); + tree.setFirstNode(&d_data->getLower(0)); + tree.setFirstNode(&d_data->getLeaf(0)); + tree.mNodeCount[2] = tree.mTileCount[2] = d_data->nodeCount[2]; + tree.mNodeCount[1] = tree.mTileCount[1] = d_data->nodeCount[1]; + tree.mNodeCount[0] = tree.mTileCount[0] = d_data->nodeCount[0]; + tree.mVoxelCount = d_data->voxelCount; + + // process Grid + auto &grid = d_data->getGrid(); + grid.init({GridFlags::HasBBox, GridFlags::IsBreadthFirst}, d_data->size, d_data->map, toGridType()); + grid.mChecksum = ~uint64_t(0);// set all bits on which means it's disabled + grid.mBlindMetadataCount = util::is_same::value;// ? 
1u : 0u; + grid.mBlindMetadataOffset = d_data->meta; + if (pointType != PointType::Disable) { + const auto lastLeaf = tree.mNodeCount[0] - 1; + grid.mData1 = d_data->pointsPerLeafPrefix[lastLeaf] + d_data->pointsPerLeaf[lastLeaf]; + auto &meta = d_data->getMeta(); + meta.mDataOffset = sizeof(GridBlindMetaData);// blind data is placed right after this meta data + meta.mValueCount = pointCount; + // Blind meta data + switch (pointType){ + case PointType::PointID: + grid.mGridClass = GridClass::PointIndex; + meta.mSemantic = GridBlindDataSemantic::PointId; + meta.mDataClass = GridBlindDataClass::IndexArray; + meta.mDataType = toGridType(); + meta.mValueSize = sizeof(uint32_t); + util::strcpy(meta.mName, "PointID: uint32_t indices to points"); + break; + case PointType::World64: + grid.mGridClass = GridClass::PointData; + meta.mSemantic = GridBlindDataSemantic::WorldCoords; + meta.mDataClass = GridBlindDataClass::AttributeArray; + meta.mDataType = toGridType(); + meta.mValueSize = sizeof(Vec3d); + util::strcpy(meta.mName, "World64: Vec3 point coordinates in world space"); + break; + case PointType::World32: + grid.mGridClass = GridClass::PointData; + meta.mSemantic = GridBlindDataSemantic::WorldCoords; + meta.mDataClass = GridBlindDataClass::AttributeArray; + meta.mDataType = toGridType(); + meta.mValueSize = sizeof(Vec3f); + util::strcpy(meta.mName, "World32: Vec3 point coordinates in world space"); + break; + case PointType::Grid64: + grid.mGridClass = GridClass::PointData; + meta.mSemantic = GridBlindDataSemantic::GridCoords; + meta.mDataClass = GridBlindDataClass::AttributeArray; + meta.mDataType = toGridType(); + meta.mValueSize = sizeof(Vec3d); + util::strcpy(meta.mName, "Grid64: Vec3 point coordinates in grid space"); + break; + case PointType::Grid32: + grid.mGridClass = GridClass::PointData; + meta.mSemantic = GridBlindDataSemantic::GridCoords; + meta.mDataClass = GridBlindDataClass::AttributeArray; + meta.mDataType = toGridType(); + meta.mValueSize = sizeof(Vec3f); + util::strcpy(meta.mName, "Grid32: Vec3 point coordinates in grid space"); + break; + case PointType::Voxel32: + grid.mGridClass = GridClass::PointData; + meta.mSemantic = GridBlindDataSemantic::VoxelCoords; + meta.mDataClass = GridBlindDataClass::AttributeArray; + meta.mDataType = toGridType(); + meta.mValueSize = sizeof(Vec3f); + util::strcpy(meta.mName, "Voxel32: Vec3 point coordinates in voxel space"); + break; + case PointType::Voxel16: + grid.mGridClass = GridClass::PointData; + meta.mSemantic = GridBlindDataSemantic::VoxelCoords; + meta.mDataClass = GridBlindDataClass::AttributeArray; + meta.mDataType = toGridType(); + meta.mValueSize = sizeof(Vec3u16); + util::strcpy(meta.mName, "Voxel16: Vec3 point coordinates in voxel space"); + break; + case PointType::Voxel8: + grid.mGridClass = GridClass::PointData; + meta.mSemantic = GridBlindDataSemantic::VoxelCoords; + meta.mDataClass = GridBlindDataClass::AttributeArray; + meta.mDataType = toGridType(); + meta.mValueSize = sizeof(Vec3u8); + util::strcpy(meta.mName, "Voxel8: Vec3 point coordinates in voxel space"); + break; + case PointType::Default: + grid.mGridClass = GridClass::PointData; + meta.mSemantic = GridBlindDataSemantic::WorldCoords; + meta.mDataClass = GridBlindDataClass::AttributeArray; + meta.mDataType = toGridType(); + meta.mValueSize = sizeof(Vec3T); + if constexpr(util::is_same::value) { + util::strcpy(meta.mName, "World32: Vec3 point coordinates in world space"); + } else if constexpr(util::is_same::value){ + util::strcpy(meta.mName, "World64: Vec3 point 
coordinates in world space"); + } else { + printf("Error in PointsToGrid::processGridTreeRoot: expected Vec3T = Vec3f or Vec3d\n"); + } + break; + default: + printf("Error in PointsToGrid::processGridTreeRoot: invalid pointType\n"); + } + } else if constexpr(BuildTraits::is_offindex) { + grid.mData1 = 1u + 512u*d_data->nodeCount[0]; + grid.mGridClass = GridClass::IndexGrid; + } + }, mDeviceData, mPointType);// lambdaKernel + cudaCheckError(); + + char *dst = mData.getGrid().mGridName; + if (const char *src = mGridName.data()) { + cudaCheck(cudaMemcpyAsync(dst, src, GridData::MaxNameSize, cudaMemcpyHostToDevice, mStream)); + } else { + cudaCheck(cudaMemsetAsync(dst, 0, GridData::MaxNameSize, mStream)); + } +}// PointsToGrid::processGridTreeRoot + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +inline void PointsToGrid::processUpperNodes() +{ + util::cuda::lambdaKernel<<>>(mData.nodeCount[2], [=] __device__(size_t tid, Data *d_data) { + auto &root = d_data->getRoot(); + auto &upper = d_data->getUpper(tid); +#if 1 + auto keyToCoord = [](uint64_t key)->nanovdb::Coord{ + static constexpr int64_t offset = 1 << 31;// max values of uint32_t is 2^31 - 1 + static constexpr uint64_t MASK = (1u << 21) - 1; // used to mask out 21 lower bits + return nanovdb::Coord(int(int64_t(((key >> 42) & MASK) << 12) - offset), // x are the upper 21 bits + int(int64_t(((key >> 21) & MASK) << 12) - offset), // y are the middle 21 bits + int(int64_t(( key & MASK) << 12) - offset)); // z are the lower 21 bits + }; + const Coord ijk = keyToCoord(d_data->d_tile_keys[tid]); +#else + const Coord ijk = NanoRoot::KeyToCoord(d_data->d_tile_keys[tid]); +#endif + root.tile(tid)->setChild(ijk, &upper, &root); + upper.mBBox[0] = ijk; + upper.mFlags = 0; + upper.mValueMask.setOff(); + upper.mChildMask.setOff(); + upper.mMinimum = upper.mMaximum = NanoLower::ValueType(0); + upper.mAverage = upper.mStdDevi = NanoLower::FloatType(0); + }, mDeviceData); + cudaCheckError(); + + mMemPool.free(mData.d_tile_keys); + + const uint64_t valueCount = mData.nodeCount[2] << 15; + util::cuda::lambdaKernel<<>>(valueCount, [=] __device__(size_t tid, Data *d_data) { + auto &upper = d_data->getUpper(tid >> 15); + upper.mTable[tid & 32767u].value = NanoUpper::ValueType(0);// background + }, mDeviceData); + cudaCheckError(); +}// PointsToGrid::processUpperNodes + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +inline void PointsToGrid::processLowerNodes() +{ + util::cuda::lambdaKernel<<>>(mData.nodeCount[1], [=] __device__(size_t tid, Data *d_data) { + auto &root = d_data->getRoot(); + const uint64_t lowerKey = d_data->d_lower_keys[tid]; + auto &upper = d_data->getUpper(lowerKey >> 15); + const uint32_t upperOffset = lowerKey & 32767u;// (1 << 15) - 1 = 32767 + upper.mChildMask.setOnAtomic(upperOffset); + auto &lower = d_data->getLower(tid); + upper.setChild(upperOffset, &lower); + lower.mBBox[0] = upper.offsetToGlobalCoord(upperOffset); + lower.mFlags = 0; + lower.mValueMask.setOff(); + lower.mChildMask.setOff(); + lower.mMinimum = lower.mMaximum = NanoLower::ValueType(0);// background; + lower.mAverage = lower.mStdDevi = NanoLower::FloatType(0); + }, mDeviceData); + cudaCheckError(); + + const uint64_t valueCount = mData.nodeCount[1] << 12; + 
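// each lower internal node holds 16^3 = 4096 table entries, hence one thread per entry below
+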
util::cuda::lambdaKernel<<>>(valueCount, [=] __device__(size_t tid, Data *d_data) { + auto &lower = d_data->getLower(tid >> 12); + lower.mTable[tid & 4095u].value = NanoLower::ValueType(0);// background + }, mDeviceData); + cudaCheckError(); +}// PointsToGrid::processLowerNodes + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +template +inline void PointsToGrid::processLeafNodes(const PtrT points) +{ + const uint8_t flags = static_cast(mData.flags.data());// mIncludeStats ? 16u : 0u;// 4th bit indicates stats + + if (mVerbose==2) mTimer.start("process leaf meta data"); + // loop over leaf nodes and add it to its parent node + util::cuda::lambdaKernel<<>>(mData.nodeCount[0], [=] __device__(size_t tid, Data *d_data) { + const uint64_t leafKey = d_data->d_leaf_keys[tid], tile_id = leafKey >> 27; + auto &upper = d_data->getUpper(tile_id); + const uint32_t lowerOffset = leafKey & 4095u, upperOffset = (leafKey >> 12) & 32767u; + auto &lower = *upper.getChild(upperOffset); + lower.mChildMask.setOnAtomic(lowerOffset); + auto &leaf = d_data->getLeaf(tid); + lower.setChild(lowerOffset, &leaf); + leaf.mBBoxMin = lower.offsetToGlobalCoord(lowerOffset); + leaf.mFlags = flags; + auto &valueMask = leaf.mValueMask; + valueMask.setOff();// initiate all bits to off + + if constexpr(util::is_same::value) { + leaf.mOffset = d_data->pointsPerLeafPrefix[tid]; + leaf.mPointCount = d_data->pointsPerLeaf[tid]; + } else if constexpr(BuildTraits::is_offindex) { + leaf.mOffset = tid*512u + 1u;// background is index 0 + leaf.mPrefixSum = 0u; + } else if constexpr(!BuildTraits::is_special) { + leaf.mAverage = leaf.mStdDevi = NanoLeaf::FloatType(0); + leaf.mMinimum = leaf.mMaximum = NanoLeaf::ValueType(0); + } + }, mDeviceData); cudaCheckError(); + + if (mVerbose==2) mTimer.restart("set active voxel state and values"); + // loop over all active voxels and set LeafNode::mValueMask and LeafNode::mValues + util::cuda::lambdaKernel<<>>(mData.voxelCount, [=] __device__(size_t tid, Data *d_data) { + const uint32_t pointID = d_data->pointsPerVoxelPrefix[tid]; + const uint64_t voxelKey = d_data->d_keys[pointID]; + auto &upper = d_data->getUpper(voxelKey >> 36); + auto &lower = *upper.getChild((voxelKey >> 21) & 32767u); + auto &leaf = *lower.getChild((voxelKey >> 9) & 4095u); + const uint32_t n = voxelKey & 511u; + leaf.mValueMask.setOnAtomic(n);// <--- slow! + if constexpr(util::is_same::value) { + leaf.mValues[n] = uint16_t(pointID + d_data->pointsPerVoxel[tid] - leaf.offset()); + } else if constexpr(!BuildTraits::is_special) { + leaf.mValues[n] = NanoLeaf::ValueType(1);// set value of active voxels that are not points (or index) + } + }, mDeviceData); cudaCheckError(); + + mMemPool.free(mData.d_keys, mData.pointsPerVoxel, mData.pointsPerVoxelPrefix, mData.pointsPerLeafPrefix, mData.pointsPerLeaf); + + if (mVerbose==2) mTimer.restart("set inactive voxel values"); + const uint64_t denseVoxelCount = mData.nodeCount[0] << 9; + util::cuda::lambdaKernel<<>>(denseVoxelCount, [=] __device__(size_t tid, Data *d_data) { + auto &leaf = d_data->getLeaf(tid >> 9u); + const uint32_t n = tid & 511u; + if (leaf.mValueMask.isOn(n)) return; + if constexpr(util::is_same::value) { + const uint32_t m = leaf.mValueMask.findPrev(n - 1); + leaf.mValues[n] = m < 512u ? 
leaf.mValues[m] : 0u; + } else if constexpr(!BuildTraits::is_special) { + leaf.mValues[n] = NanoLeaf::ValueType(0);// value of inactive voxels + } + }, mDeviceData); cudaCheckError(); + + if constexpr(BuildTraits::is_onindex) { + if (mVerbose==2) mTimer.restart("prefix-sum for index grid"); + uint64_t *devValueIndex = mMemPool.template alloc(mData.nodeCount[0], mStream); + auto devValueIndexPrefix = mMemPool.template alloc(mData.nodeCount[0], mStream); + kernels::fillValueIndexKernel<<>>(mData.nodeCount[0], devValueIndex, mDeviceData); + cudaCheckError(); + CALL_CUBS(DeviceScan::InclusiveSum, devValueIndex, devValueIndexPrefix, mData.nodeCount[0]); + mMemPool.free(devValueIndex); + kernels::leafPrefixSumKernel<<>>(mData.nodeCount[0], devValueIndexPrefix, mDeviceData); + cudaCheckError(); + mMemPool.free(devValueIndexPrefix); + } + + if constexpr(BuildTraits::is_indexmask) { + if (mVerbose==2) mTimer.restart("leaf.mMask = leaf.mValueMask"); + kernels::setMaskEqValMaskKernel<<>>(mData.nodeCount[0], mDeviceData); + cudaCheckError(); + } + if (mVerbose==2) mTimer.stop(); +}// PointsToGrid::processLeafNodes + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +template +inline void PointsToGrid::processPoints(const PtrT, size_t) +{ + mMemPool.free(mData.d_indx); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +// Template specialization with BuildT = Point +template <> +template +inline void PointsToGrid::processPoints(const PtrT points, size_t pointCount) +{ + switch (mPointType){ + case PointType::Disable: + throw std::runtime_error("PointsToGrid::processPoints: mPointType == PointType::Disable\n"); + case PointType::PointID: + util::cuda::lambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + d_data->template getPoint(tid) = d_data->d_indx[tid]; + }, mDeviceData); cudaCheckError(); + break; + case PointType::World64: + util::cuda::lambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + d_data->template getPoint(tid) = points[d_data->d_indx[tid]]; + }, mDeviceData); cudaCheckError(); + break; + case PointType::World32: + util::cuda::lambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + d_data->template getPoint(tid) = points[d_data->d_indx[tid]]; + }, mDeviceData); cudaCheckError(); + break; + case PointType::Grid64: + util::cuda::lambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + d_data->template getPoint(tid) = d_data->map.applyInverseMap(points[d_data->d_indx[tid]]); + }, mDeviceData); cudaCheckError(); + break; + case PointType::Grid32: + util::cuda::lambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + d_data->template getPoint(tid) = d_data->map.applyInverseMapF(points[d_data->d_indx[tid]]); + }, mDeviceData); cudaCheckError(); + break; + case PointType::Voxel32: + util::cuda::lambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + worldToVoxel(d_data->template getPoint(tid), points[d_data->d_indx[tid]], d_data->map); + }, mDeviceData); cudaCheckError(); + break; + case PointType::Voxel16: + util::cuda::lambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + worldToVoxel(d_data->template getPoint(tid), points[d_data->d_indx[tid]], d_data->map); + }, mDeviceData); 
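/* note: the VoxelNN encodings store each point relative to its voxel at decreasing precision, e.g. Voxel16 packs three 16-bit fixed-point offsets, which is why worldToVoxel() is used here instead of applyInverseMap() */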
cudaCheckError(); + break; + case PointType::Voxel8: + util::cuda::lambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + worldToVoxel(d_data->template getPoint(tid), points[d_data->d_indx[tid]], d_data->map); + }, mDeviceData); cudaCheckError(); + break; + case PointType::Default: + util::cuda::lambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + d_data->template getPoint::element_type>(tid) = points[d_data->d_indx[tid]]; + }, mDeviceData); cudaCheckError(); + break; + default: + printf("Internal error in PointsToGrid::processPoints\n"); + } + mMemPool.free(mData.d_indx); +}// PointsToGrid::processPoints + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +inline void PointsToGrid::processBBox() +{ + if (mData.flags.isMaskOff(GridFlags::HasBBox)) { + mMemPool.free(mData.d_leaf_keys, mData.d_lower_keys); + return; + } + + // reset bbox in lower nodes + util::cuda::lambdaKernel<<>>(mData.nodeCount[1], [=] __device__(size_t tid, Data *d_data) { + d_data->getLower(tid).mBBox = CoordBBox(); + }, mDeviceData); + cudaCheckError(); + + // update and propagate bbox from leaf -> lower/parent nodes + util::cuda::lambdaKernel<<>>(mData.nodeCount[0], [=] __device__(size_t tid, Data *d_data) { + const uint64_t leafKey = d_data->d_leaf_keys[tid]; + auto &upper = d_data->getUpper(leafKey >> 27); + auto &lower = *upper.getChild((leafKey >> 12) & 32767u); + auto &leaf = d_data->getLeaf(tid); + leaf.updateBBox(); + lower.mBBox.expandAtomic(leaf.bbox()); + }, mDeviceData); + mMemPool.free(mData.d_leaf_keys); + cudaCheckError(); + + // reset bbox in upper nodes + util::cuda::lambdaKernel<<>>(mData.nodeCount[2], [=] __device__(size_t tid, Data *d_data) { + d_data->getUpper(tid).mBBox = CoordBBox(); + }, mDeviceData); + cudaCheckError(); + + // propagate bbox from lower -> upper/parent node + util::cuda::lambdaKernel<<>>(mData.nodeCount[1], [=] __device__(size_t tid, Data *d_data) { + const uint64_t lowerKey = d_data->d_lower_keys[tid]; + auto &upper = d_data->getUpper(lowerKey >> 15); + auto &lower = d_data->getLower(tid); + upper.mBBox.expandAtomic(lower.bbox()); + }, mDeviceData); + mMemPool.free(mData.d_lower_keys); + cudaCheckError() + + // propagate bbox from upper -> root/parent node + util::cuda::lambdaKernel<<>>(mData.nodeCount[2], [=] __device__(size_t tid, Data *d_data) { + d_data->getRoot().mBBox.expandAtomic(d_data->getUpper(tid).bbox()); + }, mDeviceData); + cudaCheckError(); + + // update the world-bbox in the root node + util::cuda::lambdaKernel<<<1, 1, 0, mStream>>>(1, [=] __device__(size_t, Data *d_data) { + d_data->getGrid().mWorldBBox = d_data->getRoot().mBBox.transform(d_data->map); + }, mDeviceData); + cudaCheckError(); +}// PointsToGrid::processBBox + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +GridHandle// Grid +voxelsToGrid(const PtrT d_ijk, size_t voxelCount, double voxelSize, const BufferT &buffer, cudaStream_t stream) +{ + PointsToGrid converter(voxelSize, Vec3d(0.0), stream); + return converter.getHandle(d_ijk, voxelCount, buffer); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +GridHandle// Grid 
with PointType coordinates as blind data +pointsToGrid(const PtrT d_xyz, int pointCount, int maxPointsPerVoxel, int tolerance, int maxIterations, PointType type, const BufferT &buffer, cudaStream_t stream) +{ + PointsToGrid converter(maxPointsPerVoxel, tolerance, maxIterations, Vec3d(0.0), stream); + converter.setPointType(type); + return converter.getHandle(d_xyz, pointCount, buffer); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +GridHandle +pointsToGrid(std::vector> vec, const BufferT &buffer, cudaStream_t stream) +{ + std::vector> handles; + for (auto &p : vec) handles.push_back(pointsToGrid(std::get<0>(p), std::get<1>(p), std::get<2>(p), std::get<3>(p), buffer, stream)); + return mergeDeviceGrids(handles, stream); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +GridHandle +voxelsToGrid(std::vector> vec, const BufferT &buffer, cudaStream_t stream) +{ + std::vector> handles; + for (auto &p : vec) handles.push_back(voxelsToGrid(std::get<0>(p), std::get<1>(p), std::get<2>(p), buffer, stream)); + return mergeDeviceGrids(handles, stream); +} + +}// namespace tools::cuda ====================================================================================================================================== + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +[[deprecated("Use cuda::pointsToGrid instead")]] +GridHandle +cudaPointsToGrid(const PtrT dWorldPoints, + int pointCount, + double voxelSize = 1.0, + PointType type = PointType::Default, + const BufferT &buffer = BufferT(), + cudaStream_t stream = 0) +{ + return tools::cuda::pointsToGrid(dWorldPoints, pointCount, voxelSize, type, buffer, stream); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +[[deprecated("Use cuda::pointsToGrid instead")]] +GridHandle +cudaPointsToGrid(std::vector> pointSet, + const BufferT &buffer = BufferT(), + cudaStream_t stream = 0) +{ + return tools::cuda::pointsToGrid(pointSet, buffer, stream); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +[[deprecated("Use cuda::voxelsToGrid instead")]] +GridHandle +cudaVoxelsToGrid(const PtrT dGridVoxels, + size_t voxelCount, + double voxelSize = 1.0, + const BufferT &buffer = BufferT(), + cudaStream_t stream = 0) +{ + return tools::cuda::voxelsToGrid(dGridVoxels, voxelCount, voxelSize, buffer, stream); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +[[deprecated("Use cuda::voxelsToGrid instead")]] +GridHandle +cudaVoxelsToGrid(std::vector> pointSet, + const BufferT &buffer = BufferT(), + cudaStream_t stream = 0) +{ + return tools::cuda::voxelsToGrid(pointSet, buffer, stream); +} + +}// namespace nanovdb + +#endif // NVIDIA_TOOLS_CUDA_POINTSTOGRID_CUH_HAS_BEEN_INCLUDED 
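A minimal usage sketch of the new tools::cuda::pointsToGrid entry point (the overload that the deprecated cudaPointsToGrid wrapper above forwards to). It assumes the points already reside in device memory and that nanovdb::cuda::DeviceBuffer is the default device buffer type; buildPointGrid and d_points are illustrative names, not part of the API:

    #include <nanovdb/tools/cuda/PointsToGrid.cuh>

    // Builds a Point grid from pointCount world-space points resident on the GPU.
    // Positions are encoded as 16-bit voxel-local offsets (PointType::Voxel16).
    nanovdb::GridHandle<nanovdb::cuda::DeviceBuffer>
    buildPointGrid(const nanovdb::Vec3f *d_points, int pointCount, cudaStream_t stream = 0)
    {
        const double voxelSize = 0.5;// world units per voxel (illustrative value)
        return nanovdb::tools::cuda::pointsToGrid<nanovdb::Point>(
            d_points, pointCount, voxelSize, nanovdb::PointType::Voxel16,
            nanovdb::cuda::DeviceBuffer(), stream);
    }

The returned handle owns the device allocation; handle.deviceGrid<nanovdb::Point>() should yield the typed device-side grid pointer, with the encoded point positions stored as blind data.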
diff --git a/nanovdb/nanovdb/tools/cuda/SignedFloodFill.cuh b/nanovdb/nanovdb/tools/cuda/SignedFloodFill.cuh new file mode 100644 index 0000000000..82aece2784 --- /dev/null +++ b/nanovdb/nanovdb/tools/cuda/SignedFloodFill.cuh @@ -0,0 +1,213 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/*! + \file nanovdb/tools/cuda/SignedFloodFill.cuh + + \author Ken Museth + + \date May 3, 2023 + + \brief Performs a signed flood-fill operation on the hierarchical tree structure on the device + + \todo This tool needs to handle the (extremely) rare case when the root node + needs to be modified during the signed flood-fill operation. This happens + when the root-table needs to be expanded with tile values (of size 4096^3) + that are completely inside the implicit surface. + + \warning This header file contains CUDA device code, so be sure + to include it only in .cu files (or other .cuh files) +*/ + +#ifndef NANOVDB_TOOLS_CUDA_SIGNEDFLOODFILL_CUH_HAS_BEEN_INCLUDED +#define NANOVDB_TOOLS_CUDA_SIGNEDFLOODFILL_CUH_HAS_BEEN_INCLUDED + +#include +#include +#include +#include +#include + +namespace nanovdb { + +namespace tools::cuda { + +/// @brief Performs a signed flood-fill operation on the hierarchical tree structure on the device +/// @tparam BuildT Build type of the grid to be flood-filled +/// @param d_grid Non-const device pointer to the grid that will be flood-filled +/// @param verbose If true, timing information will be printed to the terminal +/// @param stream Optional CUDA stream +template +typename util::enable_if::is_float, void>::type +signedFloodFill(NanoGrid *d_grid, bool verbose = false, cudaStream_t stream = 0); + +namespace {// anonymous namespace + +template +class SignedFloodFill +{ +public: + SignedFloodFill(bool verbose = false, cudaStream_t stream = 0) + : mStream(stream), mVerbose(verbose) {} + + /// @brief Toggle verbose mode on and off + /// @param on if true, verbose mode is turned on + void setVerbose(bool on = true) {mVerbose = on;} + + void operator()(NanoGrid *d_grid); + +private: + cudaStream_t mStream{0}; + util::cuda::Timer mTimer; + bool mVerbose{false}; + +};// SignedFloodFill + +//================================================================================================ + +template +__global__ void processRootKernel(NanoTree *tree) +{ + // auto &root = tree->root(); + /* + using ChildT = typename RootT::ChildNodeType; + // Insert the child nodes into a map sorted according to their origin + std::map nodeKeys; + typename RootT::ChildOnIter it = root.beginChildOn(); + for (; it; ++it) nodeKeys.insert(std::pair(it.getCoord(), &(*it))); + static const Index DIM = RootT::ChildNodeType::DIM; + + // We employ a simple z-scanline algorithm that inserts inactive tiles with + // the inside value if they are sandwiched between inside child nodes only!
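+ // e.g. two inside children at z = 0 and z = 3*DIM (with negative boundary values) get inside tiles added between them, at z = DIM and z = 2*DIM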
+ typename std::map::const_iterator b = nodeKeys.begin(), e = nodeKeys.end(); + if ( b == e ) return; + for (typename std::map::const_iterator a = b++; b != e; ++a, ++b) { + Coord d = b->first - a->first; // delta of neighboring coordinates + if (d[0]!=0 || d[1]!=0 || d[2]==Int32(DIM)) continue;// not same z-scanline or neighbors + const ValueT fill[] = { a->second->getLastValue(), b->second->getFirstValue() }; + if (!(fill[0] < 0) || !(fill[1] < 0)) continue; // scanline isn't inside + Coord c = a->first + Coord(0u, 0u, DIM); + for (; c[2] != b->first[2]; c[2] += DIM) root.addTile(c, mInside, false); + } + */ + //root.setBackground(mOutside, /*updateChildNodes=*/false); +}// processRootKernel + +//================================================================================================ + +template +__global__ void processNodeKernel(NanoTree *tree, size_t count) +{ + using NodeT = typename NanoNode::type; + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= count) return; + const uint32_t nValue = tid & (NodeT::SIZE - 1u); + auto &node = *(tree->template getFirstNode() + (tid >> (3*NodeT::LOG2DIM))); + const auto &mask = node.childMask(); + if (mask.isOn(nValue)) return;// ignore if child + auto value = tree->background();// initiate to outside value + auto n = mask.template findNext(nValue); + if (n < NodeT::SIZE) { + if (node.getChild(n)->getFirstValue() < 0) value = -value; + } else if ((n = mask.template findPrev(nValue)) < NodeT::SIZE) { + if (node.getChild(n)->getLastValue() < 0) value = -value; + } else if (node.getValue(0)<0) { + value = -value; + } + node.setValue(nValue, value); +}// processNodeKernel + +//================================================================================================ + +template +__global__ void processLeafKernel(NanoTree *tree, size_t count) +{ + using LeafT = NanoLeaf; + const size_t tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= count) return; + const uint32_t nVoxel = tid & (LeafT::SIZE - 1u); + auto *leaf = tree->getFirstLeaf() + (tid >> (3*LeafT::LOG2DIM)); + const auto &mask = leaf->valueMask(); + if (mask.isOn(nVoxel)) return; + auto *buffer = leaf->mValues; + auto n = mask.template findNext(nVoxel); + if (n == LeafT::SIZE && (n = mask.template findPrev(nVoxel)) == LeafT::SIZE) n = 0u; + buffer[nVoxel] = buffer[n]<0 ? 
-tree->background() : tree->background(); +}// processLeafKernel + +//================================================================================================ + +template +__global__ void cpyNodeCountKernel(NanoGrid *d_grid, uint64_t *d_count) +{ + NANOVDB_ASSERT(d_grid->isSequential()); + for (int i=0; i<3; ++i) *d_count++ = d_grid->tree().nodeCount(i); + *d_count = d_grid->tree().root().tileCount(); +} + +}// anonymous namespace + +//================================================================================================ + +template +void SignedFloodFill::operator()(NanoGrid *d_grid) +{ + static_assert(BuildTraits::is_float, "cuda::SignedFloodFill only works on float grids"); + NANOVDB_ASSERT(d_grid); + uint64_t count[4], *d_count = nullptr; + cudaCheck(util::cuda::mallocAsync((void**)&d_count, 4*sizeof(uint64_t), mStream)); + cpyNodeCountKernel<<<1, 1, 0, mStream>>>(d_grid, d_count); + cudaCheckError(); + cudaCheck(cudaMemcpyAsync(&count, d_count, 4*sizeof(uint64_t), cudaMemcpyDeviceToHost, mStream)); + cudaCheck(util::cuda::freeAsync(d_count, mStream)); + + static const int threadsPerBlock = 128; + auto blocksPerGrid = [&](size_t count)->uint32_t{return (count + (threadsPerBlock - 1)) / threadsPerBlock;}; + auto *tree = reinterpret_cast*>(d_grid + 1); + + if (mVerbose) mTimer.start("\nProcess leaf nodes"); + processLeafKernel<<>>(tree, count[0]<<9); + cudaCheckError(); + + if (mVerbose) mTimer.restart("Process lower internal nodes"); + processNodeKernel<<>>(tree, count[1]<<12); + cudaCheckError(); + + if (mVerbose) mTimer.restart("Process upper internal nodes"); + processNodeKernel<<>>(tree, count[2]<<15); + cudaCheckError(); + + //if (mVerbose) mTimer.restart("Process root node"); + //processRootKernel<<<1, 1, 0, mStream>>>(tree); + if (mVerbose) mTimer.stop(); + cudaCheckError(); +}// SignedFloodFill::operator() + +//================================================================================================ + +template +typename util::enable_if::is_float, void>::type +signedFloodFill(NanoGrid *d_grid, bool verbose, cudaStream_t stream) +{ + SignedFloodFill sff(verbose, stream); + sff(d_grid); + auto *d_gridData = d_grid->data(); + Checksum cs = getChecksum(d_gridData, stream); + if (cs.isFull()) {// CheckMode::Partial checksum is unaffected + updateChecksum(d_gridData, CheckMode::Full, stream); + } +} + +}// namespace tools::cuda + +template +[[deprecated("Use nanovdb::tools::cuda::signedFloodFill instead.")]] +typename util::enable_if::is_float, void>::type +cudaSignedFloodFill(NanoGrid *d_grid, bool verbose = false, cudaStream_t stream = 0) +{ + return tools::cuda::signedFloodFill(d_grid, verbose, stream); +} + +}// namespace nanovdb + +#endif // NANOVDB_TOOLS_CUDA_SIGNEDFLOODFILL_CUH_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/unittest/TestNanoVDB.cc b/nanovdb/nanovdb/unittest/TestNanoVDB.cc index aa84b99202..4d39b443bb 100644 --- a/nanovdb/nanovdb/unittest/TestNanoVDB.cc +++ b/nanovdb/nanovdb/unittest/TestNanoVDB.cc @@ -13,23 +13,23 @@ #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include #include -#include -#include -#include +#include +#include +#include #if !defined(_MSC_VER) // does not compile in msvc c++ due to zero-sized arrays. #include @@ -63,7 +63,7 @@ struct Sphere const ValueT dst = this->sdf(ijk); return dst >= mBackground ? 
mBackground : dst <= -mBackground ? -mBackground : dst; } - ValueT operator()(const nanovdb::Vec3& p) const + ValueT operator()(const nanovdb::math::Vec3& p) const { const ValueT dst = this->sdf(p); return dst >= mBackground ? mBackground : dst <= -mBackground ? -mBackground : dst; @@ -83,15 +83,15 @@ struct Sphere } private: - ValueT sdf(nanovdb::Vec3 xyz) const + ValueT sdf(nanovdb::math::Vec3 xyz) const { xyz *= mVoxelSize; xyz -= mCenter; return xyz.length() - mRadius; } - ValueT sdf(const nanovdb::Coord& ijk) const { return this->sdf(nanovdb::Vec3(ijk[0], ijk[1], ijk[2])); } - static_assert(nanovdb::is_floating_point::value, "Sphere: expect floating point"); - const nanovdb::Vec3 mCenter; + ValueT sdf(const nanovdb::Coord& ijk) const { return this->sdf(nanovdb::math::Vec3(ijk[0], ijk[1], ijk[2])); } + static_assert(nanovdb::util::is_floating_point::value, "Sphere: expect floating point"); + const nanovdb::math::Vec3 mCenter; const ValueT mRadius, mVoxelSize, mBackground; }; // Sphere @@ -163,12 +163,14 @@ class TestNanoVDB : public ::testing::Test void SetUp() override { + mStr = new char[256]; // Code here will be called immediately after the constructor (right // before each test). } void TearDown() override { + delete [] mStr; // Code here will be called immediately after each test (right // before the destructor). } @@ -185,7 +187,8 @@ class TestNanoVDB : public ::testing::Test const auto n = sizeof(T); std::cerr << "Size of " << s << ": " << n << " bytes which is" << (n % 32 == 0 ? " " : " NOT ") << "32 byte aligned" << std::endl; } - nanovdb::CpuTimer mTimer; + nanovdb::util::Timer mTimer; + char *mStr; }; // TestNanoVDB template @@ -206,28 +209,33 @@ class TestOffsets : public ::testing::Test }; // TestOffsets -using MyTypes = ::testing::Types; +// Ordering of types is identical to GridType in NanoVDB.h +using MyTypes = ::testing::Types;// GridType::UInt8 = 26 TYPED_TEST_SUITE(TestOffsets, MyTypes); @@ -235,6 +243,7 @@ TEST_F(TestNanoVDB, Version) { EXPECT_EQ( 4u, sizeof(uint32_t)); EXPECT_EQ( 4u, sizeof(nanovdb::Version)); + char str[30]; {// default constructor nanovdb::Version v; EXPECT_EQ(uint32_t(NANOVDB_MAJOR_VERSION_NUMBER), v.getMajor()); @@ -244,10 +253,7 @@ TEST_F(TestNanoVDB, Version) ss << NANOVDB_MAJOR_VERSION_NUMBER << "." << NANOVDB_MINOR_VERSION_NUMBER << "." << NANOVDB_PATCH_VERSION_NUMBER; - auto c_str = v.c_str(); - EXPECT_EQ(ss.str(), std::string(c_str)); - std::free(const_cast(c_str)); - //std::cerr << v.c_str() << std::endl; + EXPECT_EQ(ss.str(), std::string(nanovdb::toStr(str, v))); } {// detailed constructor const uint32_t major = (1u << 11) - 1;// maximum allowed value @@ -259,10 +265,7 @@ TEST_F(TestNanoVDB, Version) EXPECT_EQ(patch, v.getPatch()); std::stringstream ss; ss << major << "." << minor << "." << patch; - auto c_str = v.c_str(); - EXPECT_EQ(ss.str(), std::string(c_str)); - std::free(const_cast(c_str)); - //std::cerr << v.c_str() << std::endl; + EXPECT_EQ(ss.str(), std::string(nanovdb::toStr(str, v))); } {// smallest possible version number const uint32_t major = 1u; @@ -274,10 +277,7 @@ TEST_F(TestNanoVDB, Version) EXPECT_EQ(patch, v.getPatch()); std::stringstream ss; ss << major << "." << minor << "." 
<< patch; - auto c_str = v.c_str(); - EXPECT_EQ(ss.str(), std::string(c_str)); - std::free(const_cast(c_str)); - //std::cerr << "version.data = " << v.id() << std::endl; + EXPECT_EQ(ss.str(), std::string(nanovdb::toStr(str, v))); } {// test comparison operators EXPECT_EQ( nanovdb::Version(28, 2, 7), nanovdb::Version( 28, 2, 7) ); @@ -331,7 +331,7 @@ TEST_F(TestNanoVDB, Version) EXPECT_LT(tmp.version, T(29,0,0).version); } } -} +}// Version TEST_F(TestNanoVDB, Basic) { @@ -366,38 +366,86 @@ TEST_F(TestNanoVDB, Basic) EXPECT_EQ(i, *j); //std::cerr << "i="<()); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridType::Unknown ), "?"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridType::Float ), "float"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridType::Double ), "double"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridType::Int16 ), "int16"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridType::Int32 ), "int32"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridType::Int64 ), "int64"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridType::Vec3f ), "Vec3f"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridType::Vec3d ), "Vec3d"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridType::Mask ), "Mask"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridType::Half ), "Half"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridType::UInt32 ), "uint32"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridType::Boolean ), "bool"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridType::RGBA8 ), "RGBA8"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridType::Fp4 ), "Float4"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridType::Fp8 ), "Float8"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridType::Fp16 ), "Float16"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridType::FpN ), "FloatN"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridType::Vec4f ), "Vec4f"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridType::Vec4d ), "Vec4d"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridType::Index ), "Index"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridType::OnIndex ), "OnIndex"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridType::IndexMask ), "IndexMask"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridType::OnIndexMask ), "OnIndexMask"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridType::PointIndex ), "PointIndex"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridType::Vec3u8 ), "Vec3u8"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridType::Vec3u16 ), "Vec3u16"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridType::End ), "End"), 0 ); + } + {// toStr(GridClass) + EXPECT_EQ(7, nanovdb::strlen()); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridClass::Unknown ), "?"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridClass::LevelSet ), "SDF"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridClass::FogVolume ), "FOG"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridClass::Staggered ), "MAC"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridClass::PointIndex ), "PNTIDX"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridClass::PointData ), "PNTDAT"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridClass::Topology ), "TOPO"), 0 ); + EXPECT_EQ( 
strcmp(nanovdb::toStr(mStr, nanovdb::GridClass::VoxelVolume ), "VOX"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridClass::IndexGrid ), "INDEX"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridClass::TensorGrid ), "TENSOR"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridClass::End ), "END"), 0 ); + } + {// toStr(GridFlags) + EXPECT_EQ(23, nanovdb::strlen()); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridFlags::HasLongGridName ), "has long grid name"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridFlags::HasBBox ), "has bbox"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridFlags::HasMinMax ), "has min/max"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridFlags::HasAverage ), "has average"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridFlags::HasStdDeviation ), "has standard deviation"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridFlags::IsBreadthFirst ), "is breadth-first"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::GridFlags::End ), "end"), 0 ); + } + {// toStr(Codec) + EXPECT_EQ(6, nanovdb::strlen()); + EXPECT_EQ( strcmp(nanovdb::io::toStr(mStr, nanovdb::io::Codec::NONE ), "NONE"), 0 ); + EXPECT_EQ( strcmp(nanovdb::io::toStr(mStr, nanovdb::io::Codec::ZIP ), "ZIP"), 0 ); + EXPECT_EQ( strcmp(nanovdb::io::toStr(mStr, nanovdb::io::Codec::BLOSC ), "BLOSC"), 0 ); + EXPECT_EQ( strcmp(nanovdb::io::toStr(mStr, nanovdb::io::Codec::End ), "END"), 0 ); + } + {// toStr(version) + EXPECT_EQ(8, nanovdb::strlen()); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::Version(12,34,56) ), "12.34.56"), 0 ); + } + {// toStr(MagicType) + EXPECT_EQ(25, nanovdb::strlen()); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::MagicType::Unknown ), "unknown"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::MagicType::OpenVDB ), "openvdb"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::MagicType::NanoVDB ), "nanovdb"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::MagicType::NanoGrid ), "nanovdb::Grid"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::MagicType::NanoFile ), "nanovdb::File"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::MagicType::NanoNode ), "nanovdb::NodeManager"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::MagicType::NanoFrag ), "fragmented nanovdb::Grid"), 0 ); + } +}// toStr TEST_F(TestNanoVDB, Assumptions) { @@ -417,14 +465,14 @@ TEST_F(TestNanoVDB, Assumptions) EXPECT_EQ(-1, b.a.i); EXPECT_EQ(reinterpret_cast(&b), reinterpret_cast(&(b.a))); EXPECT_EQ(reinterpret_cast(&(b.a)), reinterpret_cast(&(b.a.i))); - EXPECT_EQ(nanovdb::AlignUp<32>(48), 64U); - EXPECT_EQ(nanovdb::AlignUp<8>(16), 16U); -} + EXPECT_EQ(nanovdb::math::AlignUp<32>(48), 64U); + EXPECT_EQ(nanovdb::math::AlignUp<8>(16), 16U); +}// Assumptions TEST_F(TestNanoVDB, Magic) { - EXPECT_EQ(0x304244566f6e614eUL, NANOVDB_MAGIC_NUMBER); // Magic number: "NanoVDB0" in hex) - EXPECT_EQ(0x4e616e6f56444230UL, nanovdb::io::reverseEndianness(NANOVDB_MAGIC_NUMBER)); + EXPECT_EQ(0x304244566f6e614eUL, NANOVDB_MAGIC_NUMB); // Magic number: "NanoVDB0" in hex) + EXPECT_EQ(0x4e616e6f56444230UL, nanovdb::io::reverseEndianness(NANOVDB_MAGIC_NUMB)); // Verify little endian representation const char* str = "NanoVDB0"; // note it's exactly 8 bytes @@ -439,7 +487,7 @@ TEST_F(TestNanoVDB, Magic) uint64_t magic; ss1 >> magic; - EXPECT_EQ(magic, NANOVDB_MAGIC_NUMBER); + EXPECT_EQ(magic, NANOVDB_MAGIC_NUMB); // Verify big endian representation std::stringstream ss2; @@ -451,11 +499,11 
@@ TEST_F(TestNanoVDB, Magic) EXPECT_EQ("0x4e616e6f56444230UL", ss2.str()); ss2 >> magic; - EXPECT_EQ(magic, nanovdb::io::reverseEndianness(NANOVDB_MAGIC_NUMBER)); + EXPECT_EQ(magic, nanovdb::io::reverseEndianness(NANOVDB_MAGIC_NUMB)); {// test all magic numbers const std::string a_str("NanoVDB0"), b_str("NanoVDB1"), c_str("NanoVDB2"); - const uint64_t a = NANOVDB_MAGIC_NUMBER;// NanoVDB0 + const uint64_t a = NANOVDB_MAGIC_NUMB;// NanoVDB0 const uint64_t b = NANOVDB_MAGIC_GRID;// NanoVDB1 const uint64_t c = NANOVDB_MAGIC_FILE;// NanoVDB2 const uint64_t m = NANOVDB_MAGIC_MASK;// masks out most significant byte @@ -473,7 +521,7 @@ TEST_F(TestNanoVDB, Magic) EXPECT_EQ('1', bb[7]); EXPECT_EQ('2', cc[7]); EXPECT_EQ(m & a, m & b); - EXPECT_EQ(NANOVDB_MAGIC_MASK & NANOVDB_MAGIC_NUMBER, NANOVDB_MAGIC_MASK & NANOVDB_MAGIC_FILE); + EXPECT_EQ(NANOVDB_MAGIC_MASK & NANOVDB_MAGIC_NUMB, NANOVDB_MAGIC_MASK & NANOVDB_MAGIC_FILE); } }// Magic @@ -481,74 +529,68 @@ TEST_F(TestNanoVDB, FindBits) { for (uint32_t i = 0; i < 32; ++i) { uint32_t word = uint32_t(1) << i; - EXPECT_EQ(i, nanovdb::FindLowestOn(word)); - EXPECT_EQ(i, nanovdb::FindHighestOn(word)); + EXPECT_EQ(i, nanovdb::util::findLowestOn(word)); + EXPECT_EQ(i, nanovdb::util::findHighestOn(word)); } for (uint32_t i = 0; i < 64; ++i) { uint64_t word = uint64_t(1) << i; - EXPECT_EQ(i, nanovdb::FindLowestOn(word)); - EXPECT_EQ(i, nanovdb::FindHighestOn(word)); + EXPECT_EQ(i, nanovdb::util::findLowestOn(word)); + EXPECT_EQ(i, nanovdb::util::findHighestOn(word)); } -} +}// FindBits TEST_F(TestNanoVDB, CRC32) { { // test function that uses iterators const std::string s{"The quick brown fox jumps over the lazy dog"}; std::stringstream ss; - ss << std::hex << std::setw(8) << std::setfill('0') << nanovdb::crc32::checksum(s.c_str(), s.size()); + ss << std::hex << std::setw(8) << std::setfill('0') << nanovdb::util::crc32(s.c_str(), s.size()); EXPECT_EQ("414fa339", ss.str()); } { // test the checksum for a modified string const std::string s{"The quick brown Fox jumps over the lazy dog"}; std::stringstream ss; - ss << std::hex << std::setw(8) << std::setfill('0') << nanovdb::crc32::checksum(s.c_str(), s.size()); + ss << std::hex << std::setw(8) << std::setfill('0') << nanovdb::util::crc32(s.c_str(), s.size()); EXPECT_NE("414fa339", ss.str()); } { // test function that uses void pointer and byte size const std::string s{"The quick brown fox jumps over the lazy dog"}; std::stringstream ss; - ss << std::hex << std::setw(8) << std::setfill('0') << nanovdb::crc32::checksum(s.c_str(), s.size()); + ss << std::hex << std::setw(8) << std::setfill('0') << nanovdb::util::crc32(s.c_str(), s.size()); EXPECT_EQ("414fa339", ss.str()); } { // test accumulation const std::string s1{"The quick brown fox jum"}; - uint32_t crc = nanovdb::crc32::checksum(s1.c_str(), s1.size()); + uint32_t crc = nanovdb::util::crc32(s1.c_str(), s1.size()); const std::string s2{"ps over the lazy dog"}; - crc = nanovdb::crc32::checksum(s2.c_str(), s2.size(), crc); + crc = nanovdb::util::crc32(s2.c_str(), s2.size(), crc); std::stringstream ss; ss << std::hex << std::setw(8) << std::setfill('0') << crc; EXPECT_EQ("414fa339", ss.str()); } { // test accumulation with lookup table - auto lut = nanovdb::crc32::createLut(); + auto lut = nanovdb::util::createCrc32Lut(); const std::string s1{"The quick brown fox jum"}; - uint32_t crc = nanovdb::crc32::checksum(s1.c_str(), s1.size(), lut.get()); + uint32_t crc = nanovdb::util::crc32(s1.c_str(), s1.size(), lut.get()); const std::string s2{"ps over the lazy dog"}; 
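/* note: the crc32 of the concatenated string must match the chained two-step computation ("414fa339"), both with and without the precomputed lookup table */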
- crc = nanovdb::crc32::checksum(s2.c_str(), s2.size(), lut.get(), crc); + crc = nanovdb::util::crc32(s2.c_str(), s2.size(), lut.get(), crc); std::stringstream ss; ss << std::hex << std::setw(8) << std::setfill('0') << crc; EXPECT_EQ("414fa339", ss.str()); } { - //EXPECT_EQ(~uint64_t(0), nanovdb::GridChecksum::EMPTY); - nanovdb::GridChecksum cs(~uint64_t(0)); - EXPECT_EQ(nanovdb::ChecksumMode::Disable, cs.mode()); + EXPECT_EQ(sizeof(uint64_t), sizeof(nanovdb::Checksum)); + nanovdb::Checksum cs; + EXPECT_EQ(nanovdb::CheckMode::Disable, cs.mode()); + EXPECT_EQ(~uint64_t(0), cs.full()); EXPECT_TRUE(cs.isEmpty()); EXPECT_FALSE(cs.isFull()); } - { - nanovdb::GridChecksum cs; - EXPECT_EQ(~uint64_t(0), cs.checksum()); - EXPECT_EQ(nanovdb::ChecksumMode::Disable, cs.mode()); - EXPECT_TRUE(cs.isEmpty()); - EXPECT_FALSE(cs.isFull()); - } -} +}// CRC32 TEST_F(TestNanoVDB, Range1D) { - nanovdb::Range1D r1(0, 20, 2); + nanovdb::util::Range1D r1(0, 20, 2); EXPECT_FALSE(r1.empty()); EXPECT_EQ(2U, r1.grainsize()); EXPECT_EQ(20U, r1.size()); @@ -557,7 +599,7 @@ TEST_F(TestNanoVDB, Range1D) EXPECT_EQ(0U, r1.begin()); EXPECT_EQ(20U, r1.end()); - nanovdb::Range1D r2(r1, nanovdb::Split()); + nanovdb::util::Range1D r2(r1, nanovdb::util::Split()); EXPECT_FALSE(r1.empty()); EXPECT_EQ(2U, r1.grainsize()); @@ -574,11 +616,11 @@ TEST_F(TestNanoVDB, Range1D) EXPECT_TRUE(r2.is_divisible()); EXPECT_EQ(10U, r2.begin()); EXPECT_EQ(20U, r2.end()); -} +}// Range1D TEST_F(TestNanoVDB, Range2D) { - nanovdb::Range<2, int> r1(-20, 20, 1u, 0, 20, 2u); + nanovdb::util::Range<2, int> r1(-20, 20, 1u, 0, 20, 2u); EXPECT_FALSE(r1.empty()); EXPECT_EQ(1U, r1[0].grainsize()); @@ -595,7 +637,7 @@ TEST_F(TestNanoVDB, Range2D) EXPECT_EQ(0, r1[1].begin()); EXPECT_EQ(20, r1[1].end()); - nanovdb::Range<2, int> r2(r1, nanovdb::Split()); + nanovdb::util::Range<2, int> r2(r1, nanovdb::util::Split()); EXPECT_FALSE(r1.empty()); EXPECT_EQ(1U, r1[0].grainsize()); @@ -627,11 +669,11 @@ TEST_F(TestNanoVDB, Range2D) EXPECT_EQ(0, r2[1].begin()); EXPECT_EQ(20, r2[1].end()); EXPECT_EQ(r1[1], r2[1]); -} +}// Range2D TEST_F(TestNanoVDB, Range3D) { - nanovdb::Range<3, int> r1(-20, 20, 1u, 0, 20, 2u, 0, 10, 5); + nanovdb::util::Range<3, int> r1(-20, 20, 1u, 0, 20, 2u, 0, 10, 5); EXPECT_FALSE(r1.empty()); EXPECT_EQ(1U, r1[0].grainsize()); @@ -655,7 +697,7 @@ TEST_F(TestNanoVDB, Range3D) EXPECT_EQ(0, r1[2].begin()); EXPECT_EQ(10, r1[2].end()); - nanovdb::Range<3, int> r2(r1, nanovdb::Split()); + nanovdb::util::Range<3, int> r2(r1, nanovdb::util::Split()); EXPECT_FALSE(r1.empty()); EXPECT_EQ(1U, r1[0].grainsize()); @@ -702,7 +744,7 @@ TEST_F(TestNanoVDB, Range3D) EXPECT_EQ(0, r2[2].begin()); EXPECT_EQ(10, r2[2].end()); EXPECT_EQ(r1[2], r2[2]); -} +}// Range3D TEST_F(TestNanoVDB, invoke) { @@ -715,11 +757,11 @@ TEST_F(TestNanoVDB, invoke) auto kernel1 = [&array](){array[1]=1; }; auto kernel2 = [&array](){array[2]=2; }; auto kernel3 = [&array](){array[3]=3; }; - nanovdb::invoke(kernel0, kernel1, kernel2, kernel3); + nanovdb::util::invoke(kernel0, kernel1, kernel2, kernel3); for (int i=0; i()) - // <<"> is padding with " << (nanovdb::AlignUp(offset)-offset) + //std::cerr << "RootData<"<()) + // <<"> is padding with " << (nanovdb::math::AlignUp(offset)-offset) // << " bytes" << std::endl; //std::cerr << "Is padded: " << (DataT::padding() ? 
"yes" : "no") << std::endl; - EXPECT_EQ(DataT::padding()>0, offset != nanovdb::AlignUp(offset)); - offset = nanovdb::AlignUp(offset); + EXPECT_EQ(DataT::padding()>0, offset != nanovdb::math::AlignUp(offset)); + offset = nanovdb::math::AlignUp(offset); EXPECT_EQ(offset, (int)sizeof(DataT));// size of RootData } {// check memory layout of upper internal nodes using DataT = typename nanovdb::NanoUpper::DataType; - bool test = nanovdb::is_same::value; + bool test = nanovdb::util::is_same::value; EXPECT_TRUE(test); int offsets[] = { NANOVDB_OFFSETOF(DataT, mBBox), @@ -2325,34 +2396,34 @@ TYPED_TEST(TestOffsets, NanoVDB) offset += sizeof(ValueType); EXPECT_EQ(*p++, offset); offset += sizeof(ValueType); - if (offset!=nanovdb::AlignUp(offset)) is_padded=true; - offset = nanovdb::AlignUp(offset); + if (offset!=nanovdb::math::AlignUp(offset)) is_padded=true; + offset = nanovdb::math::AlignUp(offset); EXPECT_EQ(*p++, offset); offset += sizeof(StatsT); - if (offset!=nanovdb::AlignUp(offset)) is_padded=true; - offset = nanovdb::AlignUp(offset); + if (offset!=nanovdb::math::AlignUp(offset)) is_padded=true; + offset = nanovdb::math::AlignUp(offset); EXPECT_EQ(*p++, offset); offset += sizeof(StatsT); - if (offset!=nanovdb::AlignUp<32>(offset)) is_padded=true; - offset = nanovdb::AlignUp<32>(offset); + if (offset!=nanovdb::math::AlignUp<32>(offset)) is_padded=true; + offset = nanovdb::math::AlignUp<32>(offset); EXPECT_EQ(*p++, offset); - if (sizeof(ValueType)!=nanovdb::AlignUp<8>(sizeof(ValueType))) is_padded=true; - const size_t tile_size = nanovdb::AlignUp<8>(sizeof(ValueType)); + if (sizeof(ValueType)!=nanovdb::math::AlignUp<8>(sizeof(ValueType))) is_padded=true; + const size_t tile_size = nanovdb::math::AlignUp<8>(sizeof(ValueType)); EXPECT_EQ(sizeof(typename DataT::Tile), tile_size); offset += (32*32*32)*tile_size; - if (offset!=nanovdb::AlignUp(offset)) is_padded=true; + if (offset!=nanovdb::math::AlignUp(offset)) is_padded=true; //if (is_padded != (DataT::padding()>0)) { - // std::cerr << "Upper InternalData<" << nanovdb::toStr(nanovdb::mapToGridType()) + // std::cerr << "Upper InternalData<" << nanovdb::toStr(nanovdb::toGridType()) // << "> is padding: " << (DataT::padding() ? "yes" : "no") << std::endl; // std::cerr << "is_padded: " << (is_padded>0 ? 
"yes" : "no") << std::endl; //} EXPECT_EQ(is_padded, bool(DataT::padding())); - offset = nanovdb::AlignUp(offset); + offset = nanovdb::math::AlignUp(offset); EXPECT_EQ(sizeof(DataT), (size_t)offset); } {// check memory of lower internal nodes using DataT = typename nanovdb::NanoLower::DataType; - bool test = nanovdb::is_same::value; + bool test = nanovdb::util::is_same::value; EXPECT_TRUE(test); int offsets[] = { NANOVDB_OFFSETOF(DataT, mBBox), @@ -2380,34 +2451,34 @@ TYPED_TEST(TestOffsets, NanoVDB) offset += sizeof(ValueType); EXPECT_EQ(*p++, offset); offset += sizeof(ValueType); - if (offset!=nanovdb::AlignUp(offset)) is_padded=true; - offset = nanovdb::AlignUp(offset); + if (offset!=nanovdb::math::AlignUp(offset)) is_padded=true; + offset = nanovdb::math::AlignUp(offset); EXPECT_EQ(*p++, offset); offset += sizeof(StatsT); - if (offset!=nanovdb::AlignUp(offset)) is_padded=true; - offset = nanovdb::AlignUp(offset); + if (offset!=nanovdb::math::AlignUp(offset)) is_padded=true; + offset = nanovdb::math::AlignUp(offset); EXPECT_EQ(*p++, offset); offset += sizeof(StatsT); - if (offset!=nanovdb::AlignUp<32>(offset)) is_padded=true; - offset = nanovdb::AlignUp<32>(offset); + if (offset!=nanovdb::math::AlignUp<32>(offset)) is_padded=true; + offset = nanovdb::math::AlignUp<32>(offset); EXPECT_EQ(*p++, offset); - if (sizeof(ValueType)!=nanovdb::AlignUp<8>(sizeof(ValueType))) is_padded=true; - const size_t tile_size = nanovdb::AlignUp<8>(sizeof(ValueType)); + if (sizeof(ValueType)!=nanovdb::math::AlignUp<8>(sizeof(ValueType))) is_padded=true; + const size_t tile_size = nanovdb::math::AlignUp<8>(sizeof(ValueType)); EXPECT_EQ(sizeof(typename DataT::Tile), tile_size); offset += (16*16*16)*tile_size; - if (offset!=nanovdb::AlignUp(offset)) is_padded=true; + if (offset!=nanovdb::math::AlignUp(offset)) is_padded=true; //if (is_padded != (DataT::padding()>0)) { - // std::cerr << "Lower InternalData<" << nanovdb::toStr(nanovdb::mapToGridType()) + // std::cerr << "Lower InternalData<" << nanovdb::toStr(nanovdb::toGridType()) // << "> is padding: " << (DataT::padding() ? "yes" : "no") << std::endl; // std::cerr << "is_padded: " << (is_padded>0 ? "yes" : "no") << std::endl; //} EXPECT_EQ(is_padded, bool(DataT::padding())); - offset = nanovdb::AlignUp(offset); + offset = nanovdb::math::AlignUp(offset); EXPECT_EQ(sizeof(DataT), (size_t)offset); } {// check memory of leaf nodes using DataT = typename nanovdb::LeafNode::DataType; - bool test = nanovdb::is_same::value; + bool test = nanovdb::util::is_same::value; EXPECT_TRUE(test); int offsets[] = { NANOVDB_OFFSETOF(DataT, mBBoxMin), @@ -2427,10 +2498,37 @@ TYPED_TEST(TestOffsets, NanoVDB) EXPECT_EQ(*p++, offset); offset += 64;// = 8*8*8/8 checkLeaf(offset); - offset = nanovdb::AlignUp(offset); + offset = nanovdb::math::AlignUp(offset); EXPECT_EQ(sizeof(DataT), (size_t)offset); - //std::cerr << "LeafData<" << nanovdb::toStr(nanovdb::mapToGridType()) - // <<"> is padding: " << (DataT::padding() ? "yes" : "no") << std::endl; +#if 0// disable with 0 + char str[30]; + std::cerr << "LeafData<" << nanovdb::toStr(str, nanovdb::toGridType()) + << "> is padded: " << (DataT::padding() ? 
"yes" : "no") << std::endl; +/* + LeafData is padded: no + LeafData is padded: yes + LeafData is padded: no + LeafData is padded: no + LeafData is padded: no + LeafData is padded: no + LeafData is padded: yes + LeafData is padded: no + LeafData is padded: yes + LeafData is padded: yes + LeafData is padded: yes + LeafData is padded: no + LeafData is padded: no + LeafData is padded: no + LeafData is padded: no + LeafData is padded: no + LeafData is padded: no + LeafData is padded: no + LeafData is padded: yes + LeafData is padded: yes + LeafData is padded: yes + LeafData is padded: no +*/ +#endif } }// TestOffsets NanoVDB @@ -2445,13 +2543,13 @@ void checkLeaf(int &offset) offset += sizeof(ValueType); EXPECT_EQ(NANOVDB_OFFSETOF(DataT, mMaximum), offset); offset += sizeof(ValueType); - offset = nanovdb::AlignUp(offset); + offset = nanovdb::math::AlignUp(offset); EXPECT_EQ(NANOVDB_OFFSETOF(DataT, mAverage), offset); offset += sizeof(StatsT); - offset = nanovdb::AlignUp(offset); + offset = nanovdb::math::AlignUp(offset); EXPECT_EQ(NANOVDB_OFFSETOF(DataT, mStdDevi), offset); offset += sizeof(StatsT); - offset = nanovdb::AlignUp<32>(offset); + offset = nanovdb::math::AlignUp<32>(offset); EXPECT_EQ(NANOVDB_OFFSETOF(DataT, mValues), offset); offset += (8*8*8)*sizeof(ValueType); } @@ -2534,7 +2632,7 @@ void checkLeaf(int &offset) offset += sizeof(uint16_t); EXPECT_EQ(NANOVDB_OFFSETOF(DataT, mDev), offset); offset += sizeof(uint16_t); - offset = nanovdb::AlignUp<32>(offset); + offset = nanovdb::math::AlignUp<32>(offset); EXPECT_EQ(NANOVDB_OFFSETOF(DataT, mCode), offset); offset += 256*sizeof(uint8_t); } @@ -2555,7 +2653,7 @@ void checkLeaf(int &offset) offset += sizeof(uint16_t); EXPECT_EQ(NANOVDB_OFFSETOF(DataT, mDev), offset); offset += sizeof(uint16_t); - offset = nanovdb::AlignUp<32>(offset); + offset = nanovdb::math::AlignUp<32>(offset); EXPECT_EQ(NANOVDB_OFFSETOF(DataT, mCode), offset); offset += 512*sizeof(uint8_t); } @@ -2576,7 +2674,7 @@ void checkLeaf(int &offset) offset += sizeof(uint16_t); EXPECT_EQ(NANOVDB_OFFSETOF(DataT, mDev), offset); offset += sizeof(uint16_t); - offset = nanovdb::AlignUp<32>(offset); + offset = nanovdb::math::AlignUp<32>(offset); EXPECT_EQ(NANOVDB_OFFSETOF(DataT, mCode), offset); offset += 512*sizeof(uint16_t); } @@ -2597,7 +2695,7 @@ void checkLeaf(int &offset) offset += sizeof(uint16_t); EXPECT_EQ(NANOVDB_OFFSETOF(DataT, mDev), offset); offset += sizeof(uint16_t); - offset = nanovdb::AlignUp<32>(offset); + offset = nanovdb::math::AlignUp<32>(offset); } template<> @@ -2608,7 +2706,7 @@ void checkLeaf(int &offset) offset += sizeof(uint64_t); EXPECT_EQ(NANOVDB_OFFSETOF(DataT, mPointCount), offset); offset += sizeof(uint64_t); - offset = nanovdb::AlignUp<32>(offset); + offset = nanovdb::math::AlignUp<32>(offset); EXPECT_EQ(NANOVDB_OFFSETOF(DataT, mValues), offset); offset += (8*8*8)*sizeof(uint16_t); } @@ -2648,8 +2746,8 @@ TEST_F(TestNanoVDB, BasicGrid) */ } - EXPECT_EQ(sizeof(GridT), nanovdb::AlignUp(8 + 8 + 4 + 4 + 8 + nanovdb::GridData::MaxNameSize + 48 + sizeof(nanovdb::Map) + 24 + 4 + 4 + 8 + 4)); - EXPECT_EQ(sizeof(TreeT), nanovdb::AlignUp(4*8 + 3*4 + 3*4 + 8)); + EXPECT_EQ(sizeof(GridT), nanovdb::math::AlignUp(8 + 8 + 4 + 4 + 8 + nanovdb::GridData::MaxNameSize + 48 + sizeof(nanovdb::Map) + 24 + 4 + 4 + 8 + 4)); + EXPECT_EQ(sizeof(TreeT), nanovdb::math::AlignUp(4*8 + 3*4 + 3*4 + 8)); EXPECT_EQ(sizeof(TreeT), size_t(4*8 + 3*4 + 3*4 + 8));// should already be 32 byte aligned uint64_t bytes[6] = {GridT::memUsage(), TreeT::memUsage(), RootT::memUsage(1), 
NodeT2::memUsage(), NodeT1::memUsage(), LeafT::DataType::memUsage()}; @@ -2774,7 +2872,7 @@ TEST_F(TestNanoVDB, BasicGrid) data->mMap.set(mat, invMat, 1.0); data->mGridClass = nanovdb::GridClass::Unknown; data->mGridType = nanovdb::GridType::Float; - data->mMagic = NANOVDB_MAGIC_NUMBER; + data->mMagic = NANOVDB_MAGIC_NUMB; data->mVersion = nanovdb::Version(); #endif memcpy(data->mGridName, name.c_str(), name.size() + 1); @@ -2940,10 +3038,10 @@ TEST_F(TestNanoVDB, BasicGrid) TEST_F(TestNanoVDB, GridBuilderEmpty) { { // empty grid - using SrcGridT = nanovdb::build::Grid; + using SrcGridT = nanovdb::tools::build::Grid; SrcGridT srcGrid(0.0f, "test"); auto srcAcc = srcGrid.getAccessor(); - auto handle = nanovdb::createNanoGrid(srcGrid); + auto handle = nanovdb::tools::createNanoGrid(srcGrid); EXPECT_TRUE(handle); auto* meta = handle.gridMetaData(); EXPECT_TRUE(meta); @@ -2982,10 +3080,10 @@ TEST_F(TestNanoVDB, GridBuilderEmpty) TEST_F(TestNanoVDB, BuilderGridEmpty) { { // empty grid - using SrcGridT = nanovdb::build::Grid; + using SrcGridT = nanovdb::tools::build::Grid; SrcGridT grid(0.0f, "test"); auto srcAcc = grid.getAccessor(); - auto handle = nanovdb::createNanoGrid(grid); + auto handle = nanovdb::tools::createNanoGrid(grid); EXPECT_TRUE(handle); auto* meta = handle.gridMetaData(); EXPECT_TRUE(meta); @@ -3024,7 +3122,7 @@ TEST_F(TestNanoVDB, BuilderGridEmpty) TEST_F(TestNanoVDB, CreateNanoGrid_Basic1) { { // 1 grid point - using SrcGridT = nanovdb::build::Grid; + using SrcGridT = nanovdb::tools::build::Grid; const nanovdb::Coord ijk(1,2,3); SrcGridT grid(0.0f); auto srcAcc = grid.getAccessor(); @@ -3036,7 +3134,7 @@ TEST_F(TestNanoVDB, CreateNanoGrid_Basic1) EXPECT_EQ(1.0f, srcAcc.getValue(ijk)); EXPECT_EQ(1.0f, srcAcc.getValue(1,2,3)); - auto handle = nanovdb::createNanoGrid(grid); + auto handle = nanovdb::tools::createNanoGrid(grid); EXPECT_TRUE(handle); auto* meta = handle.gridMetaData(); EXPECT_TRUE(meta); @@ -3075,7 +3173,7 @@ TEST_F(TestNanoVDB, CreateNanoGrid_Basic1) TEST_F(TestNanoVDB, CreateNanoGrid_addTile) { { // 1 grid point and 1 tile - using SrcGridT = nanovdb::build::Grid; + using SrcGridT = nanovdb::tools::build::Grid; const nanovdb::Coord ijk(1,2,3); SrcGridT grid(0.0f); auto srcAcc = grid.getAccessor(); @@ -3093,7 +3191,7 @@ TEST_F(TestNanoVDB, CreateNanoGrid_addTile) EXPECT_EQ(2.0f, srcAcc.getValue(ijk2)); EXPECT_EQ(2.0f, srcAcc.getValue(-1,-2,-3)); - auto handle = nanovdb::createNanoGrid(grid); + auto handle = nanovdb::tools::createNanoGrid(grid); EXPECT_TRUE(handle); auto* meta = handle.gridMetaData(); EXPECT_TRUE(meta); @@ -3127,7 +3225,7 @@ TEST_F(TestNanoVDB, CreateNanoGrid_addTile) TEST_F(TestNanoVDB, GridBuilderValueMask) { { // 1 grid point - using SrcGridT = nanovdb::build::Grid; + using SrcGridT = nanovdb::tools::build::Grid; const nanovdb::Coord ijk(1,2,3); SrcGridT grid(false); auto srcAcc = grid.getAccessor(); @@ -3137,7 +3235,7 @@ TEST_F(TestNanoVDB, GridBuilderValueMask) EXPECT_EQ(1u, nodeCount[1]); EXPECT_EQ(1u, nodeCount[2]); EXPECT_EQ(true, srcAcc.getValue(ijk)); - auto handle = nanovdb::createNanoGrid(grid); + auto handle = nanovdb::tools::createNanoGrid(grid); EXPECT_TRUE(handle); auto* meta = handle.gridMetaData(); EXPECT_TRUE(meta); @@ -3178,7 +3276,7 @@ TEST_F(TestNanoVDB, GridBuilderValueMask) TEST_F(TestNanoVDB, GridBuilderBasic2) { { // 2 grid points - using SrcGridT = nanovdb::build::Grid; + using SrcGridT = nanovdb::tools::build::Grid; SrcGridT grid(0.0f, "test"); auto srcAcc = grid.getAccessor(); const nanovdb::Coord ijk1(1,2,3), 
ijk2(2,-2,9); @@ -3191,7 +3289,7 @@ TEST_F(TestNanoVDB, GridBuilderBasic2) EXPECT_EQ(2u, nodeCount[1]); EXPECT_EQ(2u, nodeCount[2]); - nanovdb::build::NodeManager srcMgr(grid); + nanovdb::tools::build::NodeManager srcMgr(grid); EXPECT_EQ(2u, srcMgr.nodeCount(0)); EXPECT_EQ(2u, srcMgr.nodeCount(1)); EXPECT_EQ(2u, srcMgr.nodeCount(2)); @@ -3201,7 +3299,7 @@ TEST_F(TestNanoVDB, GridBuilderBasic2) //for (int i=0;i indexBBox = dstGrid->indexBBox(); + const nanovdb::math::BBox indexBBox = dstGrid->indexBBox(); EXPECT_DOUBLE_EQ( 0.0, indexBBox[0][0]); EXPECT_DOUBLE_EQ( 0.0, indexBBox[0][1]); EXPECT_DOUBLE_EQ( 0.0, indexBBox[0][2]); @@ -3335,9 +3433,9 @@ TEST_F(TestNanoVDB, GridBuilderPrune) TEST_F(TestNanoVDB, GridBuilder_Vec3f) { using VoxelT = nanovdb::Vec3f; - EXPECT_EQ(nanovdb::AlignUp(12 + 3 + 1 + 2*4 + 64 + 3*(2*4 + 512*4)), sizeof(nanovdb::NanoLeaf)); + EXPECT_EQ(nanovdb::math::AlignUp(12 + 3 + 1 + 2*4 + 64 + 3*(2*4 + 512*4)), sizeof(nanovdb::NanoLeaf)); { // 3 grid point - using SrcGridT = nanovdb::build::Grid; + using SrcGridT = nanovdb::tools::build::Grid; SrcGridT srcGrid(VoxelT(0.0f)); auto srcAcc = srcGrid.getAccessor(); srcAcc.setValue(nanovdb::Coord( 1, 2, 3), nanovdb::Vec3f(1.0f)); @@ -3349,7 +3447,7 @@ TEST_F(TestNanoVDB, GridBuilder_Vec3f) EXPECT_EQ(nanovdb::Vec3f(2.0f), srcAcc.getValue(nanovdb::Coord(-10, 20,-50))); EXPECT_EQ(nanovdb::Vec3f(3.0f), srcAcc.getValue(nanovdb::Coord( 50,-12, 30))); - auto handle = nanovdb::createNanoGrid(srcGrid, nanovdb::StatsMode::All); + auto handle = nanovdb::tools::createNanoGrid(srcGrid, nanovdb::tools::StatsMode::All); EXPECT_TRUE(handle); auto* meta = handle.gridMetaData(); EXPECT_TRUE(meta); @@ -3358,7 +3456,7 @@ TEST_F(TestNanoVDB, GridBuilder_Vec3f) EXPECT_EQ(uint32_t(NANOVDB_MINOR_VERSION_NUMBER), meta->version().getMinor()); EXPECT_EQ(uint32_t(NANOVDB_PATCH_VERSION_NUMBER), meta->version().getPatch()); EXPECT_EQ("", std::string(meta->shortGridName())); - EXPECT_EQ(nanovdb::mapToGridType(), meta->gridType()); + EXPECT_EQ(nanovdb::toGridType(), meta->gridType()); EXPECT_EQ(nanovdb::GridClass::Unknown, meta->gridClass()); auto* dstGrid = handle.grid(); EXPECT_TRUE(dstGrid); @@ -3400,9 +3498,9 @@ TEST_F(TestNanoVDB, GridBuilder_Vec3f) TEST_F(TestNanoVDB, GridBuilder_Vec4f) { using VoxelT = nanovdb::Vec4f; - EXPECT_EQ(nanovdb::AlignUp(12 + 3 + 1 + 2*4 + 64 + 4*(2*4 + 512*4)), sizeof(nanovdb::NanoLeaf)); + EXPECT_EQ(nanovdb::math::AlignUp(12 + 3 + 1 + 2*4 + 64 + 4*(2*4 + 512*4)), sizeof(nanovdb::NanoLeaf)); { // 3 grid point - using SrcGridT = nanovdb::build::Grid; + using SrcGridT = nanovdb::tools::build::Grid; SrcGridT srcGrid(VoxelT(0.0f)); auto srcAcc = srcGrid.getAccessor(); srcAcc.setValue(nanovdb::Coord( 1, 2, 3), nanovdb::Vec4f(1.0f)); @@ -3414,7 +3512,7 @@ TEST_F(TestNanoVDB, GridBuilder_Vec4f) EXPECT_EQ(nanovdb::Vec4f(2.0f), srcAcc.getValue(nanovdb::Coord(-10, 20,-50))); EXPECT_EQ(nanovdb::Vec4f(3.0f), srcAcc.getValue(nanovdb::Coord( 50,-12, 30))); - auto handle = nanovdb::createNanoGrid(srcGrid, nanovdb::StatsMode::All); + auto handle = nanovdb::tools::createNanoGrid(srcGrid, nanovdb::tools::StatsMode::All); EXPECT_TRUE(handle); auto* meta = handle.gridMetaData(); EXPECT_TRUE(meta); @@ -3423,7 +3521,7 @@ TEST_F(TestNanoVDB, GridBuilder_Vec4f) EXPECT_EQ(uint32_t(NANOVDB_MINOR_VERSION_NUMBER), meta->version().getMinor()); EXPECT_EQ(uint32_t(NANOVDB_PATCH_VERSION_NUMBER), meta->version().getPatch()); EXPECT_EQ("", std::string(meta->shortGridName())); - EXPECT_EQ(nanovdb::mapToGridType(), meta->gridType()); + 
EXPECT_EQ(nanovdb::toGridType(), meta->gridType()); EXPECT_EQ(nanovdb::GridClass::Unknown, meta->gridClass()); auto* dstGrid = handle.grid(); EXPECT_TRUE(dstGrid); @@ -3467,7 +3565,7 @@ TEST_F(TestNanoVDB, GridBuilder_Fp4) using VoxelT = nanovdb::Fp4; EXPECT_EQ(96u + 512u/2, sizeof(nanovdb::NanoLeaf)); { // 3 grid point - using SrcGridT = nanovdb::build::Grid; + using SrcGridT = nanovdb::tools::build::Grid; SrcGridT srcGrid(0.0f); auto srcAcc = srcGrid.getAccessor(); srcAcc.setValue(nanovdb::Coord( 1, 2, 3), 1.0f); @@ -3479,7 +3577,7 @@ TEST_F(TestNanoVDB, GridBuilder_Fp4) EXPECT_EQ(2.0f, srcAcc.getValue(nanovdb::Coord(-10, 20,-50))); EXPECT_EQ(3.0f, srcAcc.getValue(nanovdb::Coord( 50,-12, 30))); - auto handle = nanovdb::createNanoGrid(srcGrid, nanovdb::StatsMode::All); + auto handle = nanovdb::tools::createNanoGrid(srcGrid, nanovdb::tools::StatsMode::All); EXPECT_TRUE(handle); auto* meta = handle.gridMetaData(); EXPECT_TRUE(meta); @@ -3488,7 +3586,7 @@ TEST_F(TestNanoVDB, GridBuilder_Fp4) EXPECT_EQ(uint32_t(NANOVDB_MINOR_VERSION_NUMBER), meta->version().getMinor()); EXPECT_EQ(uint32_t(NANOVDB_PATCH_VERSION_NUMBER), meta->version().getPatch()); EXPECT_EQ("", std::string(meta->shortGridName())); - EXPECT_EQ(nanovdb::mapToGridType(), meta->gridType()); + EXPECT_EQ(nanovdb::toGridType(), meta->gridType()); EXPECT_EQ(nanovdb::GridClass::Unknown, meta->gridClass()); auto* dstGrid = handle.grid(); EXPECT_TRUE(dstGrid); @@ -3532,20 +3630,20 @@ TEST_F(TestNanoVDB, GridBuilder_Fp4) auto mgrHandle = nanovdb::createNodeManager(*dstGrid); auto *nodeMgr = mgrHandle.mgr(); - EXPECT_TRUE(nanovdb::isValid(nodeMgr)); + EXPECT_TRUE(nanovdb::isAligned(nodeMgr)); EXPECT_TRUE(nodeMgr->isLinear()); uint64_t n[3]={0}; for (auto it2 = dstGrid->tree().root().cbeginChild(); it2; ++it2) { auto *node2 = &nodeMgr->upper(n[0]++); - EXPECT_TRUE(nanovdb::isValid(node2)); + EXPECT_TRUE(nanovdb::isAligned(node2)); EXPECT_EQ(&*it2, node2); for (auto it1 = it2->cbeginChild(); it1; ++it1) { auto *node1 = &nodeMgr->lower(n[1]++); - EXPECT_TRUE(nanovdb::isValid(node1)); + EXPECT_TRUE(nanovdb::isAligned(node1)); EXPECT_EQ(&*it1, node1); for (auto it0 = it1->cbeginChild(); it0; ++it0) { auto *node0 = &nodeMgr->leaf(n[2]++); - EXPECT_TRUE(nanovdb::isValid(node0)); + EXPECT_TRUE(nanovdb::isAligned(node0)); EXPECT_EQ(&*it0, node0); }// loop over child nodes of the lower internal node }// loop over child nodes of the upper internal node @@ -3559,11 +3657,11 @@ TEST_F(TestNanoVDB, GridBuilder_Fp4) const nanovdb::Vec3d center(0), origin(0); const float tolerance = 0.5f * voxelSize; - auto handle = nanovdb::createLevelSetSphere(radius, center, + auto handle = nanovdb::tools::createLevelSetSphere(radius, center, voxelSize, halfWidth, origin, "sphere", - nanovdb::StatsMode::Default, - nanovdb::ChecksumMode::Default); + nanovdb::tools::StatsMode::Default, + nanovdb::CheckMode::Default); auto* nanoGrid = handle.grid(); EXPECT_TRUE(nanoGrid); Sphere sphere(center, radius, voxelSize, halfWidth); @@ -3574,11 +3672,11 @@ TEST_F(TestNanoVDB, GridBuilder_Fp4) EXPECT_NEAR(nanoAcc.getValue(p), sphere(p), tolerance); } }; - nanovdb::forEach(nanoGrid->indexBBox(), kernel); + nanovdb::util::forEach(nanoGrid->indexBBox(), kernel); nanovdb::io::writeGrid("data/sphere_fp4.nvdb", handle); ASSERT_THROW(nanovdb::io::readGrid("data/sphere_fp4.nvdb", 1), std::runtime_error); - //nanovdb::CpuTimer timer; + //nanovdb::util::Timer timer; //timer.start("read all grids"); //handle = nanovdb::io::readGrid("data/sphere_fp4.nvdb"); //timer.start("read first grid"); 
@@ -3586,7 +3684,7 @@ TEST_F(TestNanoVDB, GridBuilder_Fp4) //timer.stop(); nanoGrid = handle.grid(); EXPECT_TRUE(nanoGrid); - nanovdb::forEach(nanoGrid->indexBBox(), kernel); + nanovdb::util::forEach(nanoGrid->indexBBox(), kernel); //timer.start("read first grid"); //handle = nanovdb::io::readGrid("data/sphere_fp4.nvdb", 0); @@ -3595,7 +3693,7 @@ TEST_F(TestNanoVDB, GridBuilder_Fp4) //timer.stop(); nanoGrid = handle.grid(); EXPECT_TRUE(nanoGrid); - nanovdb::forEach(nanoGrid->indexBBox(), kernel); + nanovdb::util::forEach(nanoGrid->indexBBox(), kernel); } } // GridBuilder_Fp4 @@ -3604,7 +3702,7 @@ TEST_F(TestNanoVDB, GridBuilder_Fp8) using VoxelT = nanovdb::Fp8; EXPECT_EQ(96u + 512u, sizeof(nanovdb::NanoLeaf)); { // 3 grid point - using SrcGridT = nanovdb::build::Grid; + using SrcGridT = nanovdb::tools::build::Grid; SrcGridT srcGrid(0.0f); auto srcAcc = srcGrid.getAccessor(); @@ -3617,7 +3715,7 @@ TEST_F(TestNanoVDB, GridBuilder_Fp8) EXPECT_EQ(2.0f, srcAcc.getValue(nanovdb::Coord(-10, 20,-50))); EXPECT_EQ(3.0f, srcAcc.getValue(nanovdb::Coord( 50,-12, 30))); - auto handle = nanovdb::createNanoGrid(srcGrid, nanovdb::StatsMode::All); + auto handle = nanovdb::tools::createNanoGrid(srcGrid, nanovdb::tools::StatsMode::All); EXPECT_TRUE(handle); auto* meta = handle.gridMetaData(); EXPECT_TRUE(meta); @@ -3626,7 +3724,7 @@ TEST_F(TestNanoVDB, GridBuilder_Fp8) EXPECT_EQ(uint32_t(NANOVDB_MINOR_VERSION_NUMBER), meta->version().getMinor()); EXPECT_EQ(uint32_t(NANOVDB_PATCH_VERSION_NUMBER), meta->version().getPatch()); EXPECT_EQ("", std::string(meta->shortGridName())); - EXPECT_EQ(nanovdb::mapToGridType(), meta->gridType()); + EXPECT_EQ(nanovdb::toGridType(), meta->gridType()); EXPECT_EQ(nanovdb::GridClass::Unknown, meta->gridClass()); auto* dstGrid = handle.grid(); EXPECT_TRUE(dstGrid); @@ -3667,20 +3765,20 @@ TEST_F(TestNanoVDB, GridBuilder_Fp8) auto mgrHandle = nanovdb::createNodeManager(*dstGrid); auto *nodeMgr = mgrHandle.mgr(); - EXPECT_TRUE(nanovdb::isValid(nodeMgr)); + EXPECT_TRUE(nanovdb::isAligned(nodeMgr)); EXPECT_TRUE(nodeMgr->isLinear()); uint64_t n[3]={0}; for (auto it2 = dstGrid->tree().root().cbeginChild(); it2; ++it2) { auto *node2 = &nodeMgr->upper(n[0]++); - EXPECT_TRUE(nanovdb::isValid(node2)); + EXPECT_TRUE(nanovdb::isAligned(node2)); EXPECT_EQ(&*it2, node2); for (auto it1 = it2->cbeginChild(); it1; ++it1) { auto *node1 = &nodeMgr->lower(n[1]++); - EXPECT_TRUE(nanovdb::isValid(node1)); + EXPECT_TRUE(nanovdb::isAligned(node1)); EXPECT_EQ(&*it1, node1); for (auto it0 = it1->cbeginChild(); it0; ++it0) { auto *node0 = &nodeMgr->leaf(n[2]++); - EXPECT_TRUE(nanovdb::isValid(node0)); + EXPECT_TRUE(nanovdb::isAligned(node0)); EXPECT_EQ(&*it0, node0); }// loop over child nodes of the lower internal node }// loop over child nodes of the upper internal node @@ -3694,11 +3792,11 @@ TEST_F(TestNanoVDB, GridBuilder_Fp8) const nanovdb::Vec3d center(0), origin(0); const float tolerance = 0.05f * voxelSize; - auto handle = nanovdb::createLevelSetSphere(radius, center, + auto handle = nanovdb::tools::createLevelSetSphere(radius, center, voxelSize, halfWidth, origin, "sphere", - nanovdb::StatsMode::Default, - nanovdb::ChecksumMode::Default); + nanovdb::tools::StatsMode::Default, + nanovdb::CheckMode::Default); auto* nanoGrid = handle.grid(); EXPECT_TRUE(nanoGrid); Sphere sphere(center, radius, float(voxelSize), float(halfWidth)); @@ -3709,14 +3807,14 @@ TEST_F(TestNanoVDB, GridBuilder_Fp8) EXPECT_NEAR(nanoAcc.getValue(p), sphere(p), tolerance); } }; - nanovdb::forEach(nanoGrid->indexBBox(), kernel); + 
nanovdb::util::forEach(nanoGrid->indexBBox(), kernel); nanovdb::io::writeGrid("data/sphere_fp8.nvdb", handle); handle = nanovdb::io::readGrid("data/sphere_fp8.nvdb"); nanoGrid = handle.grid(); EXPECT_TRUE(nanoGrid); - nanovdb::forEach(nanoGrid->indexBBox(), kernel); + nanovdb::util::forEach(nanoGrid->indexBBox(), kernel); } } // GridBuilder_Fp8 @@ -3725,7 +3823,7 @@ TEST_F(TestNanoVDB, GridBuilder_Fp16) using VoxelT = nanovdb::Fp16; EXPECT_EQ(96u + 512u*2, sizeof(nanovdb::NanoLeaf)); { // 3 grid point - using SrcGridT = nanovdb::build::Grid; + using SrcGridT = nanovdb::tools::build::Grid; SrcGridT srcGrid(0.0f); auto srcAcc = srcGrid.getAccessor(); srcAcc.setValue(nanovdb::Coord( 1, 2, 3), 1.0f); @@ -3737,7 +3835,7 @@ TEST_F(TestNanoVDB, GridBuilder_Fp16) EXPECT_EQ(2.0f, srcAcc.getValue(nanovdb::Coord(-10, 20,-50))); EXPECT_EQ(3.0f, srcAcc.getValue(nanovdb::Coord( 50,-12, 30))); - auto handle = nanovdb::createNanoGrid(srcGrid, nanovdb::StatsMode::All); + auto handle = nanovdb::tools::createNanoGrid(srcGrid, nanovdb::tools::StatsMode::All); EXPECT_TRUE(handle); auto* meta = handle.gridMetaData(); EXPECT_TRUE(meta); @@ -3746,7 +3844,7 @@ TEST_F(TestNanoVDB, GridBuilder_Fp16) EXPECT_EQ(uint32_t(NANOVDB_MINOR_VERSION_NUMBER), meta->version().getMinor()); EXPECT_EQ(uint32_t(NANOVDB_PATCH_VERSION_NUMBER), meta->version().getPatch()); EXPECT_EQ("", std::string(meta->shortGridName())); - EXPECT_EQ(nanovdb::mapToGridType(), meta->gridType()); + EXPECT_EQ(nanovdb::toGridType(), meta->gridType()); EXPECT_EQ(nanovdb::GridClass::Unknown, meta->gridClass()); auto* dstGrid = handle.grid(); EXPECT_TRUE(dstGrid); @@ -3787,20 +3885,20 @@ TEST_F(TestNanoVDB, GridBuilder_Fp16) auto mgrHandle = nanovdb::createNodeManager(*dstGrid); auto *nodeMgr = mgrHandle.mgr(); - EXPECT_TRUE(nanovdb::isValid(nodeMgr)); + EXPECT_TRUE(nanovdb::isAligned(nodeMgr)); EXPECT_TRUE(nodeMgr->isLinear()); uint64_t n[3]={0}; for (auto it2 = dstGrid->tree().root().cbeginChild(); it2; ++it2) { auto *node2 = &nodeMgr->upper(n[0]++); - EXPECT_TRUE(nanovdb::isValid(node2)); + EXPECT_TRUE(nanovdb::isAligned(node2)); EXPECT_EQ(&*it2, node2); for (auto it1 = it2->cbeginChild(); it1; ++it1) { auto *node1 = &nodeMgr->lower(n[1]++); - EXPECT_TRUE(nanovdb::isValid(node1)); + EXPECT_TRUE(nanovdb::isAligned(node1)); EXPECT_EQ(&*it1, node1); for (auto it0 = it1->cbeginChild(); it0; ++it0) { auto *node0 = &nodeMgr->leaf(n[2]++); - EXPECT_TRUE(nanovdb::isValid(node0)); + EXPECT_TRUE(nanovdb::isAligned(node0)); EXPECT_EQ(&*it0, node0); }// loop over child nodes of the lower internal node }// loop over child nodes of the upper internal node @@ -3814,11 +3912,11 @@ TEST_F(TestNanoVDB, GridBuilder_Fp16) const nanovdb::Vec3d center(0), origin(0); const float tolerance = 0.005f * voxelSize; - auto handle = nanovdb::createLevelSetSphere(radius, center, + auto handle = nanovdb::tools::createLevelSetSphere(radius, center, voxelSize, halfWidth, origin, "sphere", - nanovdb::StatsMode::Default, - nanovdb::ChecksumMode::Default); + nanovdb::tools::StatsMode::Default, + nanovdb::CheckMode::Default); auto* nanoGrid = handle.grid(); EXPECT_TRUE(nanoGrid); Sphere sphere(center, radius, float(voxelSize), float(halfWidth)); @@ -3829,14 +3927,14 @@ TEST_F(TestNanoVDB, GridBuilder_Fp16) EXPECT_NEAR(nanoAcc.getValue(p), sphere(p), tolerance); } }; - nanovdb::forEach(nanoGrid->indexBBox(), kernel); + nanovdb::util::forEach(nanoGrid->indexBBox(), kernel); nanovdb::io::writeGrid("data/sphere_fp16.nvdb", handle); handle = nanovdb::io::readGrid("data/sphere_fp16.nvdb"); 
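Reviewer note: the hunks above and below all apply the same namespace migration, i.e. the CPU grid builder moved from nanovdb::build to nanovdb::tools::build, createNanoGrid and StatsMode moved into nanovdb::tools, and forEach moved into nanovdb::util. A minimal round-trip sketch of the updated calls follows; the template arguments (<float>, <nanovdb::Fp16>) are filled in by hand and are assumptions, since the flattened hunks no longer show them.

    // Sketch (not part of the patch): build a float grid, convert to a Fp16-quantized NanoVDB grid.
    using SrcGridT = nanovdb::tools::build::Grid<float>; // was nanovdb::build::Grid<float>
    SrcGridT srcGrid(0.0f);                              // 0.0f = background value
    auto srcAcc = srcGrid.getAccessor();
    srcAcc.setValue(nanovdb::Coord(1, 2, 3), 1.0f);
    auto handle = nanovdb::tools::createNanoGrid<SrcGridT, nanovdb::Fp16>(
        srcGrid, nanovdb::tools::StatsMode::All);        // StatsMode now lives in tools
    auto* nanoGrid = handle.grid<nanovdb::Fp16>();
    nanovdb::util::forEach(nanoGrid->indexBBox(),        // forEach now lives in util
        [&](const nanovdb::CoordBBox& b) { for (auto it = b.begin(); it; ++it) { /* ... */ } });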
nanoGrid = handle.grid(); EXPECT_TRUE(nanoGrid); - nanovdb::forEach(nanoGrid->indexBBox(), kernel); + nanovdb::util::forEach(nanoGrid->indexBBox(), kernel); } } // GridBuilder_Fp16 @@ -3845,7 +3943,7 @@ TEST_F(TestNanoVDB, GridBuilder_FpN_Basic1) using VoxelT = nanovdb::FpN; EXPECT_EQ(96u, sizeof(nanovdb::NanoLeaf)); { // 1 grid point - using SrcGridT = nanovdb::build::Grid; + using SrcGridT = nanovdb::tools::build::Grid; SrcGridT srcGrid(0.0f); auto srcAcc = srcGrid.getAccessor(); srcAcc.setValue(nanovdb::Coord( 0, 0, 0), 1.0f); @@ -3853,7 +3951,7 @@ TEST_F(TestNanoVDB, GridBuilder_FpN_Basic1) EXPECT_TRUE(srcAcc.isValueOn(nanovdb::Coord(0, 0, 0))); EXPECT_EQ(1.0f, srcAcc.getValue(nanovdb::Coord( 0, 0, 0))); - auto handle = nanovdb::createNanoGrid(srcGrid, nanovdb::StatsMode::All); + auto handle = nanovdb::tools::createNanoGrid(srcGrid, nanovdb::tools::StatsMode::All); EXPECT_TRUE(handle); auto* meta = handle.gridMetaData(); EXPECT_TRUE(meta); @@ -3862,7 +3960,7 @@ TEST_F(TestNanoVDB, GridBuilder_FpN_Basic1) EXPECT_EQ(uint32_t(NANOVDB_MINOR_VERSION_NUMBER), meta->version().getMinor()); EXPECT_EQ(uint32_t(NANOVDB_PATCH_VERSION_NUMBER), meta->version().getPatch()); EXPECT_EQ("", std::string(meta->shortGridName())); - EXPECT_EQ(nanovdb::mapToGridType(), meta->gridType()); + EXPECT_EQ(nanovdb::toGridType(), meta->gridType()); EXPECT_EQ(nanovdb::GridClass::Unknown, meta->gridClass()); auto* dstGrid = handle.grid(); EXPECT_TRUE(dstGrid); @@ -3905,7 +4003,7 @@ TEST_F(TestNanoVDB, GridBuilder_FpN_Basic3) using VoxelT = nanovdb::FpN; EXPECT_EQ(96u, sizeof(nanovdb::NanoLeaf)); { // 3 grid point - using SrcGridT = nanovdb::build::Grid; + using SrcGridT = nanovdb::tools::build::Grid; SrcGridT srcGrid(0.0f); auto srcAcc = srcGrid.getAccessor(); srcAcc.setValue(nanovdb::Coord( 1, 2, 3), 1.0f); @@ -3917,7 +4015,7 @@ TEST_F(TestNanoVDB, GridBuilder_FpN_Basic3) EXPECT_EQ(2.0f, srcAcc.getValue(nanovdb::Coord(-10, 20,-50))); EXPECT_EQ(3.0f, srcAcc.getValue(nanovdb::Coord( 50,-12, 30))); - auto handle = nanovdb::createNanoGrid(srcGrid, nanovdb::StatsMode::All); + auto handle = nanovdb::tools::createNanoGrid(srcGrid, nanovdb::tools::StatsMode::All); EXPECT_TRUE(handle); auto* meta = handle.gridMetaData(); EXPECT_TRUE(meta); @@ -3926,7 +4024,7 @@ TEST_F(TestNanoVDB, GridBuilder_FpN_Basic3) EXPECT_EQ(uint32_t(NANOVDB_MINOR_VERSION_NUMBER), meta->version().getMinor()); EXPECT_EQ(uint32_t(NANOVDB_PATCH_VERSION_NUMBER), meta->version().getPatch()); EXPECT_EQ("", std::string(meta->shortGridName())); - EXPECT_EQ(nanovdb::mapToGridType(), meta->gridType()); + EXPECT_EQ(nanovdb::toGridType(), meta->gridType()); EXPECT_EQ(nanovdb::GridClass::Unknown, meta->gridClass()); auto* dstGrid = handle.grid(); EXPECT_TRUE(dstGrid); @@ -3967,20 +4065,20 @@ TEST_F(TestNanoVDB, GridBuilder_FpN_Basic3) auto mgrHandle = nanovdb::createNodeManager(*dstGrid); auto *nodeMgr = mgrHandle.mgr(); - EXPECT_TRUE(nanovdb::isValid(nodeMgr)); + EXPECT_TRUE(nanovdb::isAligned(nodeMgr)); EXPECT_FALSE(nodeMgr->isLinear()); uint64_t n[3]={0}; for (auto it2 = dstGrid->tree().root().cbeginChild(); it2; ++it2) { auto *node2 = &nodeMgr->upper(n[0]++); - EXPECT_TRUE(nanovdb::isValid(node2)); + EXPECT_TRUE(nanovdb::isAligned(node2)); EXPECT_EQ(&*it2, node2); for (auto it1 = it2->cbeginChild(); it1; ++it1) { auto *node1 = &nodeMgr->lower(n[1]++); - EXPECT_TRUE(nanovdb::isValid(node1)); + EXPECT_TRUE(nanovdb::isAligned(node1)); EXPECT_EQ(&*it1, node1); for (auto it0 = it1->cbeginChild(); it0; ++it0) { auto *node0 = &nodeMgr->leaf(n[2]++); - 
EXPECT_TRUE(nanovdb::isValid(node0)); + EXPECT_TRUE(nanovdb::isAligned(node0)); EXPECT_EQ(&*it0, node0); }// loop over child nodes of the lower internal node }// loop over child nodes of the upper internal node @@ -4000,11 +4098,11 @@ TEST_F(TestNanoVDB, GridBuilder_FpN_Sphere) const nanovdb::Vec3d center(0), origin(0); const float tolerance = 0.5f * voxelSize; - auto handle = nanovdb::createLevelSetSphere(radius, center, + auto handle = nanovdb::tools::createLevelSetSphere(radius, center, voxelSize, halfWidth, origin, "sphere", - nanovdb::StatsMode::Default, - nanovdb::ChecksumMode::Default, + nanovdb::tools::StatsMode::Default, + nanovdb::CheckMode::Default, tolerance, false); auto* nanoGrid = handle.grid(); @@ -4017,26 +4115,26 @@ TEST_F(TestNanoVDB, GridBuilder_FpN_Sphere) EXPECT_NEAR(nanoAcc.getValue(p), sphere(p), tolerance); } }; - nanovdb::forEach(nanoGrid->indexBBox(), kernel); + nanovdb::util::forEach(nanoGrid->indexBBox(), kernel); nanovdb::io::writeGrid("data/sphere_fpN.nvdb", handle); handle = nanovdb::io::readGrid("data/sphere_fpN.nvdb"); nanoGrid = handle.grid(); EXPECT_TRUE(nanoGrid); - nanovdb::forEach(nanoGrid->indexBBox(), kernel); + nanovdb::util::forEach(nanoGrid->indexBBox(), kernel); } } // GridBuilder_FpN_Sphere TEST_F(TestNanoVDB, NodeManager) { { // 1 active voxel - using SrcGridT = nanovdb::build::Grid; + using SrcGridT = nanovdb::tools::build::Grid; SrcGridT srcGrid(0.0f, "test", nanovdb::GridClass::LevelSet); auto srcAcc = srcGrid.getAccessor(); const nanovdb::Coord x0(1, 2, 3), x1(1, 2, 4); srcAcc.setValue(x1, 1.0f); - auto handle = nanovdb::createNanoGrid(srcGrid); + auto handle = nanovdb::tools::createNanoGrid(srcGrid); EXPECT_TRUE(handle); auto* dstGrid = handle.grid(); EXPECT_TRUE(dstGrid); @@ -4052,8 +4150,7 @@ TEST_F(TestNanoVDB, NodeManager) auto *nodeMgr = nodeMgrHandle.mgr(); EXPECT_TRUE(nodeMgr); EXPECT_TRUE(nanovdb::isAligned(nodeMgr)); - EXPECT_TRUE(nanovdb::isValid(nodeMgr)); - EXPECT_FALSE(nanovdb::isValid(nullptr)); + EXPECT_TRUE(nanovdb::isAligned(nodeMgr)); EXPECT_TRUE(nanovdb::isAligned(nullptr)); EXPECT_TRUE(nodeMgr->isLinear()); @@ -4085,15 +4182,15 @@ TEST_F(TestNanoVDB, NodeManager) uint64_t n[3]={0}; for (auto it2 = dstGrid->tree().root().cbeginChild(); it2; ++it2) { auto *node2 = &nodeMgr->upper(n[0]++); - EXPECT_TRUE(nanovdb::isValid(node2)); + EXPECT_TRUE(nanovdb::isAligned(node2)); EXPECT_EQ(&*it2, node2); for (auto it1 = it2->cbeginChild(); it1; ++it1) { auto *node1 = &nodeMgr->lower(n[1]++); - EXPECT_TRUE(nanovdb::isValid(node1)); + EXPECT_TRUE(nanovdb::isAligned(node1)); EXPECT_EQ(&*it1, node1); for (auto it0 = it1->cbeginChild(); it0; ++it0) { auto *node0 = &nodeMgr->leaf(n[2]++); - EXPECT_TRUE(nanovdb::isValid(node0)); + EXPECT_TRUE(nanovdb::isAligned(node0)); EXPECT_EQ(&*it0, node0); }// loop over child nodes of the lower internal node }// loop over child nodes of the upper internal node @@ -4103,13 +4200,13 @@ TEST_F(TestNanoVDB, NodeManager) EXPECT_EQ(dstGrid->tree().nodeCount(2), n[0]); } { // 2 active voxels - using SrcGridT = nanovdb::build::Grid; + using SrcGridT = nanovdb::tools::build::Grid; SrcGridT srcGrid(0.0f, "test", nanovdb::GridClass::LevelSet); auto srcAcc = srcGrid.getAccessor(); const nanovdb::Coord x0(1, 2, 3), x1(2,-2, 9), x2(1, 2, 4); srcAcc.setValue(x1, 1.0f); srcAcc.setValue(x2, 2.0f); - auto handle = nanovdb::createNanoGrid(srcGrid); + auto handle = nanovdb::tools::createNanoGrid(srcGrid); EXPECT_TRUE(handle); auto* dstGrid = handle.grid(); EXPECT_TRUE(dstGrid); @@ -4153,15 +4250,15 @@ 
TEST_F(TestNanoVDB, NodeManager) uint64_t n[3]={0}; for (auto it2 = dstGrid->tree().root().cbeginChild(); it2; ++it2) { auto *node2 = &nodeMgr->upper(n[0]++); - EXPECT_TRUE(nanovdb::isValid(node2)); + EXPECT_TRUE(nanovdb::isAligned(node2)); EXPECT_EQ(&*it2, node2); for (auto it1 = it2->cbeginChild(); it1; ++it1) { auto *node1 = &nodeMgr->lower(n[1]++); - EXPECT_TRUE(nanovdb::isValid(node1)); + EXPECT_TRUE(nanovdb::isAligned(node1)); EXPECT_EQ(&*it1, node1); for (auto it0 = it1->cbeginChild(); it0; ++it0) { auto *node0 = &nodeMgr->leaf(n[2]++); - EXPECT_TRUE(nanovdb::isValid(node0)); + EXPECT_TRUE(nanovdb::isAligned(node0)); EXPECT_EQ(&*it0, node0); }// loop over child nodes of the lower internal node }// loop over child nodes of the upper internal node @@ -4184,13 +4281,13 @@ TEST_F(TestNanoVDB, NodeManager) } } EXPECT_EQ(voxelCount, voxels.size()); - using SrcGridT = nanovdb::build::Grid; + using SrcGridT = nanovdb::tools::build::Grid; SrcGridT srcGrid(-1.0f, "test", nanovdb::GridClass::LevelSet); auto srcAcc = srcGrid.getAccessor(); for (size_t i=0; i(); EXPECT_TRUE(dstGrid); @@ -4212,15 +4309,15 @@ TEST_F(TestNanoVDB, NodeManager) uint64_t n[3]={0}; for (auto it2 = dstGrid->tree().root().cbeginChild(); it2; ++it2) { auto *node2 = &nodeMgr->upper(n[0]++); - EXPECT_TRUE(nanovdb::isValid(node2)); + EXPECT_TRUE(nanovdb::isAligned(node2)); EXPECT_EQ(&*it2, node2); for (auto it1 = it2->cbeginChild(); it1; ++it1) { auto *node1 = &nodeMgr->lower(n[1]++); - EXPECT_TRUE(nanovdb::isValid(node1)); + EXPECT_TRUE(nanovdb::isAligned(node1)); EXPECT_EQ(&*it1, node1); for (auto it0 = it1->cbeginChild(); it0; ++it0) { auto *node0 = &nodeMgr->leaf(n[2]++); - EXPECT_TRUE(nanovdb::isValid(node0)); + EXPECT_TRUE(nanovdb::isAligned(node0)); EXPECT_EQ(&*it0, node0); }// loop over child nodes of the lower internal node }// loop over child nodes of the upper internal node @@ -4234,7 +4331,7 @@ TEST_F(TestNanoVDB, NodeManager) TEST_F(TestNanoVDB, GridBuilderBasicDense) { { // dense functor - using SrcGridT = nanovdb::build::Grid; + using SrcGridT = nanovdb::tools::build::Grid; SrcGridT srcGrid(0.0f, "test", nanovdb::GridClass::LevelSet); const nanovdb::CoordBBox bbox(nanovdb::Coord(0), nanovdb::Coord(100)); auto func = [](const nanovdb::Coord&) { return 1.0f; }; @@ -4244,7 +4341,7 @@ TEST_F(TestNanoVDB, GridBuilderBasicDense) EXPECT_EQ(1.0f, srcAcc.getValue(*ijk)); EXPECT_TRUE(srcAcc.isActive(*ijk)); } - auto handle = nanovdb::createNanoGrid(srcGrid); + auto handle = nanovdb::tools::createNanoGrid(srcGrid); EXPECT_TRUE(handle); auto* meta = handle.gridMetaData(); EXPECT_TRUE(meta); @@ -4284,7 +4381,7 @@ TEST_F(TestNanoVDB, GridBuilderBasicDense) TEST_F(TestNanoVDB, GridBuilderBackground) { { - using SrcGridT = nanovdb::build::Grid; + using SrcGridT = nanovdb::tools::build::Grid; SrcGridT srcGrid(0.5f); auto acc = srcGrid.getAccessor(); @@ -4297,7 +4394,7 @@ TEST_F(TestNanoVDB, GridBuilderBackground) EXPECT_TRUE(acc.isActive(nanovdb::Coord(1))); EXPECT_EQ(0, acc.getValue(nanovdb::Coord(2))); EXPECT_TRUE(acc.isActive(nanovdb::Coord(1))); - auto gridHdl = nanovdb::createNanoGrid(srcGrid); + auto gridHdl = nanovdb::tools::createNanoGrid(srcGrid); auto grid = gridHdl.grid(); EXPECT_TRUE(grid); EXPECT_FALSE(grid->isEmpty()); @@ -4309,7 +4406,7 @@ TEST_F(TestNanoVDB, GridBuilderBackground) TEST_F(TestNanoVDB, GridBuilderSphere) { - using SrcGridT = nanovdb::build::Grid; + using SrcGridT = nanovdb::tools::build::Grid; Sphere sphere(nanovdb::Vec3d(50), 20.0f); EXPECT_EQ(3.0f, sphere.background()); EXPECT_EQ(3.0f, 
sphere(nanovdb::Coord(100))); @@ -4323,7 +4420,7 @@ TEST_F(TestNanoVDB, GridBuilderSphere) //mTimer.start("GridBulder Sphere"); srcGrid(sphere, bbox); //mTimer.stop(); - auto handle = nanovdb::createNanoGrid(srcGrid); + auto handle = nanovdb::tools::createNanoGrid(srcGrid); EXPECT_TRUE(handle); EXPECT_EQ(1u, handle.gridCount()); auto* meta = handle.gridMetaData(); @@ -4381,7 +4478,7 @@ TEST_F(TestNanoVDB, createLevelSetSphere) EXPECT_EQ(-1.0f, sphere(nanovdb::Coord(center, center, center+radius-1))); EXPECT_EQ( 2.0f, sphere(nanovdb::Coord(center, center, center+radius+2))); //mTimer.start("createLevelSetSphere"); - auto handle = nanovdb::createLevelSetSphere(radius, nanovdb::Vec3d(center), + auto handle = nanovdb::tools::createLevelSetSphere(radius, nanovdb::Vec3d(center), voxelSize, width, nanovdb::Vec3d(0), gridName); //mTimer.stop(); const nanovdb::CoordBBox bbox(nanovdb::Coord(center-radius-width-1), @@ -4417,7 +4514,7 @@ TEST_F(TestNanoVDB, createLevelSetSphere) //std::cerr << "bbox.min = (" << dstGrid->indexBBox()[0][0] << ", " << dstGrid->indexBBox()[0][1] << ", " << dstGrid->indexBBox()[0][2] << ")" << std::endl; //std::cerr << "bbox.max = (" << dstGrid->indexBBox()[1][0] << ", " << dstGrid->indexBBox()[1][1] << ", " << dstGrid->indexBBox()[1][2] << ")" << std::endl; std::atomic count{0}; - nanovdb::forEach(bbox, [&](const nanovdb::CoordBBox &b){ + nanovdb::util::forEach(bbox, [&](const nanovdb::CoordBBox &b){ auto dstAcc = dstGrid->getAccessor(); for (auto it = b.begin(); it; ++it) { const nanovdb::Coord ijk = *it; @@ -4432,8 +4529,8 @@ TEST_F(TestNanoVDB, createLevelSetSphere) TEST_F(TestNanoVDB, createFogVolumeSphere) { - auto handle = nanovdb::createFogVolumeSphere(20.0f, nanovdb::Vec3d(50), - 1.0, 3.0, nanovdb::Vec3d(0), "sphere_20"); + auto handle = nanovdb::tools::createFogVolumeSphere(20.0f, nanovdb::Vec3d(50), + 1.0, 3.0, nanovdb::Vec3d(0), "sphere_20"); const nanovdb::CoordBBox bbox(nanovdb::Coord(0), nanovdb::Coord(100)); EXPECT_TRUE(handle); @@ -4499,7 +4596,7 @@ TEST_F(TestNanoVDB, createPointSphere) EXPECT_EQ(-1.0f, sphere(nanovdb::Coord(0, 0, 99))); EXPECT_EQ(1.0f, sphere(nanovdb::Coord(0, 0, 101))); - auto handle = nanovdb::createPointSphere(1,// pointer per voxel + auto handle = nanovdb::tools::createPointSphere(1,// pointer per voxel 100.0,// radius of sphere nanovdb::Vec3d(0),// center sphere 1.0,// voxel size @@ -4535,7 +4632,7 @@ TEST_F(TestNanoVDB, createPointSphere) for (nanovdb::Coord ijk = bbox[0]; ijk[0] <= bbox[1][0]; ++ijk[0]) { for (ijk[1] = bbox[0][1]; ijk[1] <= bbox[1][1]; ++ijk[1]) { for (ijk[2] = bbox[0][2]; ijk[2] <= bbox[1][2]; ++ijk[2]) { - if (nanovdb::Abs(sphere(ijk)) < 0.5f) { + if (nanovdb::math::Abs(sphere(ijk)) < 0.5f) { ++count; EXPECT_TRUE(acc.isActive(ijk)); EXPECT_TRUE(acc.getValue(ijk) != std::numeric_limits::max()); @@ -4545,7 +4642,7 @@ TEST_F(TestNanoVDB, createPointSphere) EXPECT_LT(begin, end); EXPECT_EQ(1u, n); // exactly one point per voxel const nanovdb::Vec3f p = *begin;// + ijk.asVec3s();// local voxel coordinate + global index coordinates - EXPECT_TRUE(nanovdb::Abs(sphere(p)) <= 1.0f); + EXPECT_TRUE(nanovdb::math::Abs(sphere(p)) <= 1.0f); } else { EXPECT_FALSE(acc.isActive(ijk)); EXPECT_TRUE(acc.getValue(ijk) < 512 || acc.getValue(ijk) == std::numeric_limits::max()); @@ -4561,7 +4658,7 @@ TEST_F(TestNanoVDB, createPointSphere) TEST_F(TestNanoVDB, createLevelSetTorus) { - auto handle = nanovdb::createLevelSetTorus(100.0f, 50.0f, nanovdb::Vec3d(50), + auto handle = nanovdb::tools::createLevelSetTorus(100.0f, 50.0f, 
nanovdb::Vec3d(50), 1.0, 3.0, nanovdb::Vec3d(0), "torus_100"); EXPECT_TRUE(handle); @@ -4598,7 +4695,7 @@ TEST_F(TestNanoVDB, createLevelSetTorus) TEST_F(TestNanoVDB, createFogVolumeTorus) { - auto handle = nanovdb::createFogVolumeTorus(100.0f, 50.0f, nanovdb::Vec3d(50), + auto handle = nanovdb::tools::createFogVolumeTorus(100.0f, 50.0f, nanovdb::Vec3d(50), 1.0, 3.0, nanovdb::Vec3d(0), "torus_100"); EXPECT_TRUE(handle); @@ -4639,7 +4736,7 @@ TEST_F(TestNanoVDB, createFogVolumeTorus) TEST_F(TestNanoVDB, createLevelSetBox) { - auto handle = nanovdb::createLevelSetBox(40.0f, 60.0f, 80.0f, nanovdb::Vec3d(50), + auto handle = nanovdb::tools::createLevelSetBox(40.0f, 60.0f, 80.0f, nanovdb::Vec3d(50), 1.0, 3.0, nanovdb::Vec3d(0), "box"); EXPECT_TRUE(handle); EXPECT_EQ(1u, handle.gridCount()); @@ -4675,7 +4772,7 @@ TEST_F(TestNanoVDB, createLevelSetBox) TEST_F(TestNanoVDB, createFogVolumeBox) { - auto handle = nanovdb::createFogVolumeBox(40.0f, 60.0f, 80.0f, nanovdb::Vec3d(50), + auto handle = nanovdb::tools::createFogVolumeBox(40.0f, 60.0f, 80.0f, nanovdb::Vec3d(50), 1.0, 3.0, nanovdb::Vec3d(0), "box"); EXPECT_TRUE(handle); EXPECT_EQ(1u, handle.gridCount()); @@ -4711,7 +4808,7 @@ TEST_F(TestNanoVDB, createFogVolumeBox) TEST_F(TestNanoVDB, createLevelSetOctahedron) { - auto handle = nanovdb::createLevelSetOctahedron(100.0f, nanovdb::Vec3d(50), + auto handle = nanovdb::tools::createLevelSetOctahedron(100.0f, nanovdb::Vec3d(50), 1.0f, 3.0f, nanovdb::Vec3d(0), "octahedron"); EXPECT_TRUE(handle); EXPECT_EQ(1u, handle.gridCount()); @@ -4740,7 +4837,7 @@ TEST_F(TestNanoVDB, createLevelSetOctahedron) EXPECT_TRUE(dstAcc.isActive(nanovdb::Coord(100, 50, 50))); EXPECT_EQ(1.0f, dstAcc.getValue(nanovdb::Coord(101, 50, 50))); EXPECT_TRUE(dstAcc.isActive(nanovdb::Coord(101, 50, 50))); - EXPECT_EQ(-nanovdb::Sqrt(4.0f/3.0f), dstAcc.getValue(nanovdb::Coord(98, 50, 50))); + EXPECT_EQ(-nanovdb::math::Sqrt(4.0f/3.0f), dstAcc.getValue(nanovdb::Coord(98, 50, 50))); EXPECT_TRUE(dstAcc.isActive(nanovdb::Coord(98, 50, 50))); } // createLevelSetOctahedron @@ -4774,7 +4871,7 @@ TEST_F(TestNanoVDB, CNanoVDBSize) #if !defined(DISABLE_PNANOVDB) && !defined(_MSC_VER) TEST_F(TestNanoVDB, PNanoVDB_Basic) { - EXPECT_EQ(NANOVDB_MAGIC_NUMBER, PNANOVDB_MAGIC_NUMBER); + EXPECT_EQ(NANOVDB_MAGIC_NUMB, PNANOVDB_MAGIC_NUMBER); EXPECT_EQ(NANOVDB_MAJOR_VERSION_NUMBER, PNANOVDB_MAJOR_VERSION_NUMBER); EXPECT_EQ(NANOVDB_MINOR_VERSION_NUMBER, PNANOVDB_MINOR_VERSION_NUMBER); @@ -5015,8 +5112,10 @@ TYPED_TEST(TestOffsets, PNanoVDB) grid_type = PNANOVDB_GRID_TYPE_VEC3U8; } else if (std::is_same::value) { grid_type = PNANOVDB_GRID_TYPE_VEC3U16; + } else if (std::is_same::value) { + grid_type = PNANOVDB_GRID_TYPE_UINT8; } else { - EXPECT_TRUE(!"your forgot to add a grid_type to TestOffsets::PNanoVDB!"); + EXPECT_FALSE("you forgot to add a grid_type to TestOffsets::PNanoVDB!"); } using nodeLeaf_t = typename nanovdb::LeafData; using leaf_t = typename nanovdb::LeafNode; @@ -5064,18 +5163,18 @@ TYPED_TEST(TestOffsets, PNanoVDB) // test GridBlindMetaData EXPECT_EQ((int)sizeof(nanovdb::GridBlindMetaData), PNANOVDB_GRIDBLINDMETADATA_SIZE); - EXPECT_EQ(NANOVDB_OFFSETOF(nanovdb::GridBlindMetaData, mDataOffset), PNANOVDB_GRIDBLINDMETADATA_OFF_BYTE_OFFSET); - EXPECT_EQ(NANOVDB_OFFSETOF(nanovdb::GridBlindMetaData, mValueCount), PNANOVDB_GRIDBLINDMETADATA_OFF_ELEMENT_COUNT); - EXPECT_EQ(NANOVDB_OFFSETOF(nanovdb::GridBlindMetaData, mValueSize), PNANOVDB_GRIDBLINDMETADATA_OFF_FLAGS); + EXPECT_EQ(NANOVDB_OFFSETOF(nanovdb::GridBlindMetaData, mDataOffset), 
PNANOVDB_GRIDBLINDMETADATA_OFF_DATA_OFFSET); + EXPECT_EQ(NANOVDB_OFFSETOF(nanovdb::GridBlindMetaData, mValueCount), PNANOVDB_GRIDBLINDMETADATA_OFF_VALUE_COUNT); + EXPECT_EQ(NANOVDB_OFFSETOF(nanovdb::GridBlindMetaData, mValueSize), PNANOVDB_GRIDBLINDMETADATA_OFF_VALUE_SIZE); EXPECT_EQ(NANOVDB_OFFSETOF(nanovdb::GridBlindMetaData, mSemantic), PNANOVDB_GRIDBLINDMETADATA_OFF_SEMANTIC); EXPECT_EQ(NANOVDB_OFFSETOF(nanovdb::GridBlindMetaData, mDataClass), PNANOVDB_GRIDBLINDMETADATA_OFF_DATA_CLASS); EXPECT_EQ(NANOVDB_OFFSETOF(nanovdb::GridBlindMetaData, mDataType), PNANOVDB_GRIDBLINDMETADATA_OFF_DATA_TYPE); EXPECT_EQ(NANOVDB_OFFSETOF(nanovdb::GridBlindMetaData, mName), PNANOVDB_GRIDBLINDMETADATA_OFF_NAME); EXPECT_EQ((int)sizeof(pnanovdb_gridblindmetadata_t), PNANOVDB_GRIDBLINDMETADATA_SIZE); - EXPECT_EQ(NANOVDB_OFFSETOF(pnanovdb_gridblindmetadata_t, byte_offset), PNANOVDB_GRIDBLINDMETADATA_OFF_BYTE_OFFSET); - EXPECT_EQ(NANOVDB_OFFSETOF(pnanovdb_gridblindmetadata_t, element_count), PNANOVDB_GRIDBLINDMETADATA_OFF_ELEMENT_COUNT); - EXPECT_EQ(NANOVDB_OFFSETOF(pnanovdb_gridblindmetadata_t, flags), PNANOVDB_GRIDBLINDMETADATA_OFF_FLAGS); + EXPECT_EQ(NANOVDB_OFFSETOF(pnanovdb_gridblindmetadata_t, data_offset), PNANOVDB_GRIDBLINDMETADATA_OFF_DATA_OFFSET); + EXPECT_EQ(NANOVDB_OFFSETOF(pnanovdb_gridblindmetadata_t, value_count), PNANOVDB_GRIDBLINDMETADATA_OFF_VALUE_COUNT); + EXPECT_EQ(NANOVDB_OFFSETOF(pnanovdb_gridblindmetadata_t, value_size), PNANOVDB_GRIDBLINDMETADATA_OFF_VALUE_SIZE); EXPECT_EQ(NANOVDB_OFFSETOF(pnanovdb_gridblindmetadata_t, semantic), PNANOVDB_GRIDBLINDMETADATA_OFF_SEMANTIC); EXPECT_EQ(NANOVDB_OFFSETOF(pnanovdb_gridblindmetadata_t, data_class), PNANOVDB_GRIDBLINDMETADATA_OFF_DATA_CLASS); EXPECT_EQ(NANOVDB_OFFSETOF(pnanovdb_gridblindmetadata_t, data_type), PNANOVDB_GRIDBLINDMETADATA_OFF_DATA_TYPE); @@ -5193,12 +5292,12 @@ TEST_F(TestNanoVDB, GridStats) { using GridT = nanovdb::NanoGrid; Sphere sphere(nanovdb::Vec3d(50), 50.0f); - nanovdb::build::Grid grid(sphere.background(), "test", nanovdb::GridClass::LevelSet); + nanovdb::tools::build::Grid grid(sphere.background(), "test", nanovdb::GridClass::LevelSet); const nanovdb::CoordBBox bbox(nanovdb::Coord(-100), nanovdb::Coord(100)); //mTimer.start("GridBuilder"); grid(sphere, bbox); //mTimer.stop(); - nanovdb::CreateNanoGrid> converter(grid); + nanovdb::tools::CreateNanoGrid> converter(grid); auto handle1 = converter.getHandle(); auto handle2 = converter.getHandle(); EXPECT_TRUE(handle1); @@ -5221,8 +5320,8 @@ TEST_F(TestNanoVDB, GridStats) { // reset stats in grid2 //grid2->tree().data()->mVoxelCount = uint64_t(0); - grid2->data()->mWorldBBox = nanovdb::BBox(); - grid2->tree().root().data()->mBBox = nanovdb::BBox(); + grid2->data()->mWorldBBox = nanovdb::math::BBox(); + grid2->tree().root().data()->mBBox = nanovdb::math::BBox(); for (uint32_t i = 0; i < grid2->tree().nodeCount(0); ++i) { auto& leaf = mgr2->leaf(i); auto* data = leaf.data(); @@ -5281,7 +5380,7 @@ TEST_F(TestNanoVDB, GridStats) } //mTimer.start("GridStats"); - nanovdb::gridStats(*grid2); + nanovdb::tools::updateGridStats(grid2); //mTimer.stop(); { // check stats in grid2 @@ -5328,12 +5427,12 @@ TEST_F(TestNanoVDB, ScalarSampleFromVoxels) auto trilinearIndex = [&](const nanovdb::Coord& ijk) -> float { return 0.34f + 1.6f * dx * ijk[0] + 6.7f * dx * ijk[1] - 3.5f * dx * ijk[2]; // index coordinates }; - using SrcGridT = nanovdb::build::Grid; + using SrcGridT = nanovdb::tools::build::Grid; SrcGridT srcGrid(1.0f); srcGrid.setTransform(dx); const nanovdb::CoordBBox 
bbox(nanovdb::Coord(0), nanovdb::Coord(128)); srcGrid(trilinearIndex, bbox); - auto handle = nanovdb::createNanoGrid(srcGrid); + auto handle = nanovdb::tools::createNanoGrid(srcGrid); EXPECT_TRUE(handle); EXPECT_EQ(1u, handle.gridCount()); auto* grid = handle.grid(); @@ -5346,10 +5445,10 @@ TEST_F(TestNanoVDB, ScalarSampleFromVoxels) //std::cerr << "Trilinear: exact = " << exact << ", approx = " << approx << std::endl; auto acc = grid->getAccessor(); - auto sampler0 = nanovdb::createSampler<0>(grid->tree()); - auto sampler1 = nanovdb::createSampler<1>(acc); - auto sampler2 = nanovdb::createSampler<2>(acc); - auto sampler3 = nanovdb::createSampler<3>(acc); + auto sampler0 = nanovdb::math::createSampler<0>(grid->tree()); + auto sampler1 = nanovdb::math::createSampler<1>(acc); + auto sampler2 = nanovdb::math::createSampler<2>(acc); + auto sampler3 = nanovdb::math::createSampler<3>(acc); //std::cerr << "0'th order: v = " << sampler0(xyz) << std::endl; EXPECT_EQ(approx, sampler0(xyz)); EXPECT_NE(exact, sampler0(xyz)); @@ -5386,12 +5485,12 @@ TEST_F(TestNanoVDB, VectorSampleFromVoxels) auto trilinearIndex = [&](const nanovdb::Coord& ijk) -> nanovdb::Vec3f { return nanovdb::Vec3f(0.34f, 1.6f * dx * ijk[0] + 6.7f * dx * ijk[1], -3.5f * dx * ijk[2]); // index coordinates }; - using SrcGridT = nanovdb::build::Grid; + using SrcGridT = nanovdb::tools::build::Grid; SrcGridT srcGrid(nanovdb::Vec3f(1.0f)); const nanovdb::CoordBBox bbox(nanovdb::Coord(0), nanovdb::Coord(128)); srcGrid(trilinearIndex, bbox); srcGrid.setTransform(dx); - auto handle = nanovdb::createNanoGrid(srcGrid); + auto handle = nanovdb::tools::createNanoGrid(srcGrid); EXPECT_TRUE(handle); EXPECT_EQ(1u, handle.gridCount()); auto* grid = handle.grid(); @@ -5403,69 +5502,84 @@ TEST_F(TestNanoVDB, VectorSampleFromVoxels) //std::cerr << "Trilinear: exact = " << exact << ", approx = " << approx << std::endl; auto acc = grid->getAccessor(); - auto sampler0 = nanovdb::createSampler<0>(acc); + auto sampler0 = nanovdb::math::createSampler<0>(acc); //std::cerr << "0'th order: v = " << sampler0(ijk) << std::endl; EXPECT_EQ(approx, sampler0(ijk)); - auto sampler1 = nanovdb::createSampler<1>(acc); // faster since it's using an accessor!!! + auto sampler1 = nanovdb::math::createSampler<1>(acc); // faster since it's using an accessor!!! 
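Reviewer note: the sampling utilities exercised in these two tests now live in nanovdb::math. A short sketch of the relocated factory, assuming a NanoVDB float grid pointer named grid built as in the tests above (the variable name and sample position are illustrative):

    auto acc = grid->getAccessor();
    auto sampler0 = nanovdb::math::createSampler<0>(acc); // nearest-neighbor lookup
    auto sampler1 = nanovdb::math::createSampler<1>(acc); // tri-linear interpolation
    const nanovdb::Vec3f xyz(13.4f, 24.7f, 5.2f);         // position in index space
    const float nearest   = sampler0(xyz);
    const float trilinear = sampler1(xyz);

The higher-order variants, createSampler<2> and createSampler<3> for tri-quadratic and tri-cubic interpolation, follow the same pattern, as the hunks above show.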
//std::cerr << "1'th order: v = " << sampler1(ijk) << std::endl; for (int i = 0; i < 3; ++i) EXPECT_NEAR(exact[i], sampler1(ijk)[i], 1e-5); //EXPECT_FALSE(sampler1.zeroCrossing());// triggeres a static_assert error //EXPECT_FALSE(sampler1.gradient(grid->indexToWorld(ijk)));// triggeres a static_assert error - nanovdb::SampleFromVoxels, 3> sampler3(grid->tree()); - //auto sampler3 = nanovdb::createSampler<3>( acc ); + nanovdb::math::SampleFromVoxels, 3> sampler3(grid->tree()); + //auto sampler3 = nanovdb::math::createSampler<3>( acc ); //std::cerr << "3'rd order: v = " << sampler3(ijk) << std::endl; for (int i = 0; i < 3; ++i) EXPECT_NEAR(exact[i], sampler3(ijk)[i], 1e-5); } // VectorSampleFromVoxels -TEST_F(TestNanoVDB, GridChecksum) -{ - EXPECT_TRUE(nanovdb::ChecksumMode::Disable < nanovdb::ChecksumMode::End); - EXPECT_TRUE(nanovdb::ChecksumMode::Partial < nanovdb::ChecksumMode::End); - EXPECT_TRUE(nanovdb::ChecksumMode::Full < nanovdb::ChecksumMode::End); - EXPECT_TRUE(nanovdb::ChecksumMode::Default < nanovdb::ChecksumMode::End); - EXPECT_NE(nanovdb::ChecksumMode::Disable, nanovdb::ChecksumMode::Partial); - EXPECT_NE(nanovdb::ChecksumMode::Disable, nanovdb::ChecksumMode::Full); - EXPECT_NE(nanovdb::ChecksumMode::Full, nanovdb::ChecksumMode::Partial); - EXPECT_NE(nanovdb::ChecksumMode::Default, nanovdb::ChecksumMode::Disable); - EXPECT_EQ(nanovdb::ChecksumMode::Default, nanovdb::ChecksumMode::Partial); - EXPECT_NE(nanovdb::ChecksumMode::Default, nanovdb::ChecksumMode::Full); - - nanovdb::CpuTimer timer; - //timer.start("nanovdb::createLevelSetSphere"); - auto handle = nanovdb::createLevelSetSphere(100.0f, +TEST_F(TestNanoVDB, Checksum) +{ + EXPECT_LT(nanovdb::CheckMode::Disable, nanovdb::CheckMode::End); + EXPECT_LT(nanovdb::CheckMode::Partial, nanovdb::CheckMode::End); + EXPECT_LT(nanovdb::CheckMode::Full, nanovdb::CheckMode::End); + EXPECT_LT(nanovdb::CheckMode::Default, nanovdb::CheckMode::End); + EXPECT_NE(nanovdb::CheckMode::Disable, nanovdb::CheckMode::Partial); + EXPECT_NE(nanovdb::CheckMode::Disable, nanovdb::CheckMode::Full); + EXPECT_NE(nanovdb::CheckMode::Full, nanovdb::CheckMode::Partial); + EXPECT_NE(nanovdb::CheckMode::Default, nanovdb::CheckMode::Disable); + EXPECT_EQ(nanovdb::CheckMode::Default, nanovdb::CheckMode::Partial); + EXPECT_NE(nanovdb::CheckMode::Default, nanovdb::CheckMode::Full); + + nanovdb::Checksum checksum1, checksum2, checksum3; + EXPECT_EQ(sizeof(checksum1), sizeof(uint64_t)); + EXPECT_EQ(~uint64_t(0), checksum1.full()); + EXPECT_EQ(checksum1.mode(), nanovdb::CheckMode::Disable); + EXPECT_EQ(nanovdb::toCheckMode(checksum1), nanovdb::CheckMode::Disable); + checksum1.head() = 0u; + EXPECT_EQ(checksum1.mode(), nanovdb::CheckMode::Partial); + checksum1.tail() = 0u; + EXPECT_EQ(checksum1.mode(), nanovdb::CheckMode::Full); + EXPECT_EQ( uint64_t(0), checksum1.full()); + checksum1.disable(); + EXPECT_EQ(~uint64_t(0), checksum1.full()); + EXPECT_EQ(checksum1.mode(), nanovdb::CheckMode::Disable); + EXPECT_EQ(nanovdb::toCheckMode(checksum1), nanovdb::CheckMode::Disable); + EXPECT_EQ(checksum1, checksum3); + + nanovdb::util::Timer timer; + //timer.start("nanovdb::tools::createLevelSetSphere"); + auto handle = nanovdb::tools::createLevelSetSphere(100.0f, nanovdb::Vec3d(50), 1.0, 3.0, nanovdb::Vec3d(0), "sphere_20", - nanovdb::StatsMode::Disable, - nanovdb::ChecksumMode::Disable); + nanovdb::tools::StatsMode::Disable, + nanovdb::CheckMode::Disable); //timer.stop(); EXPECT_TRUE(handle); EXPECT_EQ(1u, handle.gridCount()); auto* grid = handle.grid(); EXPECT_TRUE(grid); - 
nanovdb::GridChecksum checksum1, checksum2, checksum3; - - EXPECT_EQ(checksum1, checksum3); - //timer.start("Partial checksum"); - checksum3(*grid, nanovdb::ChecksumMode::Partial); + checksum3 = nanovdb::tools::evalChecksum(grid, nanovdb::CheckMode::Partial); + //checksum3(*grid, nanovdb::CheckMode::Partial); //timer.stop(); EXPECT_NE(checksum1, checksum3); //timer.start("Full checksum"); - checksum1(*grid, nanovdb::ChecksumMode::Full); + checksum1 = nanovdb::tools::evalChecksum(grid, nanovdb::CheckMode::Full); + //checksum1(*grid, nanovdb::CheckMode::Full); //timer.stop(); - checksum2(*grid, nanovdb::ChecksumMode::Full); + //checksum2(*grid, nanovdb::CheckMode::Full); + checksum2 = nanovdb::tools::evalChecksum(grid, nanovdb::CheckMode::Full); EXPECT_EQ(checksum1, checksum2); @@ -5474,72 +5588,84 @@ TEST_F(TestNanoVDB, GridChecksum) leaf->data()->mValues[0] += 0.00001f; // slightly modify a single voxel value - checksum2(*grid, nanovdb::ChecksumMode::Full); + checksum2 = nanovdb::tools::evalChecksum(grid, nanovdb::CheckMode::Full); + //checksum2(*grid, nanovdb::CheckMode::Full); EXPECT_NE(checksum1, checksum2); leaf->data()->mValues[0] -= 0.00001f; // change back the single voxel value to it's original value - checksum2(*grid, nanovdb::ChecksumMode::Full); + checksum2 = nanovdb::tools::evalChecksum(grid, nanovdb::CheckMode::Full); + //checksum2(*grid, nanovdb::CheckMode::Full); EXPECT_EQ(checksum1, checksum2); leaf->data()->mValueMask.toggle(0); // change a single bit in a value mask - checksum2(*grid, nanovdb::ChecksumMode::Full); + checksum2 = nanovdb::tools::evalChecksum(grid, nanovdb::CheckMode::Full); + //checksum2(*grid, nanovdb::CheckMode::Full); EXPECT_NE(checksum1, checksum2); //timer.start("Incomplete checksum"); - checksum2(*grid, nanovdb::ChecksumMode::Partial); + checksum2 = nanovdb::tools::evalChecksum(grid, nanovdb::CheckMode::Partial); + //checksum2(*grid, nanovdb::CheckMode::Partial); //timer.stop(); EXPECT_EQ(checksum2, checksum3); } // GridChecksum TEST_F(TestNanoVDB, GridValidator) { - nanovdb::CpuTimer timer; - //timer.start("nanovdb::createLevelSetSphere"); - auto handle = nanovdb::createLevelSetSphere(100.0f, + nanovdb::util::Timer timer; + //timer.start("nanovdb::tools::createLevelSetSphere"); + auto handle = nanovdb::tools::createLevelSetSphere(100.0f, nanovdb::Vec3d(50), 1.0, 3.0, nanovdb::Vec3d(0), "sphere_20", - nanovdb::StatsMode::All, - nanovdb::ChecksumMode::Full); + nanovdb::tools::StatsMode::All, + nanovdb::CheckMode::Full); //timer.stop(); EXPECT_TRUE(handle); EXPECT_EQ(1u, handle.gridCount()); auto* grid = handle.grid(); EXPECT_TRUE(grid); + { + auto mode = nanovdb::toCheckMode(grid->mChecksum); + EXPECT_EQ(nanovdb::CheckMode::Full, mode); + EXPECT_EQ(nanovdb::CheckMode::Full, grid->mChecksum.mode()); + char str[30]; + EXPECT_TRUE(nanovdb::util::streq(nanovdb::toStr(str, mode), "full")); + } + //timer.start("isValid - not detailed"); - EXPECT_TRUE(nanovdb::isValid(*grid, false, true)); + EXPECT_TRUE(nanovdb::tools::isValid(grid, nanovdb::CheckMode::Partial, true)); //timer.stop(); //timer.start("isValid - detailed"); - EXPECT_TRUE(nanovdb::isValid(*grid, true, true)); + EXPECT_TRUE(nanovdb::tools::isValid(grid, nanovdb::CheckMode::Full, true)); //timer.stop(); //timer.start("Full checksum"); - auto fastChecksum = nanovdb::checksum(*grid, nanovdb::ChecksumMode::Full); + auto fastChecksum = nanovdb::tools::evalChecksum(grid, nanovdb::CheckMode::Full); //timer.stop(); - EXPECT_EQ(fastChecksum, nanovdb::checksum(*grid, nanovdb::ChecksumMode::Full)); + 
EXPECT_EQ(fastChecksum, nanovdb::tools::evalChecksum(grid, nanovdb::CheckMode::Full)); //auto mgr = nanovdb::createLeafMg auto* leaf = grid->tree().getFirstLeaf(); leaf->data()->mValues[0] += 0.00001f; // slightly modify a single voxel value - EXPECT_NE(fastChecksum, nanovdb::checksum(*grid, nanovdb::ChecksumMode::Full)); - EXPECT_FALSE(nanovdb::isValid(*grid, true, false)); + EXPECT_NE(fastChecksum, nanovdb::tools::evalChecksum(grid, nanovdb::CheckMode::Full)); + EXPECT_FALSE(nanovdb::tools::isValid(grid, nanovdb::CheckMode::Full, false)); leaf->data()->mValues[0] -= 0.00001f; // change back the single voxel value to it's original value - EXPECT_EQ(fastChecksum, nanovdb::checksum(*grid, nanovdb::ChecksumMode::Full)); - EXPECT_TRUE(nanovdb::isValid(*grid, true, true)); + EXPECT_EQ(fastChecksum, nanovdb::tools::evalChecksum(grid, nanovdb::CheckMode::Full)); + EXPECT_TRUE(nanovdb::tools::isValid(grid, nanovdb::CheckMode::Full, true)); leaf->data()->mValueMask.toggle(0); // change a singel bit in a value mask - EXPECT_NE(fastChecksum, nanovdb::checksum(*grid, nanovdb::ChecksumMode::Full)); - EXPECT_FALSE(nanovdb::isValid(*grid, true, false)); + EXPECT_NE(fastChecksum, nanovdb::tools::evalChecksum(grid, nanovdb::CheckMode::Full)); + EXPECT_FALSE(nanovdb::tools::isValid(grid, nanovdb::CheckMode::Full, false)); } // GridValidator TEST_F(TestNanoVDB, RandomReadAccessor) @@ -5548,7 +5674,7 @@ TEST_F(TestNanoVDB, RandomReadAccessor) const int voxelCount = 512, min = -10000, max = 10000; std::srand(98765); auto op = [&](){return rand() % (max - min) + min;}; - using SrcGridT = nanovdb::build::Grid; + using SrcGridT = nanovdb::tools::build::Grid; for (int i=0; i<10; ++i) { SrcGridT srcGrid(background); auto acc = srcGrid.getAccessor(); @@ -5560,7 +5686,7 @@ TEST_F(TestNanoVDB, RandomReadAccessor) ijk[2] = op(); acc.setValue(ijk, 1.0f*j); } - auto gridHdl = nanovdb::createNanoGrid(srcGrid); + auto gridHdl = nanovdb::tools::createNanoGrid(srcGrid); EXPECT_TRUE(gridHdl); EXPECT_EQ(1u, gridHdl.gridCount()); auto grid = gridHdl.grid(); @@ -5610,7 +5736,7 @@ TEST_F(TestNanoVDB, RandomReadAccessor) TEST_F(TestNanoVDB, StandardDeviation) { using OpT = nanovdb::GetNodeInfo; - using SrcGridT = nanovdb::build::Grid; + using SrcGridT = nanovdb::tools::build::Grid; SrcGridT srcGrid(0.5f); { @@ -5620,11 +5746,11 @@ TEST_F(TestNanoVDB, StandardDeviation) acc.setValue(nanovdb::Coord(1), 3.0f); acc.setValue(nanovdb::Coord(2), 0.0f); } - auto gridHdl = nanovdb::createNanoGrid(srcGrid); + auto gridHdl = nanovdb::tools::createNanoGrid(srcGrid); EXPECT_TRUE(gridHdl); auto grid = gridHdl.grid(); EXPECT_TRUE(grid); - nanovdb::gridStats(*grid); + nanovdb::tools::updateGridStats(grid); auto acc = grid->tree().getAccessor(); { @@ -5682,13 +5808,13 @@ TEST_F(TestNanoVDB, BoxStencil) const float a = 0.54f, b[3]={0.12f, 0.78f,-0.34f}; const nanovdb::Coord min(-17, -10, -8), max(10, 21, 13); const nanovdb::CoordBBox bbox(min, max), bbox2(min, max.offsetBy(-1)); - using SrcGridT = nanovdb::build::Grid; + using SrcGridT = nanovdb::tools::build::Grid; SrcGridT srcGrid(0.0f); auto func = [&](const nanovdb::Coord &ijk) { return a + b[0]*ijk[0] + b[1]*ijk[1] + b[2]*ijk[2]; }; srcGrid(func, bbox); - auto handle = nanovdb::createNanoGrid(srcGrid); + auto handle = nanovdb::tools::createNanoGrid(srcGrid); EXPECT_TRUE(handle); EXPECT_EQ(1u, handle.gridCount()); auto* grid = handle.grid(); @@ -5700,7 +5826,7 @@ TEST_F(TestNanoVDB, BoxStencil) auto func2 = [&](const nanovdb::Vec3f &xyz) { return a + b[0]*xyz[0] + b[1]*xyz[1] + b[2]*xyz[2]; }; 
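Reviewer note: the checksum hunks above replace the functor-style nanovdb::GridChecksum with a plain nanovdb::Checksum value type plus free functions in nanovdb::tools. A condensed sketch of the new flow, assuming the <float> template argument that the flattened hunks elide:

    auto handle = nanovdb::tools::createLevelSetSphere(100.0f, nanovdb::Vec3d(50),
                      1.0, 3.0, nanovdb::Vec3d(0), "sphere",
                      nanovdb::tools::StatsMode::Default, nanovdb::CheckMode::Full);
    auto* grid = handle.grid<float>();
    // evalChecksum returns a Checksum value instead of mutating a functor in place
    nanovdb::Checksum cs = nanovdb::tools::evalChecksum(grid, nanovdb::CheckMode::Full);
    EXPECT_EQ(cs, nanovdb::tools::evalChecksum(grid, nanovdb::CheckMode::Full)); // deterministic
    // isValid now takes a CheckMode instead of a boolean "detailed" flag
    EXPECT_TRUE(nanovdb::tools::isValid(grid, nanovdb::CheckMode::Full, /*verbose=*/false));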
- nanovdb::BoxStencil s(*grid); + nanovdb::math::BoxStencil s(*grid); for (auto it = bbox2.begin(); it; ++it) { const nanovdb::Coord p = *it; s.moveTo(p); @@ -5718,7 +5844,7 @@ TEST_F(TestNanoVDB, CurvatureStencil) {// test of level set to sphere at (6,8,10) with R=10 and dx=0.5 const float radius = 10.0f; const nanovdb::Vec3d center(6.0, 8.0, 10.0);//i.e. (12,16,20) in index space - auto handle = nanovdb::createLevelSetSphere(radius, + auto handle = nanovdb::tools::createLevelSetSphere(radius, center, 0.5, // dx 20.0); // half-width so dense inside @@ -5728,7 +5854,7 @@ TEST_F(TestNanoVDB, CurvatureStencil) auto* grid = handle.grid(); EXPECT_TRUE(grid); - nanovdb::CurvatureStencil cs(*grid); + nanovdb::math::CurvatureStencil cs(*grid); nanovdb::Coord xyz(20,16,20);//i.e. 8 voxel or 4 world units away from the center cs.moveTo(xyz); @@ -5783,14 +5909,14 @@ TEST_F(TestNanoVDB, CurvatureStencil) // sparse level set sphere nanovdb::Vec3d C(0.35f, 0.35f, 0.35f); double r = 0.15, voxelSize = 1.0/(dim-1); - auto handle = nanovdb::createLevelSetSphere(r, C, voxelSize); + auto handle = nanovdb::tools::createLevelSetSphere(r, C, voxelSize); EXPECT_TRUE(handle); EXPECT_EQ(1u, handle.gridCount()); auto* sphere = handle.grid(); EXPECT_TRUE(sphere); - nanovdb::CurvatureStencil cs(*sphere); - const auto ijk = nanovdb::RoundDown(sphere->worldToIndex(nanovdb::Vec3d(0.35, 0.35, 0.35 + 0.15))); + nanovdb::math::CurvatureStencil cs(*sphere); + const auto ijk = nanovdb::math::RoundDown(sphere->worldToIndex(nanovdb::Vec3d(0.35, 0.35, 0.35 + 0.15))); const nanovdb::Vec3d tmp(ijk[0],ijk[1],ijk[2]); const double radius = (sphere->indexToWorld(tmp)-nanovdb::Vec3d(0.35)).length(); //std::cerr << "\rRadius = " << radius << std::endl; @@ -5858,7 +5984,7 @@ TEST_F(TestNanoVDB, GradStencil) {// test of level set to sphere at (6,8,10) with R=10 and dx=0.5 const float radius = 10.0f;// 20 voxels const nanovdb::Vec3d center(6.0, 8.0, 10.0);//i.e. (12,16,20) in index space - auto handle = nanovdb::createLevelSetSphere(radius, + auto handle = nanovdb::tools::createLevelSetSphere(radius, center, 0.5, // dx 20.0);// width, so dense inside @@ -5869,7 +5995,7 @@ TEST_F(TestNanoVDB, GradStencil) EXPECT_TRUE(grid); EXPECT_EQ(0.5f, grid->voxelSize()[0]); - nanovdb::GradStencil cs(*grid); + nanovdb::math::GradStencil cs(*grid); nanovdb::Coord ijk(12, 16, 20);// on the surface in the +x direction const nanovdb::Vec3d xyz(ijk[0], ijk[1], ijk[2]); @@ -5906,7 +6032,7 @@ TEST_F(TestNanoVDB, WenoStencil) {// test of level set to sphere at (6,8,10) with R=10 and dx=0.5 const float radius = 10.0f;// 20 voxels const nanovdb::Vec3d center(6.0, 8.0, 10.0);//i.e. 
(12,16,20) in index space - auto handle = nanovdb::createLevelSetSphere(radius, + auto handle = nanovdb::tools::createLevelSetSphere(radius, center, 0.5, // dx 20.0);// width, so dense inside @@ -5917,7 +6043,7 @@ TEST_F(TestNanoVDB, WenoStencil) EXPECT_TRUE(grid); EXPECT_EQ(0.5f, grid->voxelSize()[0]); - nanovdb::WenoStencil cs(*grid); + nanovdb::math::WenoStencil cs(*grid); nanovdb::Coord ijk(12, 16, 20);// on the surface in the +x direction const nanovdb::Vec3d xyz(ijk[0], ijk[1], ijk[2]); @@ -5951,7 +6077,7 @@ TEST_F(TestNanoVDB, WenoStencil) TEST_F(TestNanoVDB, StencilIntersection) { - using SrcGridT = nanovdb::build::Grid; + using SrcGridT = nanovdb::tools::build::Grid; const nanovdb::Coord ijk(1,4,-9); SrcGridT srcGrid(0.0f); auto acc = srcGrid.getAccessor(); @@ -5971,12 +6097,12 @@ TEST_F(TestNanoVDB, StencilIntersection) for (int pz=0; pz<2; ++pz) { acc.setValue(ijk.offsetBy(0,0,1), pz ? 1.0f : -1.0f); ++cases; - auto handle = nanovdb::createNanoGrid(srcGrid); + auto handle = nanovdb::tools::createNanoGrid(srcGrid); EXPECT_TRUE(handle); auto grid = handle.grid(); EXPECT_TRUE(grid); EXPECT_EQ(7, int(grid->activeVoxelCount())); - nanovdb::GradStencil stencil(*grid); + nanovdb::math::GradStencil stencil(*grid); stencil.moveTo(ijk); const int count = mx + px + my + py + mz + pz;// number of intersections EXPECT_TRUE(stencil.intersects() == (count > 0)); @@ -6009,39 +6135,45 @@ TEST_F(TestNanoVDB, MultiFile) } std::vector> handles; { // add an int32_t grid - nanovdb::build::Grid grid(-1, "Int32 grid"); + nanovdb::tools::build::Grid grid(-1, "Int32 grid"); auto acc = grid.getAccessor(); acc.setValue(nanovdb::Coord(-256), 10); - handles.push_back(nanovdb::createNanoGrid(grid)); + handles.push_back(nanovdb::tools::createNanoGrid(grid)); } { // add an empty int32_t grid - nanovdb::build::Grid grid(-4, "Int32 grid, empty"); - handles.push_back(nanovdb::createNanoGrid(grid)); + nanovdb::tools::build::Grid grid(-4, "Int32 grid, empty"); + handles.push_back(nanovdb::tools::createNanoGrid(grid)); } { // add a Vec3f grid - nanovdb::build::Grid grid(nanovdb::Vec3f(0.0f, 0.0f, -1.0f),"Float vector grid",nanovdb::GridClass::Staggered); + nanovdb::tools::build::Grid grid(nanovdb::Vec3f(0.0f, 0.0f, -1.0f),"Float vector grid",nanovdb::GridClass::Staggered); auto acc = grid.getAccessor(); acc.setValue(nanovdb::Coord(-256), nanovdb::Vec3f(1.0f, 0.0f, 0.0f)); - handles.push_back(nanovdb::createNanoGrid(grid)); + handles.push_back(nanovdb::tools::createNanoGrid(grid)); } { // add an int64_t grid - nanovdb::build::Grid grid(0, "Int64 grid"); + nanovdb::tools::build::Grid grid(0, "Int64 grid"); auto acc = grid.getAccessor(); acc.setValue(nanovdb::Coord(0), 10); - handles.push_back(nanovdb::createNanoGrid(grid)); + handles.push_back(nanovdb::tools::createNanoGrid(grid)); + } + { // add an uint8_t grid + nanovdb::tools::build::Grid grid(0, "UInt8 grid"); + auto acc = grid.getAccessor(); + acc.setValue(nanovdb::Coord(0), 8u); + handles.push_back(nanovdb::tools::createNanoGrid(grid)); } for (int i = 0; i < 10; ++i) { const float radius = 100.0f; const float voxelSize = 1.0f, width = 3.0f; const nanovdb::Vec3d center(i * 10.0f, 0.0f, 0.0f); - handles.push_back(nanovdb::createLevelSetSphere(radius, center, voxelSize, width, + handles.push_back(nanovdb::tools::createLevelSetSphere(radius, center, voxelSize, width, nanovdb::Vec3d(0), "Level set sphere at (" + std::to_string(i * 10) + ",0,0)")); } { // add a double grid - nanovdb::build::Grid grid(0.0, "Double grid", nanovdb::GridClass::FogVolume); + 
nanovdb::tools::build::Grid grid(0.0, "Double grid", nanovdb::GridClass::FogVolume); auto acc = grid.getAccessor(); acc.setValue(nanovdb::Coord(6000), 1.0); - handles.push_back(nanovdb::createNanoGrid(grid)); + handles.push_back(nanovdb::tools::createNanoGrid(grid)); } #if defined(NANOVDB_USE_BLOSC) nanovdb::io::writeGrids("data/multi1.nvdb", handles, nanovdb::io::Codec::BLOSC); @@ -6054,14 +6186,14 @@ TEST_F(TestNanoVDB, MultiFile) //mTimer.start("nanovdb::io::readGridMetaData"); auto meta = nanovdb::io::readGridMetaData("data/multi1.nvdb"); //mTimer.stop(); - EXPECT_EQ(15u, meta.size()); + EXPECT_EQ(16u, meta.size()); EXPECT_EQ(std::string("Double grid"), meta.back().gridName); } { // read in32 grid and test values //mTimer.start("Reading multiple grids from file"); auto handles = nanovdb::io::readGrids("data/multi1.nvdb"); //mTimer.stop(); - EXPECT_EQ(15u, handles.size()); + EXPECT_EQ(16u, handles.size()); auto& handle = handles.front(); EXPECT_EQ(1u, handle.gridCount()); EXPECT_EQ(std::string("Int32 grid"), handle.gridMetaData()->shortGridName()); @@ -6107,7 +6239,7 @@ TEST_F(TestNanoVDB, MultiFile) //mTimer.start("Reading multiple grids from file"); auto handles = nanovdb::io::readGrids("data/multi1.nvdb"); //mTimer.stop(); - EXPECT_EQ(15u, handles.size()); + EXPECT_EQ(16u, handles.size()); auto& handle = handles[1]; EXPECT_TRUE(handle); EXPECT_EQ(1u, handle.gridCount()); @@ -6145,7 +6277,7 @@ TEST_F(TestNanoVDB, MultiFile) //mTimer.start("Reading multiple grids from file"); auto handles = nanovdb::io::readGrids("data/multi1.nvdb"); //mTimer.stop(); - EXPECT_EQ(15u, handles.size()); + EXPECT_EQ(16u, handles.size()); auto& handle = handles[3]; EXPECT_EQ(1u, handle.gridCount()); EXPECT_TRUE(handle); @@ -6165,9 +6297,35 @@ TEST_F(TestNanoVDB, MultiFile) EXPECT_TRUE(grid->isUnknown()); EXPECT_FALSE(grid->isStaggered()); } + /* + { // read uint8 grid and test values + //mTimer.start("Reading multiple grids from file"); + auto handles = nanovdb::io::readGrids("data/multi1.nvdb"); + //mTimer.stop(); + EXPECT_EQ(16u, handles.size()); + auto& handle = handles[4]; + EXPECT_EQ(1u, handle.gridCount()); + EXPECT_TRUE(handle); + EXPECT_EQ(std::string("UInt8 grid"), handle.gridMetaData()->shortGridName()); + auto* grid = handle.grid(); + EXPECT_TRUE(grid); + EXPECT_EQ(handle.gridMetaData()->indexBBox(), grid->indexBBox()); + EXPECT_EQ(1u, grid->activeVoxelCount()); + const nanovdb::Coord ijk(0); + EXPECT_EQ(8u, grid->tree().getValue(ijk)); + EXPECT_EQ(0, grid->tree().getValue(ijk + nanovdb::Coord(1, 0, 0))); + EXPECT_EQ(8u, grid->tree().root().minimum()); + EXPECT_EQ(8u, grid->tree().root().maximum()); + EXPECT_EQ(nanovdb::CoordBBox(ijk, ijk), grid->indexBBox()); + EXPECT_FALSE(grid->isLevelSet()); + EXPECT_FALSE(grid->isFogVolume()); + EXPECT_TRUE(grid->isUnknown()); + EXPECT_FALSE(grid->isStaggered()); + } + */ { // read vec3f grid and test values auto handles = nanovdb::io::readGrids("data/multi1.nvdb"); - EXPECT_EQ(15u, handles.size()); + EXPECT_EQ(16u, handles.size()); auto& handle = handles[2]; EXPECT_TRUE(handle); EXPECT_EQ(1u, handle.gridCount()); @@ -6189,7 +6347,7 @@ TEST_F(TestNanoVDB, MultiFile) } { // read double grid and test values auto handles = nanovdb::io::readGrids("data/multi1.nvdb"); - EXPECT_EQ(15u, handles.size()); + EXPECT_EQ(16u, handles.size()); auto& handle = handles.back(); EXPECT_TRUE(handle); EXPECT_EQ(1u, handle.gridCount()); @@ -6217,8 +6375,8 @@ TEST_F(TestNanoVDB, HostBuffer) std::vector > gridHdls; // create two grids... 
- gridHdls.push_back(nanovdb::createLevelSetSphere(100.0, nanovdb::Vec3d(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "spheref")); - gridHdls.push_back(nanovdb::createLevelSetSphere(100.0, nanovdb::Vec3d( 20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphered")); + gridHdls.push_back(nanovdb::tools::createLevelSetSphere(100.0, nanovdb::Vec3d(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "spheref")); + gridHdls.push_back(nanovdb::tools::createLevelSetSphere(100.0, nanovdb::Vec3d( 20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphered")); EXPECT_TRUE(gridHdls[0]); auto* meta0 = gridHdls[0].gridMetaData(); @@ -6256,8 +6414,8 @@ TEST_F(TestNanoVDB, HostBuffer) std::vector > gridHdls; // create two grids... - gridHdls.push_back(nanovdb::createLevelSetSphere(100.0, nanovdb::Vec3d(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "spheref", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, pool)); - gridHdls.push_back(nanovdb::createLevelSetSphere(100.0, nanovdb::Vec3d( 20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphered", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, pool)); + gridHdls.push_back(nanovdb::tools::createLevelSetSphere(100.0, nanovdb::Vec3d(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "spheref", nanovdb::tools::StatsMode::BBox, nanovdb::CheckMode::Partial, pool)); + gridHdls.push_back(nanovdb::tools::createLevelSetSphere(100.0, nanovdb::Vec3d( 20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphered", nanovdb::tools::StatsMode::BBox, nanovdb::CheckMode::Partial, pool)); EXPECT_TRUE(gridHdls[0]); auto* meta0 = gridHdls[0].gridMetaData(); @@ -6337,8 +6495,8 @@ TEST_F(TestNanoVDB, HostBuffer) std::vector > gridHdls; // create two grids... - ASSERT_THROW(gridHdls.push_back(nanovdb::createLevelSetSphere( 100.0f, nanovdb::Vec3d(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "spheref", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, pool)), std::runtime_error); - ASSERT_THROW(gridHdls.push_back(nanovdb::createLevelSetSphere( 100.0, nanovdb::Vec3d( 20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphered", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, pool)), std::runtime_error); + ASSERT_THROW(gridHdls.push_back(nanovdb::tools::createLevelSetSphere( 100.0f, nanovdb::Vec3d(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "spheref", nanovdb::tools::StatsMode::BBox, nanovdb::CheckMode::Partial, pool)), std::runtime_error); + ASSERT_THROW(gridHdls.push_back(nanovdb::tools::createLevelSetSphere( 100.0, nanovdb::Vec3d( 20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphered", nanovdb::tools::StatsMode::BBox, nanovdb::CheckMode::Partial, pool)), std::runtime_error); } {// zero internal memory size ASSERT_THROW(nanovdb::HostBuffer::createPool(0), std::runtime_error); @@ -6359,8 +6517,8 @@ TEST_F(TestNanoVDB, HostBuffer) std::vector > gridHdls; // create two grids... 
- gridHdls.push_back(nanovdb::createLevelSetSphere( 100.0f, nanovdb::Vec3d(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "spheref", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, pool)); - gridHdls.push_back(nanovdb::createLevelSetSphere( 100.0, nanovdb::Vec3d( 20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphered", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, pool)); + gridHdls.push_back(nanovdb::tools::createLevelSetSphere( 100.0f, nanovdb::Vec3d(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "spheref", nanovdb::tools::StatsMode::BBox, nanovdb::CheckMode::Partial, pool)); + gridHdls.push_back(nanovdb::tools::createLevelSetSphere( 100.0, nanovdb::Vec3d( 20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphered", nanovdb::tools::StatsMode::BBox, nanovdb::CheckMode::Partial, pool)); EXPECT_TRUE(gridHdls[0]); auto* meta0 = gridHdls[0].gridMetaData(); @@ -6415,8 +6573,8 @@ TEST_F(TestNanoVDB, HostBuffer) std::vector > gridHdls; // create two grids... - ASSERT_THROW(gridHdls.push_back(nanovdb::createLevelSetSphere( 100.0, nanovdb::Vec3d(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "spheref", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, pool)), std::runtime_error); - ASSERT_THROW(gridHdls.push_back(nanovdb::createLevelSetSphere( 100.0, nanovdb::Vec3d( 20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphered", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, pool)), std::runtime_error); + ASSERT_THROW(gridHdls.push_back(nanovdb::tools::createLevelSetSphere( 100.0, nanovdb::Vec3d(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "spheref", nanovdb::tools::StatsMode::BBox, nanovdb::CheckMode::Partial, pool)), std::runtime_error); + ASSERT_THROW(gridHdls.push_back(nanovdb::tools::createLevelSetSphere( 100.0, nanovdb::Vec3d( 20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphered", nanovdb::tools::StatsMode::BBox, nanovdb::CheckMode::Partial, pool)), std::runtime_error); EXPECT_FALSE(pool.isManaged()); pool.resizePool(1<<26);// resize to 64 MB @@ -6437,8 +6595,8 @@ TEST_F(TestNanoVDB, HostBuffer) EXPECT_FALSE(buffer.isFull()); EXPECT_TRUE(buffer.isManaged()); - gridHdls.push_back(nanovdb::createLevelSetSphere( 100.0, nanovdb::Vec3d(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "spheref", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, pool)); - gridHdls.push_back(nanovdb::createLevelSetSphere( 100.0, nanovdb::Vec3d( 20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphered", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, pool)); + gridHdls.push_back(nanovdb::tools::createLevelSetSphere( 100.0, nanovdb::Vec3d(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "spheref", nanovdb::tools::StatsMode::BBox, nanovdb::CheckMode::Partial, pool)); + gridHdls.push_back(nanovdb::tools::createLevelSetSphere( 100.0, nanovdb::Vec3d( 20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphered", nanovdb::tools::StatsMode::BBox, nanovdb::CheckMode::Partial, pool)); EXPECT_TRUE(gridHdls[0]); auto* meta0 = gridHdls[0].gridMetaData(); @@ -6483,23 +6641,25 @@ TEST_F(TestNanoVDB, HostBuffer) auto pool = nanovdb::HostBuffer::createPool(poolSize, nanovdb::alignPtr(array.get())); EXPECT_EQ(128ULL * 1024 * 1024, pool.poolSize()); auto handles = nanovdb::io::readGrids("data/multi1.nvdb", 0, pool); - EXPECT_EQ(15u, handles.size()); + EXPECT_EQ(16u, handles.size()); for (auto &h : handles) EXPECT_TRUE(h); EXPECT_EQ(std::string("Int32 grid"), handles[0].grid()->gridName()); EXPECT_EQ(std::string("Int32 grid, empty"), handles[1].grid()->gridName()); EXPECT_EQ(std::string("Float vector grid"), handles[2].grid()->gridName()); 
EXPECT_EQ(std::string("Int64 grid"), handles[3].grid()->gridName()); - EXPECT_EQ(std::string("Double grid"), handles[14].grid()->gridName()); + EXPECT_EQ(std::string("UInt8 grid"), handles[4].grid()->gridName()); + EXPECT_EQ(std::string("Double grid"), handles[15].grid()->gridName()); pool.reset(); for (auto &h : handles) EXPECT_FALSE(h); handles = nanovdb::io::readGrids("data/multi1.nvdb", 0, pool); - EXPECT_EQ(15u, handles.size()); + EXPECT_EQ(16u, handles.size()); for (auto &h : handles) EXPECT_TRUE(h); EXPECT_EQ(std::string("Int32 grid"), handles[0].grid()->gridName()); EXPECT_EQ(std::string("Int32 grid, empty"), handles[1].grid()->gridName()); EXPECT_EQ(std::string("Float vector grid"), handles[2].grid()->gridName()); EXPECT_EQ(std::string("Int64 grid"), handles[3].grid()->gridName()); - EXPECT_EQ(std::string("Double grid"), handles[14].grid()->gridName()); + EXPECT_EQ(std::string("UInt8 grid"), handles[4].grid()->gridName()); + EXPECT_EQ(std::string("Double grid"), handles[15].grid()->gridName()); } catch(const std::exception& e) { std::cout << "Unable to read \"data/multi1.nvdb\" for unit-test\n" << e.what() << std::endl; } @@ -6513,7 +6673,7 @@ TEST_F(TestNanoVDB, NodeIterators) const float halfWidth = 3.0f; const nanovdb::Vec3d center(0); //mTimer.start("Create level set sphere"); - auto handle1 = nanovdb::createLevelSetSphere(radius, center, voxelSize, halfWidth); + auto handle1 = nanovdb::tools::createLevelSetSphere(radius, center, voxelSize, halfWidth); //mTimer.stop(); auto *fltGrid = handle1.grid(); EXPECT_TRUE(fltGrid); @@ -6626,13 +6786,13 @@ TEST_F(TestNanoVDB, BasicValueIndexStats) EXPECT_EQ(64u, size4 - size3);// 512 bits = 64 bytes } EXPECT_TRUE(nanovdb::Version() >= nanovdb::Version(32,3,4)); - using SrcGridT = nanovdb::build::Grid; + using SrcGridT = nanovdb::tools::build::Grid; SrcGridT srcGrid(0.0f); auto acc = srcGrid.getAccessor(); const nanovdb::Coord ijk(0,0,1); acc.setValue(ijk, 1.0f); - auto handle1 = nanovdb::createNanoGrid(srcGrid); + auto handle1 = nanovdb::tools::createNanoGrid(srcGrid); auto *fltGrid = handle1.grid(); EXPECT_TRUE(fltGrid); @@ -6648,7 +6808,7 @@ TEST_F(TestNanoVDB, BasicValueIndexStats) EXPECT_EQ(1.0f, fltGrid->tree().getValue(ijk)); EXPECT_EQ(0.0f, fltGrid->tree().getValue(nanovdb::Coord(0,0,0))); - auto handle2 = nanovdb::createNanoGrid(*fltGrid, 1u, true, true); + auto handle2 = nanovdb::tools::createNanoGrid(*fltGrid, 1u, true, true); auto *idxGrid = handle2.grid(); EXPECT_TRUE(idxGrid); EXPECT_EQ(1u, idxGrid->blindDataCount()); @@ -6714,13 +6874,13 @@ TEST_F(TestNanoVDB, BasicValueIndexStats) TEST_F(TestNanoVDB, BasicValueIndexStats2) { EXPECT_TRUE(nanovdb::Version() >= nanovdb::Version(32,3,4)); - using SrcGridT = nanovdb::build::Grid; + using SrcGridT = nanovdb::tools::build::Grid; SrcGridT srcGrid(0.0f); auto acc = srcGrid.getAccessor(); const nanovdb::Coord ijk(0,0,1); acc.setValue(ijk, 1.0f); - auto handle2 = nanovdb::createNanoGrid(srcGrid, 1u, true, true); + auto handle2 = nanovdb::tools::createNanoGrid(srcGrid, 1u, true, true); auto *idxGrid = handle2.grid(); EXPECT_TRUE(idxGrid); @@ -6784,12 +6944,12 @@ TEST_F(TestNanoVDB, BasicValueIndexStats2) TEST_F(TestNanoVDB, ValueMask2ValueIndex) { - using SrcGridT = nanovdb::build::Grid; + using SrcGridT = nanovdb::tools::build::Grid; SrcGridT srcGrid(true); auto acc = srcGrid.getAccessor(); const nanovdb::Coord ijk(0,0,1); acc.setValue(ijk, true); - auto handle = nanovdb::createNanoGrid(srcGrid, 0u, false, false);// no stats or tiles + auto handle = 
nanovdb::tools::createNanoGrid(srcGrid, 0u, false, false);// no stats or tiles auto *idxGrid = handle.grid(); EXPECT_TRUE(idxGrid); EXPECT_EQ(1u, idxGrid->activeVoxelCount()); @@ -6798,12 +6958,12 @@ TEST_F(TestNanoVDB, ValueMask2ValueIndex) TEST_F(TestNanoVDB, ValueMask2ValueOnIndex) { - using SrcGridT = nanovdb::build::Grid; + using SrcGridT = nanovdb::tools::build::Grid; SrcGridT srcGrid(true); auto acc = srcGrid.getAccessor(); const nanovdb::Coord ijk(0,0,1); acc.setValue(ijk, true); - auto handle = nanovdb::createNanoGrid(srcGrid, 0u, true, false);// stats but no tiles + auto handle = nanovdb::tools::createNanoGrid(srcGrid, 0u, true, false);// stats but no tiles auto *idxGrid = handle.grid(); EXPECT_TRUE(idxGrid); EXPECT_EQ(1u, idxGrid->activeVoxelCount()); @@ -6831,12 +6991,12 @@ TEST_F(TestNanoVDB, ValueMask2ValueOnIndex) TEST_F(TestNanoVDB, BasicValueIndexNoStats) { EXPECT_TRUE(nanovdb::Version() >= nanovdb::Version(32,3,4)); - using SrcGridT = nanovdb::build::Grid; + using SrcGridT = nanovdb::tools::build::Grid; SrcGridT srcGrid(0.0f); auto acc = srcGrid.getAccessor(); const nanovdb::Coord ijk(0,0,1); acc.setValue(ijk, 1.0f); - nanovdb::CreateNanoGrid converter(srcGrid); + nanovdb::tools::CreateNanoGrid converter(srcGrid); auto handle1 = converter.getHandle(); auto *fltGrid = handle1.grid(); EXPECT_TRUE(fltGrid); @@ -6911,12 +7071,12 @@ TEST_F(TestNanoVDB, BasicValueIndexNoStats) TEST_F(TestNanoVDB, BasicValueIndexNoStatsNoTiles) { EXPECT_TRUE(nanovdb::Version() >= nanovdb::Version(32,3,4)); - using SrcGridT = nanovdb::build::Grid; + using SrcGridT = nanovdb::tools::build::Grid; SrcGridT srcGrid(0.0f); auto acc = srcGrid.getAccessor(); const nanovdb::Coord ijk(0,0,1); acc.setValue(ijk, 1.0f); - nanovdb::CreateNanoGrid converter(srcGrid); + nanovdb::tools::CreateNanoGrid converter(srcGrid); auto handle1 = converter.getHandle(); auto *fltGrid = handle1.grid(); @@ -6997,12 +7157,12 @@ TEST_F(TestNanoVDB, BasicValueIndexNoStatsNoTiles) TEST_F(TestNanoVDB, SparseIndexGridBuilder1) { EXPECT_TRUE(nanovdb::Version() >= nanovdb::Version(32,3,4)); - using SrcGridT = nanovdb::build::Grid; + using SrcGridT = nanovdb::tools::build::Grid; SrcGridT srcGrid(0.0f); auto acc = srcGrid.getAccessor(); const nanovdb::Coord ijk(0,0,1); acc.setValue(ijk, 1.0f); - nanovdb::CreateNanoGrid converter(srcGrid); + nanovdb::tools::CreateNanoGrid converter(srcGrid); auto handle1 = converter.getHandle(); auto *fltGrid = handle1.grid(); EXPECT_TRUE(fltGrid); @@ -7083,7 +7243,7 @@ TEST_F(TestNanoVDB, IndexGridBuilder2) const float halfWidth = 3.0f; const nanovdb::Vec3d center(0); //mTimer.start("Create level set sphere"); - auto handle1 = nanovdb::createLevelSetSphere(radius, center, voxelSize, halfWidth); + auto handle1 = nanovdb::tools::createLevelSetSphere(radius, center, voxelSize, halfWidth); //mTimer.stop(); auto *fltGrid = handle1.grid(); EXPECT_TRUE(fltGrid); @@ -7092,7 +7252,7 @@ TEST_F(TestNanoVDB, IndexGridBuilder2) //std::cerr << "FloatGrid footprint: " << (fltGrid->gridSize()>>20) << "MB" << std::endl; // create an IndexGrid for the FloatGrid - nanovdb::CreateNanoGrid builder2(*fltGrid); + nanovdb::tools::CreateNanoGrid builder2(*fltGrid); //mTimer.start("Create IndexGrid"); auto handle2 = builder2.getHandle(1u); //mTimer.stop(); @@ -7185,7 +7345,7 @@ TEST_F(TestNanoVDB, IndexGridBuilder2) } //mTimer.restart("Parallel bbox test of value buffer"); // here is a multi-threaded version - nanovdb::forEach(idxGrid->indexBBox(),[&](const nanovdb::CoordBBox &bbox){ + 
nanovdb::util::forEach(idxGrid->indexBBox(),[&](const nanovdb::CoordBBox &bbox){ auto idxAcc = idxTree.getAccessor();// NOT thread-safe! auto fltAcc = fltTree.getAccessor();// NOT thread-safe! for (auto it = bbox.begin(); it; ++it) EXPECT_EQ(values[idxAcc.getValue(*it)], fltAcc.getValue(*it)); @@ -7211,7 +7371,7 @@ TEST_F(TestNanoVDB, IndexGridBuilder2) }// loop over leaf nodes //mTimer.restart("Parallel leaf iterator test of active voxels"); auto *idxLeaf0 = idxTree.getFirstNode<0>(); - nanovdb::forEach(nanovdb::Range1D(0,idxTree.nodeCount(0)),[&](const nanovdb::Range1D &r){ + nanovdb::util::forEach(nanovdb::util::Range1D(0,idxTree.nodeCount(0)),[&](const nanovdb::util::Range1D &r){ auto fltAcc = fltTree.getAccessor();// NOT thread-safe! for (auto i=r.begin(); i!=r.end(); ++i){ auto *idxLeaf = idxLeaf0 + i; @@ -7226,7 +7386,7 @@ TEST_F(TestNanoVDB, IndexGridBuilder2) //mTimer.stop(); //mTimer.start("Dense IndexGrid: Parallel leaf iterator test of active voxels"); auto *leaf = idxTree.getFirstNode<0>(); - nanovdb::forEach(nanovdb::Range1D(0,idxTree.nodeCount(0)),[&](const nanovdb::Range1D &r){ + nanovdb::util::forEach(nanovdb::util::Range1D(0,idxTree.nodeCount(0)),[&](const nanovdb::util::Range1D &r){ auto fltAcc = fltTree.getAccessor();// NOT thread-safe! for (auto i=r.begin(); i!=r.end(); ++i){ for (auto vox = leaf[i].beginValueOn(); vox; ++vox) { @@ -7248,7 +7408,7 @@ TEST_F(TestNanoVDB, SparseIndexGridBuilder2) const float halfWidth = 3.0f; const nanovdb::Vec3d center(0); //mTimer.start("Create level set sphere"); - auto handle1 = nanovdb::createLevelSetSphere(radius, center, voxelSize, halfWidth); + auto handle1 = nanovdb::tools::createLevelSetSphere(radius, center, voxelSize, halfWidth); //mTimer.stop(); auto *fltGrid = handle1.grid(); EXPECT_TRUE(fltGrid); @@ -7257,7 +7417,7 @@ TEST_F(TestNanoVDB, SparseIndexGridBuilder2) //std::cerr << "FloatGrid footprint: " << (fltGrid->gridSize()>>20) << "MB" << std::endl; // create an IndexGrid for the FloatGrid - nanovdb::CreateNanoGrid builder2(*fltGrid); + nanovdb::tools::CreateNanoGrid builder2(*fltGrid); //mTimer.start("Create IndexGrid"); auto handle2 = builder2.getHandle(1u, false, true); //mTimer.stop(); @@ -7331,7 +7491,7 @@ TEST_F(TestNanoVDB, SparseIndexGridBuilder2) } //mTimer.restart("Parallel bbox test of value buffer"); // here is a multi-threaded version - nanovdb::forEach(idxGrid->indexBBox(),[&](const nanovdb::CoordBBox &bbox){ + nanovdb::util::forEach(idxGrid->indexBBox(),[&](const nanovdb::CoordBBox &bbox){ auto idxAcc = idxTree.getAccessor();// NOT thread-safe! auto fltAcc = fltTree.getAccessor();// NOT thread-safe! uint64_t n; @@ -7364,7 +7524,7 @@ TEST_F(TestNanoVDB, SparseIndexGridBuilder2) }// loop over leaf nodes //mTimer.start("Sparse IndexGrid: Parallel leaf iterator test of active voxels"); auto *leaf = idxTree.getFirstNode<0>(); - nanovdb::forEach(nanovdb::Range1D(0,idxTree.nodeCount(0)),[&](const nanovdb::Range1D &r){ + nanovdb::util::forEach(nanovdb::util::Range1D(0,idxTree.nodeCount(0)),[&](const nanovdb::util::Range1D &r){ auto fltAcc = fltTree.getAccessor();// NOT thread-safe! 
for (auto i=r.begin(); i!=r.end(); ++i){ for (auto vox = leaf[i].beginValueOn(); vox; ++vox) { @@ -7386,7 +7546,7 @@ TEST_F(TestNanoVDB, ChannelIndexGridBuilder) const float halfWidth = 3.0f; const nanovdb::Vec3d center(0); //mTimer.start("Create level set sphere"); - auto handle1 = nanovdb::createLevelSetSphere(radius, center, voxelSize, halfWidth); + auto handle1 = nanovdb::tools::createLevelSetSphere(radius, center, voxelSize, halfWidth); //mTimer.stop(); auto *fltGrid = handle1.grid(); EXPECT_TRUE(fltGrid); @@ -7395,7 +7555,7 @@ TEST_F(TestNanoVDB, ChannelIndexGridBuilder) //std::cerr << "FloatGrid footprint: " << (fltGrid->gridSize()>>20) << "MB" << std::endl; // create an IndexGrid for the FloatGrid - nanovdb::CreateNanoGrid builder2(*fltGrid); + nanovdb::tools::CreateNanoGrid builder2(*fltGrid); //mTimer.start("Create IndexGrid"); auto handle2 = builder2.getHandle(channels, false); //mTimer.stop(); @@ -7430,7 +7590,7 @@ TEST_F(TestNanoVDB, ChannelIndexGridBuilder) //mTimer.start("Parallel leaf iterator test of active voxels in channel"); const float *values = idxGrid->getBlindData(i); EXPECT_TRUE(values); - nanovdb::forEach(0,idxTree.nodeCount(0),8,[&](const nanovdb::Range1D &r){ + nanovdb::util::forEach(0,idxTree.nodeCount(0),8,[&](const nanovdb::util::Range1D &r){ auto fltAcc = fltTree.getAccessor();// NOT thread-safe! for (auto i=r.begin(); i!=r.end(); ++i){ for (auto vox = leaf[i].beginValueOn(); vox; ++vox) { @@ -7451,7 +7611,7 @@ TEST_F(TestNanoVDB, ChannelIndexGridBuilder) //mTimer.start("Parallel leaf iterator test of active voxels in channel"); const float *values = idxGrid->getBlindData(i); EXPECT_TRUE(values); - nanovdb::forEach(0,idxTree.nodeCount(0),8,[&](const nanovdb::Range1D &r){ + nanovdb::util::forEach(0,idxTree.nodeCount(0),8,[&](const nanovdb::util::Range1D &r){ nanovdb::ChannelAccessor acc(*idxGrid, i);// NOT thread-safe EXPECT_TRUE(acc); auto fltAcc = fltTree.getAccessor();// NOT thread-safe! 
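Every hunk in this test file applies one mechanical migration: NanoVDB's formerly flat namespace is split into nested ones, with grid factories and converters moving under nanovdb::tools, threading and range helpers under nanovdb::util, stencils under nanovdb::math, and ChecksumMode folding into the new CheckMode enum. A minimal before/after sketch of the pattern, assuming the default template arguments these tests rely on:

    // pre-patch spellings
    auto h1 = nanovdb::createLevelSetSphere<float>();
    nanovdb::forEach(nanovdb::Range1D(0, 8), [](const nanovdb::Range1D&){ /*...*/ });

    // post-patch spellings
    auto h2 = nanovdb::tools::createLevelSetSphere<float>();
    nanovdb::util::forEach(nanovdb::util::Range1D(0, 8), [](const nanovdb::util::Range1D&){ /*...*/ });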
@@ -7474,14 +7634,14 @@ TEST_F(TestNanoVDB, ChannelIndexGridBuilder) TEST_F(TestNanoVDB, HelloWorld_IndexGrid_Dense) { const nanovdb::Coord ijk(101,0,0); - auto handle1 = nanovdb::createLevelSetSphere(); + auto handle1 = nanovdb::tools::createLevelSetSphere(); auto *fltGrid = handle1.grid(); EXPECT_TRUE(fltGrid); //std::cerr << "Grid size: " << (fltGrid->gridSize() >> 20) << " MB\n"; EXPECT_EQ(1.0f, fltGrid->tree().getValue(ijk)); {// create an IndexGrid with an internal channel and write it to file - nanovdb::io::writeGrid("data/index_grid.nvdb", nanovdb::createNanoGrid(*fltGrid,1u, true, true));// 1 channel, include stats and tile values + nanovdb::io::writeGrid("data/index_grid.nvdb", nanovdb::tools::createNanoGrid(*fltGrid,1u, true, true));// 1 channel, include stats and tile values } {// read and test IndexGrid auto tmp = nanovdb::io::readGrid("data/index_grid.nvdb"); @@ -7494,7 +7654,7 @@ TEST_F(TestNanoVDB, HelloWorld_IndexGrid_Dense) EXPECT_EQ(1.0f, acc(ijk)); // compute the gradient from channel ID 0 - nanovdb::GradStencil> stencil(acc); + nanovdb::math::GradStencil> stencil(acc); stencil.moveTo(ijk); EXPECT_EQ(nanovdb::Vec3f(1.0f,0.0f,0.0f), stencil.gradient()); @@ -7510,14 +7670,14 @@ TEST_F(TestNanoVDB, HelloWorld_IndexGrid_Dense) TEST_F(TestNanoVDB, HelloWorld_IndexGrid_Sparse) { const nanovdb::Coord ijk(101,0,0); - auto handle1 = nanovdb::createLevelSetSphere(); + auto handle1 = nanovdb::tools::createLevelSetSphere(); auto *fltGrid = handle1.grid(); EXPECT_TRUE(fltGrid); //std::cerr << "Grid size: " << (fltGrid->gridSize() >> 20) << " MB\n"; EXPECT_EQ(1.0f, fltGrid->tree().getValue(ijk)); {// create an IndexGrid with an internal channel and write it to file - nanovdb::io::writeGrid("data/index_grid.nvdb", nanovdb::createNanoGrid(*fltGrid, 1u, false, true));// 1 channel, no stats and include tile values + nanovdb::io::writeGrid("data/index_grid.nvdb", nanovdb::tools::createNanoGrid(*fltGrid, 1u, false, true));// 1 channel, no stats and include tile values } {// read and test IndexGrid auto tmp = nanovdb::io::readGrid("data/index_grid.nvdb"); @@ -7530,7 +7690,7 @@ TEST_F(TestNanoVDB, HelloWorld_IndexGrid_Sparse) EXPECT_EQ(1.0f, acc(ijk)); // compute the gradient from channel ID 0 - nanovdb::GradStencil> stencil(acc); + nanovdb::math::GradStencil> stencil(acc); stencil.moveTo(ijk); EXPECT_EQ(nanovdb::Vec3f(1.0f,0.0f,0.0f), stencil.gradient()); @@ -7546,14 +7706,14 @@ TEST_F(TestNanoVDB, HelloWorld_IndexGrid_Sparse) TEST_F(TestNanoVDB, HelloWorld_IndexGrid_Sparse2) { const nanovdb::Coord ijk(101,0,0); - auto handle1 = nanovdb::createLevelSetSphere(); + auto handle1 = nanovdb::tools::createLevelSetSphere(); auto *fltGrid = handle1.grid(); EXPECT_TRUE(fltGrid); //std::cerr << "Grid size: " << (fltGrid->gridSize() >> 20) << " MB\n"; EXPECT_EQ(1.0f, fltGrid->tree().getValue(ijk)); {// create an IndexGrid with an internal channel and write it to file - nanovdb::io::writeGrid("data/index_grid2.nvdb", nanovdb::createNanoGrid(*fltGrid, 1u, false, false));// 1 channel, no stats and no tile values + nanovdb::io::writeGrid("data/index_grid2.nvdb", nanovdb::tools::createNanoGrid(*fltGrid, 1u, false, false));// 1 channel, no stats and no tile values } {// read and test IndexGrid auto tmp = nanovdb::io::readGrid("data/index_grid2.nvdb"); @@ -7566,7 +7726,7 @@ TEST_F(TestNanoVDB, HelloWorld_IndexGrid_Sparse2) EXPECT_EQ(1.0f, acc(ijk)); // compute the gradient from channel ID 0 - nanovdb::GradStencil> stencil(acc); + nanovdb::math::GradStencil> stencil(acc); stencil.moveTo(ijk); 
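// math::GradStencil (formerly nanovdb::GradStencil) caches the value
// neighborhood around the coordinate passed to moveTo() and evaluates a
// centered-difference gradient from that cache. For a level-set sphere with
// unit voxels, sampled on the +x axis just outside the surface, that gradient
// is (1,0,0), which the next assertion verifies. Illustrative sketch only; the
// stencil's template argument is assumed to be the ChannelAccessor type used above:
//
//   nanovdb::math::GradStencil<decltype(acc)> stencil(acc);
//   stencil.moveTo(ijk);            // populate the stencil cache at ijk
//   auto grad = stencil.gradient(); // centered finite-difference gradient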
EXPECT_EQ(nanovdb::Vec3f(1.0f,0.0f,0.0f), stencil.gradient()); @@ -7584,7 +7744,7 @@ TEST_F(TestNanoVDB, writeReadUncompressedGrid) using GridHandleT = nanovdb::GridHandle; const nanovdb::Coord ijk(101,0,0); std::vector handles1; - handles1.emplace_back(nanovdb::createLevelSetSphere()); + handles1.emplace_back(nanovdb::tools::createLevelSetSphere()); EXPECT_EQ(1u, handles1.size()); auto *fltGrid1 = handles1[0].grid(); EXPECT_TRUE(fltGrid1); @@ -7605,7 +7765,7 @@ TEST_F(TestNanoVDB, writeReadUncompressedGridRaw) using GridHandleT = nanovdb::GridHandle; const nanovdb::Coord ijk(101,0,0); std::vector handles1; - handles1.emplace_back(nanovdb::createLevelSetSphere()); + handles1.emplace_back(nanovdb::tools::createLevelSetSphere()); EXPECT_EQ(1u, handles1.size()); auto *fltGrid1 = handles1[0].grid(); EXPECT_TRUE(fltGrid1); @@ -7623,7 +7783,7 @@ TEST_F(TestNanoVDB, writeReadUncompressedGridRaw) TEST_F(TestNanoVDB, GridMetaData) { - auto handle = nanovdb::createLevelSetSphere(); + auto handle = nanovdb::tools::createLevelSetSphere(); auto *grid = handle.grid(); EXPECT_TRUE(grid); EXPECT_TRUE(grid->isRootConnected()); @@ -7638,7 +7798,7 @@ TEST_F(TestNanoVDB, GridMetaData) TEST_F(TestNanoVDB, BuildTree) { nanovdb::CoordBBox bbox(nanovdb::Coord(0), nanovdb::Coord(511)); - nanovdb::build::Grid grid1(false), grid2(false); + nanovdb::tools::build::Grid grid1(false), grid2(false); { //mTimer.start("Serial build::Tree"); auto kernel = [&](const nanovdb::CoordBBox& bbox) { @@ -7654,7 +7814,7 @@ TEST_F(TestNanoVDB, BuildTree) auto acc = grid2.getWriteAccessor(); for (auto it = bbox.begin(); it; ++it) acc.setValueOn(*it); }; - nanovdb::forEach(bbox, kernel); + nanovdb::util::forEach(bbox, kernel); //mTimer.stop(); } { @@ -7670,20 +7830,20 @@ TEST_F(TestNanoVDB, CreateNanoGridFromFloat) using SrcGridT = nanovdb::FloatGrid; const float tolerance = 0.001f; const nanovdb::Coord ijk(101,0,0); - auto srcHandle = nanovdb::createLevelSetSphere(); + auto srcHandle = nanovdb::tools::createLevelSetSphere(); SrcGridT *srcGrid = srcHandle.grid(); EXPECT_TRUE(srcGrid); //std::cerr << "Grid size: " << (srcGrid->gridSize() >> 20) << " MB\n"; EXPECT_EQ(1.0f, srcGrid->tree().getValue(ijk)); - nanovdb::CreateNanoGrid converter(*srcGrid); + nanovdb::tools::CreateNanoGrid converter(*srcGrid); {// create nanovdb::FloatGrid from nanovdb::FloatGrid using DstBuildT = float; auto dstHandle = converter.getHandle(); auto *dstGrid = dstHandle.grid(); EXPECT_TRUE(dstGrid); - //std::cerr << "Grid<"<())<<"> size: " << (dstGrid->gridSize() >> 20) << " MB\n"; + //std::cerr << "Grid<"<())<<"> size: " << (dstGrid->gridSize() >> 20) << " MB\n"; EXPECT_EQ(1.0f, dstGrid->tree().getValue(ijk)); } {// create nanovdb::DoubleGrid from nanovdb::FloatGrid @@ -7691,7 +7851,7 @@ TEST_F(TestNanoVDB, CreateNanoGridFromFloat) auto dstHandle = converter.getHandle(); auto *dstGrid = dstHandle.grid(); EXPECT_TRUE(dstGrid); - //std::cerr << "Grid<"<())<<"> size: " << (dstGrid->gridSize() >> 20) << " MB\n"; + //std::cerr << "Grid<"<())<<"> size: " << (dstGrid->gridSize() >> 20) << " MB\n"; EXPECT_EQ(1.0, dstGrid->tree().getValue(ijk)); } {// create nanovdb::Fp4Grid from nanovdb::FloatGrid @@ -7699,7 +7859,7 @@ TEST_F(TestNanoVDB, CreateNanoGridFromFloat) auto dstHandle = converter.getHandle(); auto *dstGrid = dstHandle.grid(); EXPECT_TRUE(dstGrid); - //std::cerr << "Grid<"<())<<"> size: " << (dstGrid->gridSize() >> 20) << " MB\n"; + //std::cerr << "Grid<"<())<<"> size: " << (dstGrid->gridSize() >> 20) << " MB\n"; EXPECT_NEAR(1.0f, 
dstGrid->tree().getValue(ijk), tolerance); //EXPECT_EQ(1.0f, dstGrid->tree().getValue(ijk)); } @@ -7708,7 +7868,7 @@ TEST_F(TestNanoVDB, CreateNanoGridFromFloat) auto dstHandle = converter.getHandle(); auto *dstGrid = dstHandle.grid(); EXPECT_TRUE(dstGrid); - //std::cerr << "Grid<"<())<<"> size: " << (dstGrid->gridSize() >> 20) << " MB\n"; + //std::cerr << "Grid<"<())<<"> size: " << (dstGrid->gridSize() >> 20) << " MB\n"; EXPECT_NEAR(1.0f, dstGrid->tree().getValue(ijk), tolerance); //EXPECT_EQ(1.0f, dstGrid->tree().getValue(ijk)); } @@ -7717,7 +7877,7 @@ TEST_F(TestNanoVDB, CreateNanoGridFromFloat) auto dstHandle = converter.getHandle(); auto *dstGrid = dstHandle.grid(); EXPECT_TRUE(dstGrid); - //std::cerr << "Grid<"<())<<"> size: " << (dstGrid->gridSize() >> 20) << " MB\n"; + //std::cerr << "Grid<"<())<<"> size: " << (dstGrid->gridSize() >> 20) << " MB\n"; EXPECT_NEAR(1.0f, dstGrid->tree().getValue(ijk), tolerance); //EXPECT_EQ(1.0f, dstGrid->tree().getValue(ijk)); } @@ -7726,7 +7886,7 @@ TEST_F(TestNanoVDB, CreateNanoGridFromFloat) auto dstHandle = converter.getHandle(); auto *dstGrid = dstHandle.grid(); EXPECT_TRUE(dstGrid); - //std::cerr << "Grid<"<())<<"> size: " << (dstGrid->gridSize() >> 20) << " MB\n"; + //std::cerr << "Grid<"<())<<"> size: " << (dstGrid->gridSize() >> 20) << " MB\n"; EXPECT_NEAR(1.0f, dstGrid->tree().getValue(ijk), tolerance); //EXPECT_EQ(1.0f, dstGrid->tree().getValue(ijk)); } @@ -7735,7 +7895,7 @@ TEST_F(TestNanoVDB, CreateNanoGridFromFloat) auto dstHandle = converter.getHandle(); auto *dstGrid = dstHandle.grid(); EXPECT_TRUE(dstGrid); - //std::cerr << "Grid<"<())<<"> size: " << (dstGrid->gridSize() >> 20) << " MB\n"; + //std::cerr << "Grid<"<())<<"> size: " << (dstGrid->gridSize() >> 20) << " MB\n"; EXPECT_EQ(true, dstGrid->tree().getValue(ijk)); } }// CreateNanoGridFromFloat @@ -7743,7 +7903,7 @@ TEST_F(TestNanoVDB, CreateNanoGridFromFloat) TEST_F(TestNanoVDB, CreateNanoGridFromVec3f) { using SrcBuildT = nanovdb::Vec3f; - using SrcGridT = nanovdb::build::Grid; + using SrcGridT = nanovdb::tools::build::Grid; // const SrcBuildT a(1.5f,0.0f,-9.1f), b(0.0f,0.0f,0.0f); @@ -7753,15 +7913,15 @@ TEST_F(TestNanoVDB, CreateNanoGridFromVec3f) EXPECT_EQ(a, grid.tree().getValue(p)); EXPECT_EQ(b, grid.tree().getValue(q)); // - auto srcHandle = nanovdb::createNanoGrid(grid); + auto srcHandle = nanovdb::tools::createNanoGrid(grid); auto *srcGrid = srcHandle.grid(); EXPECT_TRUE(srcGrid); EXPECT_EQ(a, srcGrid->tree().getValue(p)); EXPECT_EQ(b, srcGrid->tree().getValue(q)); - {// create nanovdb::ValueIndexGrid from nanovdb::build::Grid + {// create nanovdb::ValueIndexGrid from nanovdb::tools::build::Grid using DstBuildT = nanovdb::ValueIndex; - auto handle = nanovdb::createNanoGrid(grid, 0u, false, false);// no channels, stats or tiles + auto handle = nanovdb::tools::createNanoGrid(grid, 0u, false, false);// no channels, stats or tiles auto *idxGrid = handle.grid(); EXPECT_TRUE(idxGrid); EXPECT_EQ(1u, idxGrid->activeVoxelCount()); @@ -7769,9 +7929,9 @@ TEST_F(TestNanoVDB, CreateNanoGridFromVec3f) EXPECT_EQ(1, idxGrid->tree().getValue(q)); EXPECT_EQ(8, idxGrid->tree().getValue(p)); } - {// create nanovdb::ValueOnIndexGrid from nanovdb::build::Grid + {// create nanovdb::ValueOnIndexGrid from nanovdb::tools::build::Grid using DstBuildT = nanovdb::ValueOnIndex; - auto handle = nanovdb::createNanoGrid(grid, 0u, false, false);// no channels, stats or tiles + auto handle = nanovdb::tools::createNanoGrid(grid, 0u, false, false);// no channels, stats or tiles auto *idxGrid = 
handle.grid(); EXPECT_TRUE(idxGrid); EXPECT_EQ(1u, idxGrid->activeVoxelCount()); @@ -7782,7 +7942,7 @@ TEST_F(TestNanoVDB, CreateNanoGridFromVec3f) {// create nanovdb::ValueIndexGrid from nanovdb::Grid using DstBuildT = nanovdb::ValueIndex; using SrcGridT = nanovdb::Vec3fGrid; - auto handle = nanovdb::createNanoGrid(*srcGrid, 0u, false, false);// no channels, stats or tiles + auto handle = nanovdb::tools::createNanoGrid(*srcGrid, 0u, false, false);// no channels, stats or tiles auto *idxGrid = handle.grid(); EXPECT_TRUE(idxGrid); EXPECT_EQ(1u, idxGrid->activeVoxelCount()); @@ -7793,7 +7953,7 @@ TEST_F(TestNanoVDB, CreateNanoGridFromVec3f) {// create nanovdb::ValueOnIndexGrid from nanovdb::Grid using DstBuildT = nanovdb::ValueOnIndex; using SrcGridT = nanovdb::Vec3fGrid; - auto handle = nanovdb::createNanoGrid(*srcGrid, 0u, false, false);// no channels, stats or tiles + auto handle = nanovdb::tools::createNanoGrid(*srcGrid, 0u, false, false);// no channels, stats or tiles auto *idxGrid = handle.grid(); EXPECT_TRUE(idxGrid); EXPECT_EQ(1u, idxGrid->activeVoxelCount()); @@ -7805,7 +7965,7 @@ TEST_F(TestNanoVDB, CreateNanoGridFromVec3f) TEST_F(TestNanoVDB, LongGridName) { - using SrcGridT = nanovdb::build::Grid; + using SrcGridT = nanovdb::tools::build::Grid; nanovdb::GridData tmp; tmp.init(); EXPECT_EQ('\0', tmp.mGridName[0]); @@ -7823,7 +7983,7 @@ TEST_F(TestNanoVDB, LongGridName) EXPECT_EQ(gridName, srcGrid.getName()); srcGrid.tree().setValue(nanovdb::Coord(-256), 10.0f); const bool isLong = length > limit; - auto handle = nanovdb::createNanoGrid(srcGrid); + auto handle = nanovdb::tools::createNanoGrid(srcGrid); auto* dstGrid = handle.grid(); EXPECT_TRUE(dstGrid); EXPECT_EQ(1u, dstGrid->activeVoxelCount()); @@ -7870,10 +8030,10 @@ TEST_F(TestNanoVDB, mergeSplitGrids) size_t size1 = 0, size2 = 0; std::vector> handles1, handles2; std::vector gridNames; - //nanovdb::CpuTimer timer("create 5 host grids"); + //nanovdb::util::Timer timer("create 5 host grids"); for (int radius = 100; radius<150; radius += 10) { gridNames.emplace_back("sphere_" + std::to_string(radius)); - handles1.emplace_back(nanovdb::createLevelSetSphere(radius,nanovdb::Vec3d(0),1,3, + handles1.emplace_back(nanovdb::tools::createLevelSetSphere(radius,nanovdb::Vec3d(0),1,3, nanovdb::Vec3d(0), gridNames.back())); EXPECT_FALSE(handles1.back().isPadded()); size1 += handles1.back().size(); @@ -7883,7 +8043,7 @@ TEST_F(TestNanoVDB, mergeSplitGrids) //timer.restart("create 5 host grids"); for (int radius = 150; radius<200; radius += 10) { gridNames.emplace_back("sphere_" + std::to_string(radius)); - handles2.emplace_back(nanovdb::createLevelSetSphere(radius,nanovdb::Vec3d(0),1,3, + handles2.emplace_back(nanovdb::tools::createLevelSetSphere(radius,nanovdb::Vec3d(0),1,3, nanovdb::Vec3d(0), gridNames.back())); size2 += handles2.back().size(); } @@ -7959,17 +8119,17 @@ TEST_F(TestNanoVDB, mergeSplitGrids) //timer.stop(); }// mergeSplitGrids -TEST_F(TestNanoVDB, writeReadRadGrid) +TEST_F(TestNanoVDB, writeReadGridBuffer) { const nanovdb::Coord ijk(101,0,0); - auto handle1 = nanovdb::createLevelSetSphere(); + auto handle1 = nanovdb::tools::createLevelSetSphere(); auto *fltGrid = handle1.grid(); EXPECT_TRUE(fltGrid); //std::cerr << "Grid size: " << (fltGrid->gridSize() >> 20) << " MB\n"; EXPECT_EQ(1.0f, fltGrid->tree().getValue(ijk)); {// create an IndexGrid with an internal channel and write it to file - auto handle = nanovdb::createNanoGrid(*fltGrid,1u, true, true);// 1 channel, include stats and tile values + auto handle = 
nanovdb::tools::createNanoGrid(*fltGrid,1u, true, true);// 1 channel, include stats and tile values handle.write("data/raw_grid.nvdb"); } {// read and test IndexGrid @@ -7988,7 +8148,7 @@ TEST_F(TestNanoVDB, writeReadRadGrid) EXPECT_EQ(1.0f, acc(ijk)); // compute the gradient from channel ID 0 - nanovdb::GradStencil> stencil(acc); + nanovdb::math::GradStencil> stencil(acc); stencil.moveTo(ijk); EXPECT_EQ(nanovdb::Vec3f(1.0f,0.0f,0.0f), stencil.gradient()); @@ -7999,11 +8159,11 @@ TEST_F(TestNanoVDB, writeReadRadGrid) stencil.moveTo(ijk);// re-populates the stencil cache EXPECT_EQ(nanovdb::Vec3f(0.5f,0.0f,0.0f), stencil.gradient()); } -}// writeReadRadGrid +}// writeReadGridBuffer TEST_F(TestNanoVDB, GridHandleIO) { - auto handle = nanovdb::createLevelSetSphere(); + auto handle = nanovdb::tools::createLevelSetSphere(); EXPECT_TRUE(handle.grid()); handle.write("data/sphere_raw.nvdb"); ASSERT_THROW(handle.read("data/dummy_raw.nvdb"), std::ios_base::failure); @@ -8016,15 +8176,15 @@ TEST_F(TestNanoVDB, GridHandleIO) EXPECT_TRUE(handle.grid()); ASSERT_THROW(handle.read("data/merge1.nvdb"), std::logic_error); ASSERT_THROW(handle.read("data/merge1.nvdb"), std::exception); -} +}// GridHandleIO TEST_F(TestNanoVDB, GridCountAndIndex) { {// create multiple grids and write them to file std::vector> handles; - handles.emplace_back(nanovdb::createLevelSetSphere()); - handles.emplace_back(nanovdb::createLevelSetSphere()); - handles.emplace_back(nanovdb::createLevelSetSphere()); + handles.emplace_back(nanovdb::tools::createLevelSetSphere()); + handles.emplace_back(nanovdb::tools::createLevelSetSphere()); + handles.emplace_back(nanovdb::tools::createLevelSetSphere()); EXPECT_EQ(3u, handles.size()); for (auto &h : handles) EXPECT_EQ(1u, h.gridCount()); nanovdb::io::writeGrids("data/3_spheres.nvdb", handles); @@ -8036,8 +8196,8 @@ TEST_F(TestNanoVDB, GridCountAndIndex) EXPECT_TRUE(grid); EXPECT_EQ(0u, grid->gridIndex()); EXPECT_EQ(1u, grid->gridCount()); - EXPECT_TRUE(nanovdb::validateChecksum(*grid)); - EXPECT_TRUE(nanovdb::validateChecksum(*grid, nanovdb::ChecksumMode::Full)); + EXPECT_TRUE(nanovdb::tools::validateChecksum(grid)); + EXPECT_TRUE(nanovdb::tools::validateChecksum(grid, nanovdb::CheckMode::Full)); } {// readGrid one by one for (uint32_t i=0; i<3u; ++i) { @@ -8047,8 +8207,8 @@ TEST_F(TestNanoVDB, GridCountAndIndex) EXPECT_TRUE(grid); EXPECT_EQ(0u, grid->gridIndex()); EXPECT_EQ(1u, grid->gridCount()); - EXPECT_TRUE(nanovdb::validateChecksum(*grid)); - EXPECT_TRUE(nanovdb::validateChecksum(*grid, nanovdb::ChecksumMode::Full)); + EXPECT_TRUE(nanovdb::tools::validateChecksum(grid)); + EXPECT_TRUE(nanovdb::tools::validateChecksum(grid, nanovdb::CheckMode::Full)); } } {// read all grids @@ -8060,8 +8220,8 @@ TEST_F(TestNanoVDB, GridCountAndIndex) EXPECT_TRUE(grid); EXPECT_EQ(i, grid->gridIndex()); EXPECT_EQ(3u, grid->gridCount()); - EXPECT_TRUE(nanovdb::validateChecksum(*grid)); - EXPECT_TRUE(nanovdb::validateChecksum(*grid, nanovdb::ChecksumMode::Full)); + EXPECT_TRUE(nanovdb::tools::validateChecksum(grid)); + EXPECT_TRUE(nanovdb::tools::validateChecksum(grid, nanovdb::CheckMode::Full)); } } {// read all raw grids @@ -8073,8 +8233,8 @@ TEST_F(TestNanoVDB, GridCountAndIndex) EXPECT_TRUE(grid); EXPECT_EQ(i, grid->gridIndex()); EXPECT_EQ(3u, grid->gridCount()); - EXPECT_TRUE(nanovdb::validateChecksum(*grid)); - EXPECT_TRUE(nanovdb::validateChecksum(*grid, nanovdb::ChecksumMode::Full)); + EXPECT_TRUE(nanovdb::tools::validateChecksum(grid)); + EXPECT_TRUE(nanovdb::tools::validateChecksum(grid, 
nanovdb::CheckMode::Full)); } } {// read all raw grids @@ -8086,8 +8246,8 @@ TEST_F(TestNanoVDB, GridCountAndIndex) EXPECT_TRUE(grid); EXPECT_EQ(i, grid->gridIndex()); EXPECT_EQ(3u, grid->gridCount()); - EXPECT_TRUE(nanovdb::validateChecksum(*grid)); - EXPECT_TRUE(nanovdb::validateChecksum(*grid, nanovdb::ChecksumMode::Full)); + EXPECT_TRUE(nanovdb::tools::validateChecksum(grid)); + EXPECT_TRUE(nanovdb::tools::validateChecksum(grid, nanovdb::CheckMode::Full)); } } {// read single raw grid @@ -8099,8 +8259,8 @@ TEST_F(TestNanoVDB, GridCountAndIndex) EXPECT_TRUE(grid); EXPECT_EQ(0u, grid->gridIndex()); EXPECT_EQ(1u, grid->gridCount()); - EXPECT_TRUE(nanovdb::validateChecksum(*grid)); - EXPECT_TRUE(nanovdb::validateChecksum(*grid, nanovdb::ChecksumMode::Full)); + EXPECT_TRUE(nanovdb::tools::validateChecksum(grid)); + EXPECT_TRUE(nanovdb::tools::validateChecksum(grid, nanovdb::CheckMode::Full)); } ASSERT_THROW(handle.read("data/3_spheres_raw.nvdb", 4), std::runtime_error); ASSERT_THROW(handle.read("data/3_spheres_raw.nvdb",-1), std::runtime_error); @@ -8113,8 +8273,8 @@ TEST_F(TestNanoVDB, GridCountAndIndex) EXPECT_TRUE(grid); EXPECT_EQ(0u, grid->gridIndex()); EXPECT_EQ(1u, grid->gridCount()); - EXPECT_TRUE(nanovdb::validateChecksum(*grid)); - EXPECT_TRUE(nanovdb::validateChecksum(*grid, nanovdb::ChecksumMode::Full)); + EXPECT_TRUE(nanovdb::tools::validateChecksum(grid)); + EXPECT_TRUE(nanovdb::tools::validateChecksum(grid, nanovdb::CheckMode::Full)); } ASSERT_THROW(nanovdb::io::readGrid("data/3_spheres_raw.nvdb", 4), std::runtime_error); } @@ -8125,7 +8285,7 @@ TEST_F(TestNanoVDB, CustomStreamIO) std::ostringstream outputStream(std::ios_base::out | std::ios_base::binary); { std::vector> handles; - handles.emplace_back(nanovdb::createLevelSetSphere()); + handles.emplace_back(nanovdb::tools::createLevelSetSphere()); EXPECT_EQ(1u, handles.size()); nanovdb::io::writeGrids(outputStream, handles, nanovdb::io::Codec::NONE); } @@ -8143,8 +8303,8 @@ TEST_F(TestNanoVDB, CustomStreamIO) EXPECT_TRUE(grid); EXPECT_EQ(0u, grid->gridIndex()); EXPECT_EQ(1u, grid->gridCount()); - EXPECT_TRUE(nanovdb::validateChecksum(*grid)); - EXPECT_TRUE(nanovdb::validateChecksum(*grid, nanovdb::ChecksumMode::Full)); + EXPECT_TRUE(nanovdb::tools::validateChecksum(grid)); + EXPECT_TRUE(nanovdb::tools::validateChecksum(grid, nanovdb::CheckMode::Full)); } }// CustomStreamIO @@ -8152,7 +8312,7 @@ TEST_F(TestNanoVDB, CustomStreamGridHandleIO) { std::ostringstream outputStream(std::ios_base::out | std::ios_base::binary); { - nanovdb::createLevelSetSphere().write(outputStream); + nanovdb::tools::createLevelSetSphere().write(outputStream); } std::string payload = outputStream.str(); @@ -8168,11 +8328,158 @@ TEST_F(TestNanoVDB, CustomStreamGridHandleIO) EXPECT_TRUE(grid); EXPECT_EQ(0u, grid->gridIndex()); EXPECT_EQ(1u, grid->gridCount()); - EXPECT_TRUE(nanovdb::validateChecksum(*grid)); - EXPECT_TRUE(nanovdb::validateChecksum(*grid, nanovdb::ChecksumMode::Full)); + EXPECT_TRUE(nanovdb::tools::validateChecksum(grid)); + EXPECT_TRUE(nanovdb::tools::validateChecksum(grid, nanovdb::CheckMode::Full)); } }// CustomStreamGridHandleIO +// make -j testNanoVDB && ./unittest/testNanoVDB --gtest_filter="*strcpy" +TEST_F(TestNanoVDB, strcpy) +{ + EXPECT_EQ(mStr, nanovdb::util::strcpy(mStr, "this is a test")); + //std::cerr << "mStr = \"" << mStr << "\"" << std::endl; + EXPECT_TRUE(nanovdb::util::streq(mStr, "this is a test")); + EXPECT_EQ(nanovdb::util::strlen(mStr), std::strlen("this is a test")); + + EXPECT_EQ(mStr, nanovdb::util::strcpy(mStr, 
"this is a test 2")); + //std::cerr << "mStr = \"" << mStr << "\"" << std::endl; + EXPECT_TRUE(nanovdb::util::streq(mStr, "this is a test 2")); + EXPECT_EQ(nanovdb::util::strlen(mStr), std::strlen("this is a test 2")); + + EXPECT_EQ(mStr, nanovdb::util::strcpy(mStr, "")); + //std::cerr << "mStr = \"" << mStr << "\"" << std::endl; + EXPECT_TRUE(nanovdb::util::streq(mStr, "")); + EXPECT_EQ(nanovdb::util::strlen(mStr), std::strlen("")); + + EXPECT_EQ(mStr, nanovdb::util::strcpy(mStr, 0)); + //std::cerr << "mStr = \"" << mStr << "\"" << std::endl; + EXPECT_TRUE(nanovdb::util::streq(mStr, "0")); + EXPECT_EQ(nanovdb::util::strlen(mStr), std::strlen("0")); + + EXPECT_EQ(mStr, nanovdb::util::strcpy(mStr, 1234567)); + //std::cerr << "mStr = \"" << mStr << "\"" << std::endl; + EXPECT_TRUE(nanovdb::util::streq(mStr, "1234567")); + EXPECT_EQ(nanovdb::util::strlen(mStr), std::strlen("1234567")); + + EXPECT_EQ(mStr, nanovdb::util::strcpy(mStr, 1234567, 10)); + //std::cerr << "mStr = \"" << mStr << "\"" << std::endl; + EXPECT_TRUE(nanovdb::util::streq(mStr, "1234567")); + + EXPECT_EQ(mStr, nanovdb::util::strcpy(mStr, -123456)); + //std::cerr << "mStr = \"" << mStr << "\"" << std::endl; + EXPECT_TRUE(nanovdb::util::streq(mStr, "-123456")); + EXPECT_EQ(nanovdb::util::strlen(mStr), std::strlen("-123456")); + + EXPECT_EQ(mStr, nanovdb::util::strcpy(mStr, 1234567,2)); + //std::cerr << "mStr = \"" << mStr << "\"" << std::endl; + EXPECT_TRUE(nanovdb::util::streq(mStr, "100101101011010000111")); + EXPECT_EQ(nanovdb::util::strlen(mStr), std::strlen("100101101011010000111")); +}// strcpy + +// make -j testNanoVDB && ./unittest/testNanoVDB --gtest_filter="*strcat" +TEST_F(TestNanoVDB, strcat) +{ + char str[100];// = {'\0'};// important to null terminate + str[0] = '\0';// important to null terminate + + EXPECT_EQ(str, nanovdb::util::strcat(str, "1 ")); + //std::cerr << "str = \"" << str << "\"" << std::endl; + EXPECT_TRUE(nanovdb::util::streq(str, "1 ")); + + EXPECT_EQ(str, nanovdb::util::strcat(str, "2 ")); + //std::cerr << "str = \"" << str << "\"" << std::endl; + EXPECT_TRUE(nanovdb::util::streq(str, "1 2 ")); + + EXPECT_EQ(str, nanovdb::util::strcat(str, "")); + //std::cerr << "str = \"" << str << "\"" << std::endl; + EXPECT_TRUE(nanovdb::util::streq(str, "1 2 ")); + + EXPECT_EQ(str, nanovdb::util::strcat(str, 0)); + //std::cerr << "str = \"" << str << "\"" << std::endl; + EXPECT_TRUE(nanovdb::util::streq(str, "1 2 0")); + + EXPECT_EQ(str, nanovdb::util::strcat(str, 1234567)); + //std::cerr << "str = \"" << str << "\"" << std::endl; + EXPECT_TRUE(nanovdb::util::streq(str, "1 2 01234567")); + + EXPECT_EQ(str, nanovdb::util::strcat(str, 1234567, 10)); + //std::cerr << "str = \"" << str << "\"" << std::endl; + EXPECT_TRUE(nanovdb::util::streq(str, "1 2 012345671234567")); + + EXPECT_EQ(str, nanovdb::util::strcat(str, -123456)); + //std::cerr << "str = \"" << str << "\"" << std::endl; + EXPECT_TRUE(nanovdb::util::streq(str, "1 2 012345671234567-123456")); + + EXPECT_EQ(str, nanovdb::util::strcat(str, 1234567,2)); + //std::cerr << "str = \"" << str << "\"" << std::endl; + EXPECT_TRUE(nanovdb::util::streq(str, "1 2 012345671234567-123456100101101011010000111")); +}// strcat + +// make -j testNanoVDB && ./unittest/testNanoVDB --gtest_filter="*checkGrid" +TEST_F(TestNanoVDB, checkGrid) +{ + char str[100]; + + auto handle = nanovdb::tools::createLevelSetSphere(); + auto *grid = handle.grid(); + EXPECT_TRUE(grid); + + nanovdb::tools::checkGrid( nanovdb::util::PtrAdd(grid, 1), str); + //std::cerr << "str = \"" << str << 
"\"" << std::endl; + EXPECT_TRUE(nanovdb::util::streq(str, "Invalid pointer: Grid is misaligned")); + + grid->mMagic = 0; + nanovdb::tools::checkGrid( grid, str); + //std::cerr << "str = \"" << str << "\"" << std::endl; + EXPECT_TRUE(nanovdb::util::streq(str, "Invalid magic number: unknown")); + grid->mMagic = NANOVDB_MAGIC_NUMB; + + grid->mVersion = 0; + nanovdb::tools::checkGrid( grid, str); + //std::cerr << "str = \"" << str << "\"" << std::endl; + EXPECT_TRUE(nanovdb::util::streq(str, "Incompatible version number: 0.0.0")); + grid->mVersion = nanovdb::Version(); + + grid->mGridCount = 0; + nanovdb::tools::checkGrid( grid, str); + //std::cerr << "str = \"" << str << "\"" << std::endl; + EXPECT_TRUE(nanovdb::util::streq(str, "Zero grid count")); + grid->mGridCount = 1; + + grid->mGridIndex = 1; + nanovdb::tools::checkGrid( grid, str); + //std::cerr << "str = \"" << str << "\"" << std::endl; + EXPECT_TRUE(nanovdb::util::streq(str, "grid index(1) >= grid count(1)")); + grid->mGridIndex = 0; + + grid->mGridClass = nanovdb::GridClass::End; + nanovdb::tools::checkGrid( grid, str); + //std::cerr << "str = \"" << str << "\"" << std::endl; + EXPECT_TRUE(nanovdb::util::streq(str, "Invalid GridClass(END)")); + grid->mGridClass = nanovdb::GridClass::Staggered; + + grid->mGridType = nanovdb::GridType::End; + nanovdb::tools::checkGrid( grid, str); + //std::cerr << "str = \"" << str << "\"" << std::endl; + EXPECT_TRUE(nanovdb::util::streq(str, "Invalid GridType(End)")); + + grid->mGridType = nanovdb::GridType::Vec3f; + nanovdb::tools::checkGrid( grid, str); + //std::cerr << "str = \"" << str << "\"" << std::endl; + EXPECT_TRUE(nanovdb::util::streq(str, "Invalid combination of BuildType(float) and GridType(Vec3f)")); + + grid->mGridType = nanovdb::GridType::Float; + nanovdb::tools::checkGrid( grid, str); + //std::cerr << "str = \"" << str << "\"" << std::endl; + EXPECT_TRUE(nanovdb::util::streq(str, "Invalid combination of GridType(float) and GridClass(MAC)")); + grid->mGridClass = nanovdb::GridClass::LevelSet; + + memset(str, 0, 100); + nanovdb::tools::checkGrid( grid, str, nanovdb::CheckMode::Full); + //nanovdb::tools::checkGrid( grid, str, nanovdb::ChecksumMode::Full);// deprecation warning + EXPECT_TRUE(nanovdb::util::empty(str)); +}// checkGrid + int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); diff --git a/nanovdb/nanovdb/unittest/TestNanoVDB.cu b/nanovdb/nanovdb/unittest/TestNanoVDB.cu index fc88e95d99..0b0ee9eccf 100644 --- a/nanovdb/nanovdb/unittest/TestNanoVDB.cu +++ b/nanovdb/nanovdb/unittest/TestNanoVDB.cu @@ -4,39 +4,41 @@ #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include // for std::sort +#include // for std::setw, std::setfill namespace nanovdb {// this namespace is required by gtest namespace test { -// used for testing CudaDeviceBuffer +// used for testing cuda::DeviceBuffer void device2host(size_t count) { const size_t size = count * sizeof(float); - auto buffer = nanovdb::CudaDeviceBuffer::create(size, nullptr, false);// on device only + auto buffer = nanovdb::cuda::DeviceBuffer::create(size, nullptr, false);// on device only EXPECT_EQ(size, buffer.size()); EXPECT_FALSE(buffer.data()); EXPECT_TRUE(buffer.deviceData()); float *d_array = reinterpret_cast(buffer.deviceData()); 
constexpr unsigned int num_threads = 256; unsigned int num_blocks = (static_cast<unsigned int>(count) + num_threads - 1) / num_threads; - cudaLambdaKernel<<<num_blocks, num_threads>>>(count, [=] __device__ (size_t i) {d_array[i] = float(i);}); + nanovdb::util::cuda::lambdaKernel<<<num_blocks, num_threads>>>(count, [=] __device__ (size_t i) {d_array[i] = float(i);}); buffer.deviceDownload();// copy device -> host EXPECT_EQ(size, buffer.size()); EXPECT_TRUE(buffer.data()); @@ -44,7 +46,7 @@ void device2host(size_t count) float *array = reinterpret_cast<float*>(buffer.data()); for (size_t i=0; i(buffer.deviceData()); constexpr unsigned int num_threads = 256; unsigned int num_blocks = (static_cast<unsigned int>(count) + num_threads - 1) / num_threads; - cudaLambdaKernel<<<num_blocks, num_threads>>>(count, [=] __device__ (size_t i) { + nanovdb::util::cuda::lambdaKernel<<<num_blocks, num_threads>>>(count, [=] __device__ (size_t i) { if (d_array[i] != float(i)) *d_test = false; d_array[i] = float(i) + 1.0f; }); @@ -95,25 +97,25 @@ void cudaStr() int n, *d_n; cudaCheck(cudaMalloc((void**)&d_n, sizeof(int))); - cudaLambdaKernel<<<1, 1>>>(1, [=] __device__ (size_t) { - cudaStrcpy(d_str, "this is a test"); + nanovdb::util::cuda::lambdaKernel<<<1, 1>>>(1, [=] __device__ (size_t) { + nanovdb::util::strcpy(d_str, "this is a test"); }); cudaCheck(cudaMemcpy(str, d_str, size, cudaMemcpyDeviceToHost)); EXPECT_STREQ(str, "this is a test"); - cudaLambdaKernel<<<1, 1>>>(1, [=] __device__ (size_t) { - cudaStrcat(d_str, " #2"); + nanovdb::util::cuda::lambdaKernel<<<1, 1>>>(1, [=] __device__ (size_t) { + nanovdb::util::strcat(d_str, " #2"); }); cudaCheck(cudaMemcpy(str, d_str, size, cudaMemcpyDeviceToHost)); EXPECT_STREQ(str, "this is a test #2"); - cudaLambdaKernel<<<1, 1>>>(1, [=] __device__ (size_t) { - *d_n = cudaStrcmp(d_str, "this is a test"); + nanovdb::util::cuda::lambdaKernel<<<1, 1>>>(1, [=] __device__ (size_t) { + *d_n = nanovdb::util::strcmp(d_str, "this is a test"); }); cudaCheck(cudaMemcpy(&n, d_n, sizeof(int), cudaMemcpyDeviceToHost)); //std::cerr << "n = " << n << std::endl; EXPECT_EQ(signum(std::strcmp(str, "this is a test")), signum(n)); - cudaLambdaKernel<<<1, 1>>>(1, [=] __device__ (size_t) { - *d_n = cudaStrcmp(d_str, "this is a test #2"); + nanovdb::util::cuda::lambdaKernel<<<1, 1>>>(1, [=] __device__ (size_t) { + *d_n = nanovdb::util::strcmp(d_str, "this is a test #2"); }); cudaCheck(cudaMemcpy(&n, d_n, sizeof(int), cudaMemcpyDeviceToHost)); EXPECT_EQ(std::strcmp(str, "this is a test #2"), n); @@ -146,7 +148,7 @@ TEST(TestNanoVDBCUDA, Basic_CudaPointsToGrid_float) cudaCheck(cudaMalloc(&d_coords, num_points * sizeof(nanovdb::Coord))); cudaCheck(cudaMemcpy(d_coords, coords, num_points * sizeof(nanovdb::Coord), cudaMemcpyHostToDevice));// CPU -> GPU - auto handle = nanovdb::cudaVoxelsToGrid(d_coords, num_points); + auto handle = nanovdb::tools::cuda::voxelsToGrid(d_coords, num_points); cudaCheck(cudaFree(d_coords)); EXPECT_TRUE(handle.deviceData());// grid only exists on the GPU EXPECT_FALSE(handle.data());// no grid was yet allocated on the CPU @@ -236,7 +238,7 @@ struct AccessLeafMask{ TEST(TestNanoVDBCUDA, Basic_CudaPointsToGrid_ValueIndex) { using BuildT = nanovdb::ValueIndex; - using GridT = nanovdb::NanoGrid; + using GridT = nanovdb::NanoGrid; const size_t num_points = 3; nanovdb::Coord coords[num_points] = {nanovdb::Coord(1, 2, 3), nanovdb::Coord(1, 2, 4), @@ -244,10 +246,10 @@ TEST(TestNanoVDBCUDA, Basic_CudaPointsToGrid_ValueIndex) cudaCheck(cudaMalloc(&d_coords, num_points * sizeof(nanovdb::Coord))); cudaCheck(cudaMemcpy(d_coords, coords, num_points * sizeof(nanovdb::Coord), 
cudaMemcpyHostToDevice));// CPU -> GPU #if 0 - nanovdb::CudaPointsToGrid converter; + nanovdb::tools::cuda::PointsToGrid converter; auto handle = converter.getHandle(d_coords, num_points); #else - auto handle = nanovdb::cudaVoxelsToGrid(d_coords, num_points); + auto handle = nanovdb::tools::cuda::voxelsToGrid(d_coords, num_points); #endif cudaCheck(cudaFree(d_coords)); EXPECT_TRUE(handle.deviceData());// grid only exists on the GPU @@ -313,10 +315,10 @@ TEST(TestNanoVDBCUDA, Basic_CudaPointsToGrid_ValueOnIndex) cudaCheck(cudaMemcpy(d_coords, coords, num_points * sizeof(nanovdb::Coord), cudaMemcpyHostToDevice));// CPU -> GPU #if 0 - nanovdb::CudaPointsToGrid converter; + nanovdb::tools::cuda::PointsToGrid converter; auto handle = converter.getHandle(d_coords, num_points); #else - auto handle = nanovdb::cudaVoxelsToGrid(d_coords, num_points); + auto handle = nanovdb::tools::cuda::voxelsToGrid(d_coords, num_points); #endif cudaCheck(cudaFree(d_coords)); @@ -411,10 +413,10 @@ TEST(TestNanoVDBCUDA, Basic_CudaPointsToGrid_ValueOnIndexMask) cudaCheck(cudaMemcpy(d_coords, coords, num_points * sizeof(nanovdb::Coord), cudaMemcpyHostToDevice));// CPU -> GPU #if 0 - nanovdb::CudaPointsToGrid converter; + nanovdb::tools::cuda::PointsToGrid converter; auto handle = converter.getHandle(d_coords, num_points); #else - auto handle = nanovdb::cudaVoxelsToGrid(d_coords, num_points); + auto handle = nanovdb::tools::cuda::voxelsToGrid(d_coords, num_points); #endif cudaCheck(cudaFree(d_coords)); @@ -509,7 +511,7 @@ TEST(TestNanoVDBCUDA, Basic_CudaPointsToGrid_ValueOnIndexMask) TEST(TestNanoVDBCUDA, Large_CudaPointsToGrid_old) { using BuildT = nanovdb::ValueOnIndex; - //nanovdb::CpuTimer timer; + //nanovdb::util::Timer timer; const size_t voxelCount = 1 << 20;// 1048576 std::vector voxels; {//generate random voxels @@ -524,14 +526,14 @@ } #if 0 {// Build grid on CPU - nanovdb::build::Grid buildGrid(0.0f); + nanovdb::tools::build::Grid buildGrid(0.0f); //timer.start("Building grid on CPU from "+std::to_string(voxels.size())+" points"); - nanovdb::forEach(0, voxelCount, voxelCount >> 6, [&](const nanovdb::Range1D &r){ + nanovdb::util::forEach(0, voxelCount, voxelCount >> 6, [&](const nanovdb::util::Range1D &r){ auto acc = buildGrid.getWriteAccessor(); for (size_t i=r.begin(); i!=r.end(); ++i) acc.setValueOn(voxels[i]); }); //timer.restart("Converting CPU build::Grid to nanovdb"); - auto handle = nanovdb::createNanoGrid(buildGrid); + auto handle = nanovdb::tools::createNanoGrid(buildGrid); //timer.stop(); } #endif @@ -544,7 +546,7 @@ //timer.stop(); //timer.start("Building grid on GPU from "+std::to_string(voxels.size())+" points"); - auto handle = nanovdb::cudaVoxelsToGrid(d_coords, voxelCount, 1.0); + auto handle = nanovdb::tools::cuda::voxelsToGrid(d_coords, voxelCount, 1.0); //timer.stop(); EXPECT_TRUE(handle.deviceData());// grid only exists on the GPU @@ -568,7 +570,7 @@ EXPECT_EQ(nanovdb::Vec3d(1.0), grid->voxelSize()); //timer.restart("Parallel unit-testing on CPU"); - nanovdb::forEach(voxels,[&](const nanovdb::Range1D &r){ + nanovdb::util::forEach(voxels,[&](const nanovdb::util::Range1D &r){ auto acc = grid->getAccessor(); for (size_t i=r.begin(); i!=r.end(); ++i) { const nanovdb::Coord &ijk = voxels[i]; @@ -590,10 +592,10 @@ TEST(TestNanoVDBCUDA, mergeSplitGrids) size_t size1 = 0, size2 = 0; std::vector> handles1, handles2; std::vector gridNames; - 
//nanovdb::CpuTimer timer("create 5 host grids"); + //nanovdb::util::Timer timer("create 5 host grids"); for (int radius = 100; radius<150; radius += 10) { gridNames.emplace_back("sphere_" + std::to_string(radius)); - handles1.emplace_back(nanovdb::createLevelSetSphere(radius,nanovdb::Vec3d(0),1,3, + handles1.emplace_back(nanovdb::tools::createLevelSetSphere(radius,nanovdb::Vec3d(0),1,3, nanovdb::Vec3d(0), gridNames.back())); EXPECT_FALSE(handles1.back().isPadded()); size1 += handles1.back().size(); @@ -603,7 +605,7 @@ TEST(TestNanoVDBCUDA, mergeSplitGrids) //timer.restart("create 5 host grids"); for (int radius = 150; radius<200; radius += 10) { gridNames.emplace_back("sphere_" + std::to_string(radius)); - handles2.emplace_back(nanovdb::createLevelSetSphere(radius,nanovdb::Vec3d(0),1,3, + handles2.emplace_back(nanovdb::tools::createLevelSetSphere(radius,nanovdb::Vec3d(0),1,3, nanovdb::Vec3d(0), gridNames.back())); size2 += handles2.back().size(); } @@ -665,15 +667,15 @@ TEST(TestNanoVDBCUDA, mergeSplitGrids) TEST(TestNanoVDBCUDA, mergeSplitDeviceGrids) { - using BufferT = nanovdb::CudaDeviceBuffer; + using BufferT = nanovdb::cuda::DeviceBuffer; using HandleT = nanovdb::GridHandle; size_t size = 0; std::vector handles; std::vector gridNames; - //nanovdb::CpuTimer timer("create 10 host grids"); + //nanovdb::util::Timer timer("create 10 host grids"); for (int radius = 100; radius<200; radius += 10) { gridNames.emplace_back("sphere_" + std::to_string(radius)); - handles.emplace_back(nanovdb::createLevelSetSphere(radius,nanovdb::Vec3d(0),1,3, + handles.emplace_back(nanovdb::tools::createLevelSetSphere(radius,nanovdb::Vec3d(0),1,3, nanovdb::Vec3d(0), gridNames.back())); EXPECT_FALSE(handles.back().isPadded()); size += handles.back().size(); @@ -682,7 +684,7 @@ TEST(TestNanoVDBCUDA, mergeSplitDeviceGrids) for (auto &h : handles) h.deviceUpload(); EXPECT_EQ(10u, handles.size()); //timer.restart("merging device grids"); - auto mergedHandle = nanovdb::mergeDeviceGrids(handles); + auto mergedHandle = nanovdb::cuda::mergeGridHandles(handles); EXPECT_EQ(size, mergedHandle.size()); EXPECT_FALSE(mergedHandle.data()); EXPECT_TRUE(mergedHandle.deviceData()); @@ -704,7 +706,7 @@ TEST(TestNanoVDBCUDA, mergeSplitDeviceGrids) EXPECT_EQ(strcmp(gridNames[i].c_str(), gridData->mGridName),0); } //timer.restart("splitting device grids"); - auto splitHandles = nanovdb::splitDeviceGrids(mergedHandle); + auto splitHandles = nanovdb::cuda::splitGridHandles(mergedHandle); //timer.restart("unit-test split grids"); EXPECT_EQ(10u, splitHandles.size()); for (uint32_t i=0u; i<10u; ++i) { @@ -724,14 +726,14 @@ TEST(TestNanoVDBCUDA, mergeSplitDeviceGrids) // make -j 4 testNanoVDB && ./unittest/testNanoVDB --gtest_filter="*Cuda*" --gtest_break_on_failure TEST(TestNanoVDBCUDA, CudaIndexGridToGrid_basic) { - using BufferT = nanovdb::CudaDeviceBuffer; + using BufferT = nanovdb::cuda::DeviceBuffer; const float value = 1.23456f, backgroud = 1.0f; const nanovdb::Coord ijk(1,2,3); nanovdb::GridHandle floatHdl; nanovdb::FloatGrid *floatGrid = nullptr; - //nanovdb::CpuTimer timer; + //nanovdb::util::Timer timer; {// create float grid with one active voxel - nanovdb::build::Grid grid(backgroud); + nanovdb::tools::build::Grid grid(backgroud); auto srcAcc = grid.getAccessor(); srcAcc.setValue(ijk, value); auto nodeCount = grid.nodeCount(); @@ -741,7 +743,7 @@ TEST(TestNanoVDBCUDA, CudaIndexGridToGrid_basic) EXPECT_EQ(value, srcAcc.getValue(ijk)); EXPECT_EQ(value, srcAcc.getValue(1,2,3)); //timer.start("Create FloatGrid on CPU"); - 
floatHdl = nanovdb::createNanoGrid, float, BufferT>(grid); + floatHdl = nanovdb::tools::createNanoGrid, float, BufferT>(grid); EXPECT_TRUE(floatHdl); floatGrid = floatHdl.grid(); EXPECT_TRUE(floatGrid); @@ -758,8 +760,8 @@ TEST(TestNanoVDBCUDA, CudaIndexGridToGrid_basic) EXPECT_TRUE(acc.isActive(ijk)); } //timer.restart("Create IndexGrid on CPU"); - using BufferT = nanovdb::CudaDeviceBuffer; - auto idxHdl = nanovdb::createNanoGrid(*floatGrid, 0u, false, false, 1); + using BufferT = nanovdb::cuda::DeviceBuffer; + auto idxHdl = nanovdb::tools::createNanoGrid(*floatGrid, 0u, false, false, 1); //timer.restart("Copy IndexGrid from CPU to GPU"); EXPECT_FALSE(idxHdl.deviceGrid()); idxHdl.deviceUpload(); @@ -770,7 +772,7 @@ TEST(TestNanoVDBCUDA, CudaIndexGridToGrid_basic) EXPECT_EQ(1u + 512u, idxGrid->valueCount());// background + 512 values in one leaf node float *values = new float[idxGrid->valueCount()], *d_values = nullptr; values[0] = backgroud; - const float *q = floatGrid->tree().getFirstLeaf()->data()->mValues; + const float *q = floatGrid->tree().getFirstLeaf()->mValues; for (float *p=values+1, *e=p+512;p!=e; ++p) *p = *q++; //timer.restart("Allocate and copy values from CPU to GPU"); cudaCheck(cudaMalloc((void**)&d_values, idxGrid->valueCount()*sizeof(float))); @@ -780,7 +782,7 @@ TEST(TestNanoVDBCUDA, CudaIndexGridToGrid_basic) auto *d_idxGrid = idxHdl.deviceGrid(); EXPECT_TRUE(d_idxGrid); //timer.restart("Call CudaIndexToGrid"); - auto hdl = nanovdb::cudaIndexToGrid(d_idxGrid, d_values); + auto hdl = nanovdb::tools::cuda::indexToGrid(d_idxGrid, d_values); //timer.restart("unit-test"); EXPECT_FALSE(hdl.grid());// no host grid EXPECT_TRUE(hdl.deviceGrid()); @@ -822,14 +824,14 @@ TEST(TestNanoVDBCUDA, CudaIndexGridToGrid_basic) TEST(TestNanoVDBCUDA, CudaIndexGridToGrid_ValueIndex) { using BuildT = nanovdb::ValueIndex; - using BufferT = nanovdb::CudaDeviceBuffer; - //nanovdb::CpuTimer timer("Create FloatGrid on CPU"); - auto floatHdl = nanovdb::createLevelSetSphere(100,nanovdb::Vec3d(0),1,3, nanovdb::Vec3d(0), "test"); + using BufferT = nanovdb::cuda::DeviceBuffer; + //nanovdb::util::Timer timer("Create FloatGrid on CPU"); + auto floatHdl = nanovdb::tools::createLevelSetSphere(100,nanovdb::Vec3d(0),1,3, nanovdb::Vec3d(0), "test"); auto *floatGrid = floatHdl.grid(); EXPECT_TRUE(floatGrid); auto acc = floatGrid->getAccessor(); //timer.restart("Create IndexGrid on CPU"); - auto idxHdl = nanovdb::createNanoGrid(*floatGrid); + auto idxHdl = nanovdb::tools::createNanoGrid(*floatGrid); //timer.restart("Copy IndexGrid from CPU to GPU"); idxHdl.deviceUpload(); auto *idxGrid = idxHdl.grid(); @@ -850,7 +852,7 @@ TEST(TestNanoVDBCUDA, CudaIndexGridToGrid_ValueIndex) auto *d_idxGrid = idxHdl.deviceGrid(); EXPECT_TRUE(d_idxGrid); //timer.restart("Call CudaIndexToGrid"); - auto hdl = nanovdb::cudaIndexToGrid(d_idxGrid, d_values); + auto hdl = nanovdb::tools::cuda::indexToGrid(d_idxGrid, d_values); //timer.restart("unit-test"); EXPECT_FALSE(hdl.grid());// no host grid EXPECT_TRUE(hdl.deviceGrid()); @@ -872,14 +874,14 @@ TEST(TestNanoVDBCUDA, CudaIndexGridToGrid_ValueIndex) TEST(TestNanoVDBCUDA, CudaIndexGridToGrid_ValueOnIndex) { using BuildT = nanovdb::ValueOnIndex; - using BufferT = nanovdb::CudaDeviceBuffer; - //nanovdb::CpuTimer timer("Create FloatGrid on CPU"); - auto floatHdl = nanovdb::createLevelSetSphere(100,nanovdb::Vec3d(0),1,3, nanovdb::Vec3d(0), "test"); + using BufferT = nanovdb::cuda::DeviceBuffer; + //nanovdb::util::Timer timer("Create FloatGrid on CPU"); + auto floatHdl = 
nanovdb::tools::createLevelSetSphere(100,nanovdb::Vec3d(0),1,3, nanovdb::Vec3d(0), "test"); auto *floatGrid = floatHdl.grid(); EXPECT_TRUE(floatGrid); auto acc = floatGrid->getAccessor(); //timer.restart("Create IndexGrid on CPU"); - auto idxHdl = nanovdb::createNanoGrid(*floatGrid); + auto idxHdl = nanovdb::tools::createNanoGrid(*floatGrid); //timer.restart("Copy IndexGrid from CPU to GPU"); idxHdl.deviceUpload(); auto *idxGrid = idxHdl.grid(); @@ -902,7 +904,7 @@ TEST(TestNanoVDBCUDA, CudaIndexGridToGrid_ValueOnIndex) auto *d_idxGrid = idxHdl.deviceGrid(); EXPECT_TRUE(d_idxGrid); //timer.restart("Call CudaIndexToGrid"); - auto hdl = nanovdb::cudaIndexToGrid(d_idxGrid, d_values); + auto hdl = nanovdb::tools::cuda::indexToGrid(d_idxGrid, d_values); //timer.restart("unit-test"); EXPECT_FALSE(hdl.grid());// no host grid EXPECT_TRUE(hdl.deviceGrid()); @@ -923,9 +925,9 @@ TEST(TestNanoVDBCUDA, CudaIndexGridToGrid_ValueOnIndex) TEST(TestNanoVDBCUDA, CudaSignedFloodFill) { - using BufferT = nanovdb::CudaDeviceBuffer; - //nanovdb::CpuTimer timer("Create FloatGrid on CPU"); - auto floatHdl = nanovdb::createLevelSetSphere(100); + using BufferT = nanovdb::cuda::DeviceBuffer; + //nanovdb::util::Timer timer("Create FloatGrid on CPU"); + auto floatHdl = nanovdb::tools::createLevelSetSphere(100); auto *floatGrid = floatHdl.grid(); EXPECT_TRUE(floatGrid); auto acc = floatGrid->getAccessor(); @@ -946,8 +948,8 @@ TEST(TestNanoVDBCUDA, CudaSignedFloodFill) auto *d_floatGrid = floatHdl.deviceGrid(); EXPECT_TRUE(d_floatGrid); //timer.restart("Signed flood-fill on the GPU"); - //nanovdb::cudaSignedFloodFill(d_floatGrid, true); - nanovdb::cudaSignedFloodFill(d_floatGrid); + //nanovdb::cuda::signedFloodFill(d_floatGrid, true); + nanovdb::tools::cuda::signedFloodFill(d_floatGrid); //timer.restart("Copy FloatGrid from GPU to CPU"); floatHdl.deviceDownload();// GPU -> CPU //timer.stop(); @@ -970,8 +972,8 @@ TEST(TestNanoVDBCUDA, OneVoxelToGrid) cudaCheck(cudaMalloc(&d_coords, num_points * sizeof(nanovdb::Coord))); cudaCheck(cudaMemcpy(d_coords, coords, num_points * sizeof(nanovdb::Coord), cudaMemcpyHostToDevice));// CPU -> GPU - //nanovdb::GpuTimer timer("Create FloatGrid on GPU"); - nanovdb::CudaPointsToGrid converter; + //nanovdb::util::cuda::Timer timer("Create FloatGrid on GPU"); + nanovdb::tools::cuda::PointsToGrid converter; auto handle = converter.getHandle(d_coords, num_points); cudaCheck(cudaFree(d_coords)); //timer.stop(); @@ -1034,8 +1036,8 @@ TEST(TestNanoVDBCUDA, ThreePointsToGrid) cudaCheck(cudaMalloc(&d_points, num_points * sizeof(Vec3T))); cudaCheck(cudaMemcpy(d_points, points, num_points * sizeof(Vec3T), cudaMemcpyHostToDevice));// CPU -> GPU - //nanovdb::GpuTimer timer("Create FloatGrid on GPU"); - nanovdb::CudaPointsToGrid converter; + //nanovdb::util::cuda::Timer timer("Create FloatGrid on GPU"); + nanovdb::tools::cuda::PointsToGrid converter; auto handle = converter.getHandle(d_points, num_points); cudaCheck(cudaFree(d_points)); //timer.stop(); @@ -1150,8 +1152,8 @@ TEST(TestNanoVDBCUDA, EightVoxelsToFloatGrid) cudaCheck(cudaMalloc(&d_coords, num_points * sizeof(nanovdb::Coord))); cudaCheck(cudaMemcpy(d_coords, coords, num_points * sizeof(nanovdb::Coord), cudaMemcpyHostToDevice));// CPU -> GPU - //nanovdb::GpuTimer timer("Create FloatGrid on GPU"); - nanovdb::CudaPointsToGrid converter; + //nanovdb::util::cuda::Timer timer("Create FloatGrid on GPU"); + nanovdb::tools::cuda::PointsToGrid converter; auto handle = converter.getHandle(d_coords, num_points); //timer.stop(); 
cudaCheck(cudaFree(d_coords)); @@ -1210,7 +1212,7 @@ TEST(TestNanoVDBCUDA, Random_CudaPointsToGrid_World64) { using BuildT = nanovdb::Point;//uint32_t; using Vec3T = nanovdb::Vec3d; - //nanovdb::CpuTimer timer; + //nanovdb::util::Timer timer; const size_t pointCount = 1 << 20;// 1048576 std::vector points; //generate random points @@ -1233,7 +1235,7 @@ TEST(TestNanoVDBCUDA, Random_CudaPointsToGrid_World64) const double voxelSize = 8.0; //timer.start("Building grid on GPU from "+std::to_string(points.size())+" points"); - nanovdb::CudaPointsToGrid converter(voxelSize);// unit map + nanovdb::tools::cuda::PointsToGrid converter(voxelSize);// unit map //converter.setVerbose(); auto handle = converter.getHandle(d_points, pointCount); //timer.stop(); @@ -1294,7 +1296,7 @@ TEST(TestNanoVDBCUDA, Random_CudaPointsToGrid_World64) } //timer.restart("Parallel unit-testing on CPU"); - nanovdb::forEach(points,[&](const nanovdb::Range1D &r){ + nanovdb::util::forEach(points,[&](const nanovdb::util::Range1D &r){ nanovdb::PointAccessor acc(*grid); EXPECT_TRUE(acc); const Vec3T *start = nullptr, *stop = nullptr; @@ -1321,11 +1323,12 @@ TEST(TestNanoVDBCUDA, Random_CudaPointsToGrid_World64) //timer.stop(); }// Random_CudaPointsToGrid_World64 + TEST(TestNanoVDBCUDA, Large_CudaPointsToGrid_World64) { using BuildT = nanovdb::Point; using Vec3T = nanovdb::Vec3d; - //nanovdb::CpuTimer timer; + //nanovdb::util::Timer timer; const size_t pointCount = 1 << 20;// 1048576 std::vector points; //generate random points @@ -1348,7 +1351,7 @@ TEST(TestNanoVDBCUDA, Large_CudaPointsToGrid_World64) const double voxelSize = 8.0; //timer.start("Building grid on GPU from "+std::to_string(points.size())+" points"); - nanovdb::CudaPointsToGrid converter(voxelSize);// unit map + nanovdb::tools::cuda::PointsToGrid converter(voxelSize);// fixed voxel size //converter.setVerbose(); auto handle = converter.getHandle(d_points, pointCount); //timer.stop(); @@ -1411,7 +1414,7 @@ TEST(TestNanoVDBCUDA, Large_CudaPointsToGrid_World64) } //timer.restart("Parallel unit-testing on CPU"); - nanovdb::forEach(points,[&](const nanovdb::Range1D &r){ + nanovdb::util::forEach(points,[&](const nanovdb::util::Range1D &r){ nanovdb::PointAccessor acc(*grid); EXPECT_TRUE(acc); const Vec3T *start = nullptr, *stop = nullptr; @@ -1432,7 +1435,7 @@ TEST(TestNanoVDBCUDA, Large_CudaPointsToGrid_World64) bool test = false; for (uint64_t j=0; test == false && j<count; ++j) { const Vec3T xyz = start[j]; - test = nanovdb::isApproxZero( (points[i] - xyz).lengthSqr() ); + test = nanovdb::math::isApproxZero( (points[i] - xyz).lengthSqr() ); } EXPECT_TRUE(test); } @@ -1441,13 +1444,132 @@ TEST(TestNanoVDBCUDA, Large_CudaPointsToGrid_World64) //timer.stop(); }// Large_CudaPointsToGrid_World64 +TEST(TestNanoVDBCUDA, Large_CudaPointsToGrid_World64_density) +{// unlike the previous unit-test this one selects the dx to match a specific point density + using BuildT = nanovdb::Point; + using Vec3T = nanovdb::Vec3d; + //nanovdb::util::Timer timer; + const size_t pointCount = 1 << 20;// 1048576 + std::vector points; + //generate random points + points.reserve(pointCount); + std::srand(98765); + const int max = 512, min = -max; + auto op = [&](){return rand() % (max - min) + min;}; + //timer.start("Creating "+std::to_string(pointCount)+" random points on the CPU"); + while (points.size() < pointCount) points.emplace_back(op(), op(), op()); + //timer.stop(); + EXPECT_EQ(pointCount, points.size()); + Vec3T* d_points; + const size_t pointSize = points.size() * sizeof(Vec3T); + //std::cerr << "Point footprint: " << (pointSize >> 20) << " MB" << std::endl; 
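// Note the constructor used below: unlike PointsToGrid(voxelSize), which fixes
// the voxel size up front, the (targetPointsPerVoxel, tolerance) form lets the
// converter search for a dx such that the densest voxel receives roughly the
// requested number of points; the EXPECT_NEAR on maxPointsPerVoxel() further
// down verifies the achieved density. Usage sketch (template argument as in
// this test; behavior inferred from the assertions below):
//
//   nanovdb::tools::cuda::PointsToGrid<nanovdb::Point> converter(targetPointsPerVoxel, tolerance);
//   auto hdl = converter.getHandle(d_points, pointCount);
//   // converter.maxPointsPerVoxel() is then within tolerance of the target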
+ //timer.start("Allocating "+std::to_string(pointSize >> 20)+" MB on the GPU"); + cudaCheck(cudaMalloc(&d_points, pointSize)); + //timer.restart("Copying points from CPU to GPU"); + cudaCheck(cudaMemcpy(d_points, points.data(), pointSize, cudaMemcpyHostToDevice)); + //timer.stop(); + + const int targetPointsPerVoxel = 60, tolerance = 1; + //timer.start("Building grid on GPU from "+std::to_string(points.size())+" points"); + nanovdb::tools::cuda::PointsToGrid converter(targetPointsPerVoxel, tolerance);// fixed density + //converter.setVerbose(2); + auto handle = converter.getHandle(d_points, pointCount); + //timer.stop(); + cudaCheck(cudaFree(d_points)); + //std::cerr << "Grid size: " << (handle.size() >> 20) << " MB" << std::endl; + + const uint32_t maxPointsPerVoxel = converter.maxPointsPerVoxel(); + const uint32_t maxPointsPerLeaf = converter.maxPointsPerLeaf(); + EXPECT_NEAR(maxPointsPerVoxel, targetPointsPerVoxel, tolerance); + EXPECT_LE(maxPointsPerLeaf, targetPointsPerVoxel*512); + //std::cerr << "maxPointsPerLeaf = " << maxPointsPerLeaf << " maxPointsPerVoxel = " << maxPointsPerVoxel << std::endl; + + EXPECT_TRUE(handle.deviceData());// grid only exists on the GPU + EXPECT_TRUE(handle.deviceGrid()); + EXPECT_FALSE(handle.deviceGrid(0)); + EXPECT_TRUE(handle.deviceGrid(0)); + EXPECT_FALSE(handle.deviceGrid(1)); + EXPECT_FALSE(handle.data());// no grid was yet allocated on the CPU + + //timer.start("Allocating and copying grid from GPU to CPU"); + auto *grid = handle.grid();// no grid on the CPU + EXPECT_FALSE(grid); + handle.deviceDownload();// creates a copy on the CPU + EXPECT_TRUE(handle.deviceData()); + EXPECT_TRUE(handle.data()); + auto *data = handle.gridData(); + EXPECT_TRUE(data); + grid = handle.grid(); + EXPECT_TRUE(grid); + //EXPECT_TRUE(grid->isLexicographic()); + EXPECT_TRUE(grid->isBreadthFirst()); + //EXPECT_EQ(nanovdb::Vec3d(voxelSize), grid->voxelSize()); + EXPECT_EQ(pointCount, grid->pointCount()); + EXPECT_TRUE(nanovdb::CoordBBox::createCube(min, max-1).isInside(grid->indexBBox())); + //std::cerr << grid->indexBBox() << std::endl; + + EXPECT_STREQ("World64: Vec3 point coordinates in world space", grid->blindMetaData(0).mName); + { + auto mgrHdl = nanovdb::createNodeManager(*grid); + auto *mgr = mgrHdl.mgr(); + EXPECT_TRUE(mgr); + for (uint32_t i=0; ileafCount(); ++i) { + const auto &leaf = mgr->leaf(i); + for (int j=0; j<512; ++j) { + EXPECT_LE(leaf.getValue(j), maxPointsPerLeaf); + if (leaf.isActive(j)) { + if (j>0) { + EXPECT_LE(leaf.getValue(j) - leaf.getValue(j-1), maxPointsPerVoxel + tolerance); + } else { + EXPECT_LE(leaf.getValue(0), maxPointsPerVoxel); + } + } else if (j>0) { + EXPECT_EQ(leaf.getValue(j), leaf.getValue(j-1)); + } else { + EXPECT_EQ(leaf.getValue(0), 0u); + } + }// loop over voxels + }// loop over leaf nodes + } + + //timer.restart("Parallel unit-testing on CPU"); + nanovdb::util::forEach(points,[&](const nanovdb::util::Range1D &r){ + nanovdb::PointAccessor acc(*grid); + EXPECT_TRUE(acc); + const Vec3T *start = nullptr, *stop = nullptr; + for (size_t i=r.begin(); i!=r.end(); ++i) { + const nanovdb::Coord ijk = grid->worldToIndex(points[i]).round(); + EXPECT_TRUE(acc.probeLeaf(ijk)!=nullptr); + EXPECT_TRUE(acc.isActive(ijk)); + EXPECT_LE(acc.getValue(ijk), pointCount); + const auto *leaf = acc.get>(ijk); + EXPECT_TRUE(leaf); + const auto offset = leaf->CoordToOffset(ijk); + EXPECT_EQ(ijk, leaf->offsetToGlobalCoord(offset)); + const uint64_t count = acc.voxelPoints(ijk, start, stop); + EXPECT_TRUE(start); + EXPECT_TRUE(stop); + 
+            EXPECT_LT(start, stop);
+            EXPECT_LE(count, maxPointsPerVoxel + tolerance);
+            bool test = false;
+            for (uint64_t j=0; test == false && j<count; ++j) {
+                const Vec3T &xyz = start[j];
+                test = nanovdb::math::isApproxZero<double>( (points[i] - xyz).lengthSqr() );
+            }
+            EXPECT_TRUE(test);
+        }
+    });
+
+    //timer.stop();
+}// Large_CudaPointsToGrid_World64_density
+
 TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_World32)
 {
     using BuildT = nanovdb::Point;
     using Vec3T = nanovdb::Vec3f;
-    //nanovdb::CpuTimer timer("Generate sphere with points");
-    auto pointsHandle = nanovdb::createPointSphere(8, 100.0, nanovdb::Vec3d(0.0), 0.5);
+    //nanovdb::util::Timer timer("Generate sphere with points");
+    auto pointsHandle = nanovdb::tools::createPointSphere(8, 100.0, nanovdb::Vec3d(0.0), 0.5);
     //timer.stop();
     auto *pointGrid = pointsHandle.grid<uint32_t>();
@@ -1473,7 +1595,7 @@ TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_World32)
     //timer.stop();
     //timer.start("Building grid on GPU from "+std::to_string(pointCount)+" points");
-    nanovdb::CudaPointsToGrid<BuildT> converter(pointGrid->map());
+    nanovdb::tools::cuda::PointsToGrid<BuildT> converter(pointGrid->map());
     //converter.setVerbose();
     auto handle = converter.getHandle(d_points, pointCount);
     //timer.stop();
@@ -1535,7 +1657,7 @@ TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_World32)
     }
     //timer.restart("Parallel unit-testing on CPU");
-    nanovdb::forEach(0u, pointCount, 1u,[&](const nanovdb::Range1D &r){
+    nanovdb::util::forEach(0u, pointCount, 1u,[&](const nanovdb::util::Range1D &r){
         nanovdb::PointAccessor<Vec3T, BuildT> acc(*grid);
         EXPECT_TRUE(acc);
         const Vec3T *start = nullptr, *stop = nullptr;
@@ -1570,8 +1692,8 @@ TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_Voxel32)
     using BuildT = nanovdb::Point;
     using Vec3T = nanovdb::Vec3f;
-    //nanovdb::CpuTimer timer("Generate sphere with points");
-    auto pointsHandle = nanovdb::createPointSphere(8, 100.0, nanovdb::Vec3d(0.0), 0.5);
+    //nanovdb::util::Timer timer("Generate sphere with points");
+    auto pointsHandle = nanovdb::tools::createPointSphere(8, 100.0, nanovdb::Vec3d(0.0), 0.5);
     //timer.stop();
     auto *pointGrid = pointsHandle.grid<uint32_t>();
@@ -1598,7 +1720,7 @@ TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_Voxel32)
     //timer.start("Building grid on GPU from "+std::to_string(pointCount)+" points");
     /////////////////////////////////////////////////////////////////////////
-    nanovdb::CudaPointsToGrid<BuildT> converter(pointGrid->map());
+    nanovdb::tools::cuda::PointsToGrid<BuildT> converter(pointGrid->map());
     //converter.setVerbose();
     converter.setPointType(nanovdb::PointType::Voxel32);
     auto handle = converter.getHandle(d_points, pointCount);
@@ -1662,7 +1784,7 @@ TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_Voxel32)
     }
     //timer.restart("Parallel unit-testing on CPU");
-    nanovdb::forEach(0u, pointCount, 1u,[&](const nanovdb::Range1D &r){
+    nanovdb::util::forEach(0u, pointCount, 1u,[&](const nanovdb::util::Range1D &r){
         nanovdb::PointAccessor<Vec3T, BuildT> acc(*grid);
         EXPECT_TRUE(acc);
         const Vec3T *start = nullptr, *stop = nullptr;
@@ -1704,8 +1826,8 @@ TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_Voxel16)
     using BuildT = nanovdb::Point;
     using Vec3T = nanovdb::Vec3f;
-    //nanovdb::CpuTimer timer("Generate sphere with points");
-    auto pointsHandle = nanovdb::createPointSphere(8, 100.0, nanovdb::Vec3d(0.0), 0.5);
+    //nanovdb::util::Timer timer("Generate sphere with points");
+    auto pointsHandle = nanovdb::tools::createPointSphere(8, 100.0, nanovdb::Vec3d(0.0), 0.5);
     //timer.stop();
     auto *pointGrid = pointsHandle.grid<uint32_t>();
@@ -1732,7 +1854,7 @@ TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_Voxel16)
     //timer.start("Building grid on GPU from "+std::to_string(pointCount)+" points");
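// [editor's note] setPointType() below selects how the point coordinates are encoded in the
// grid's blind data: World32/World64 keep absolute world-space Vec3f/Vec3d, Voxel8/16/32 keep
// quantized voxel-relative offsets (smaller but lossy), and PointID keeps 32-bit point indices.
// Hedged sketch, assuming d_points/pointCount as in the surrounding tests:
//
//     nanovdb::tools::cuda::PointsToGrid<nanovdb::Point> conv(pointGrid->map());
//     conv.setPointType(nanovdb::PointType::Voxel16);// two bytes per coordinate component
//     auto hdl = conv.getHandle(d_points, pointCount);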
///////////////////////////////////////////////////////////////////////// - nanovdb::CudaPointsToGrid converter(pointGrid->map()); + nanovdb::tools::cuda::PointsToGrid converter(pointGrid->map()); //converter.setVerbose(); converter.setPointType(nanovdb::PointType::Voxel16); auto handle = converter.getHandle(d_points, pointCount); @@ -1796,7 +1918,7 @@ TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_Voxel16) } //timer.restart("Parallel unit-testing on CPU"); - nanovdb::forEach(0u, pointCount, 1u,[&](const nanovdb::Range1D &r){ + nanovdb::util::forEach(0u, pointCount, 1u,[&](const nanovdb::util::Range1D &r){ nanovdb::PointAccessor acc(*grid); EXPECT_TRUE(acc); const nanovdb::Vec3u16 *start = nullptr, *stop = nullptr; @@ -1831,8 +1953,8 @@ TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_Voxel8) using BuildT = nanovdb::Point; using Vec3T = nanovdb::Vec3f; - //nanovdb::CpuTimer timer("Generate sphere with points"); - auto pointsHandle = nanovdb::createPointSphere(8, 100.0, nanovdb::Vec3d(0.0), 0.5); + //nanovdb::util::Timer timer("Generate sphere with points"); + auto pointsHandle = nanovdb::tools::createPointSphere(8, 100.0, nanovdb::Vec3d(0.0), 0.5); //timer.stop(); auto *pointGrid = pointsHandle.grid(); @@ -1861,7 +1983,7 @@ TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_Voxel8) //timer.start("Building grid on GPU from "+std::to_string(pointCount)+" points"); ///////////////////////////////////////////////////////////////////////// //auto handle = nanovdb::cudaPointsToGrid(d_points, pointCount, nanovdb::PointType::Voxel8); - nanovdb::CudaPointsToGrid converter(pointGrid->map()); + nanovdb::tools::cuda::PointsToGrid converter(pointGrid->map()); //converter.setVerbose(); converter.setPointType(nanovdb::PointType::Voxel8); auto handle = converter.getHandle(d_points, pointCount); @@ -1925,7 +2047,7 @@ TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_Voxel8) } //timer.restart("Parallel unit-testing on CPU"); - nanovdb::forEach(0u, pointCount, 1u,[&](const nanovdb::Range1D &r){ + nanovdb::util::forEach(0u, pointCount, 1u,[&](const nanovdb::util::Range1D &r){ nanovdb::PointAccessor acc(*grid); EXPECT_TRUE(acc); const nanovdb::Vec3u8 *start = nullptr, *stop = nullptr; @@ -1960,8 +2082,8 @@ TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_PointID) using BuildT = nanovdb::Point; using Vec3T = nanovdb::Vec3f; - //nanovdb::CpuTimer timer("Generate sphere with points"); - auto pointsHandle = nanovdb::createPointSphere(8, 100.0, nanovdb::Vec3d(0.0), 0.5); + //nanovdb::util::Timer timer("Generate sphere with points"); + auto pointsHandle = nanovdb::tools::createPointSphere(8, 100.0, nanovdb::Vec3d(0.0), 0.5); //timer.stop(); auto *pointGrid = pointsHandle.grid(); @@ -1990,7 +2112,7 @@ TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_PointID) //timer.start("Building grid on GPU from "+std::to_string(pointCount)+" points"); ///////////////////////////////////////////////////////////////////////// //auto handle = nanovdb::cudaPointsToGrid(d_points, pointCount, nanovdb::PointType::Voxel8); - nanovdb::CudaPointsToGrid converter(pointGrid->map()); + nanovdb::tools::cuda::PointsToGrid converter(pointGrid->map()); //converter.setVerbose(2); converter.setPointType(nanovdb::PointType::PointID); auto handle = converter.getHandle(d_points, pointCount); @@ -2054,7 +2176,7 @@ TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_PointID) } //timer.restart("Parallel unit-testing on CPU"); - nanovdb::forEach(0u, pointCount, 1u,[&](const nanovdb::Range1D &r){ + nanovdb::util::forEach(0u, pointCount, 1u,[&](const nanovdb::util::Range1D &r){ 
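// [editor's note] nanovdb::forEach and nanovdb::Range1D moved into the nanovdb::util namespace
// with unchanged semantics: the half-open index range [0, pointCount) is split into grains (here
// of size 1u) and the functor runs in parallel when TBB is enabled, serially otherwise:
//
//     nanovdb::util::forEach(0u, n, 1u, [&](const nanovdb::util::Range1D &r){
//         for (size_t i = r.begin(); i != r.end(); ++i) { /* per-point work */ }
//     });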
nanovdb::PointAccessor acc(*grid); EXPECT_TRUE(acc); const uint32_t *start = nullptr, *stop = nullptr; @@ -2080,14 +2202,14 @@ TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_PointID) TEST(TestNanoVDBCUDA, NanoGrid_Rgba8) { - using BuildT = nanovdb::Rgba8; + using BuildT = nanovdb::math::Rgba8; using GridT = nanovdb::NanoGrid; const size_t num_points = 1; nanovdb::Coord coords[num_points] = {nanovdb::Coord(1, 2, 3)}, *d_coords = nullptr; cudaCheck(cudaMalloc(&d_coords, num_points * sizeof(nanovdb::Coord))); cudaCheck(cudaMemcpy(d_coords, coords, num_points * sizeof(nanovdb::Coord), cudaMemcpyHostToDevice));// CPU -> GPU - nanovdb::CudaPointsToGrid converter; + nanovdb::tools::cuda::PointsToGrid converter; auto handle = converter.getHandle(d_coords, num_points); cudaCheck(cudaFree(d_coords)); @@ -2121,7 +2243,7 @@ TEST(TestNanoVDBCUDA, cudaAddBlindData) nanovdb::Coord coords[num_points] = {nanovdb::Coord(1, 2, 3), nanovdb::Coord(10,20,8)}, *d_coords = nullptr; cudaCheck(cudaMalloc(&d_coords, num_points * sizeof(nanovdb::Coord))); cudaCheck(cudaMemcpy(d_coords, coords, num_points * sizeof(nanovdb::Coord), cudaMemcpyHostToDevice));// CPU -> GPU - auto handle = nanovdb::cudaVoxelsToGrid(d_coords, num_points); + auto handle = nanovdb::tools::cuda::voxelsToGrid(d_coords, num_points); cudaCheck(cudaFree(d_coords)); EXPECT_TRUE(handle.deviceData());// grid only exists on the GPU EXPECT_FALSE(handle.data());// no grid was yet allocated on the CPU @@ -2138,13 +2260,13 @@ TEST(TestNanoVDBCUDA, cudaAddBlindData) cudaCheck(cudaMalloc(&d_blind, num_points * sizeof(float))); cudaCheck(cudaMemcpy(d_blind, blind, num_points * sizeof(float), cudaMemcpyHostToDevice));// CPU -> GPU - //nanovdb::GpuTimer timer("cudaAddBlindData"); - auto handle2 = nanovdb::cudaAddBlindData(d_grid, d_blind, num_points); + //nanovdb::util::cuda::Timer timer("cudaAddBlindData"); + auto handle2 = nanovdb::tools::cuda::addBlindData(d_grid, d_blind, num_points); cudaCheck(cudaFree(d_blind)); //timer.stop(); EXPECT_TRUE(handle2.deviceData());// grid only exists on the GPU EXPECT_FALSE(handle2.data());// no grid was yet allocated on the CPU - EXPECT_EQ(handle2.size(), handle.size() + sizeof(nanovdb::GridBlindMetaData) + nanovdb::AlignUp(num_points*sizeof(float))); + EXPECT_EQ(handle2.size(), handle.size() + sizeof(nanovdb::GridBlindMetaData) + nanovdb::math::AlignUp(num_points*sizeof(float))); auto *grid2 = handle2.grid();// no grid on the CPU EXPECT_FALSE(grid2); @@ -2174,7 +2296,7 @@ TEST(TestNanoVDBCUDA, cudaAddBlindData) cudaCheck(cudaMalloc(&d_blind2, num_points * sizeof(nanovdb::Vec3f))); cudaCheck(cudaMemcpy(d_blind2, blind2, num_points * sizeof(nanovdb::Vec3f), cudaMemcpyHostToDevice));// CPU -> GPU - auto handle3 = nanovdb::cudaAddBlindData(d_grid2, d_blind2, num_points, + auto handle3 = nanovdb::tools::cuda::addBlindData(d_grid2, d_blind2, num_points, nanovdb::GridBlindDataClass::AttributeArray, nanovdb::GridBlindDataSemantic::PointPosition, "this is a test"); @@ -2207,7 +2329,7 @@ TEST(TestNanoVDBCUDA, cudaAddBlindData) TEST(TestNanoVDBCUDA, testGridHandleCopy) { - auto cudaHandle = nanovdb::createLevelSetSphere(100); + auto cudaHandle = nanovdb::tools::createLevelSetSphere(100); { auto *floatGrid = cudaHandle.grid(); EXPECT_TRUE(floatGrid); @@ -2231,13 +2353,18 @@ TEST(TestNanoVDBCUDA, testGridHandleCopy) // make -j testNanoVDB && ./unittest/testNanoVDB --gtest_break_on_failure --gtest_filter="*compareNodeOrdering" TEST(TestNanoVDBCUDA, compareNodeOrdering) { - using namespace nanovdb; #if 0 const int voxelCount = 2; Coord 
coords[voxelCount]={Coord(-1,0,0), Coord(0,0,0)}; #else const int voxelCount = 5; - Coord coords[voxelCount]={Coord(0,0,0), Coord(256,0,0), Coord(0,0,8), Coord(0,-256,0), Coord(0,2,4)}; + nanovdb::Coord coords[voxelCount]={ + nanovdb::Coord(0,0,0), + nanovdb::Coord(256,0,0), + nanovdb::Coord(0,0,8), + nanovdb::Coord(0,-256,0), + nanovdb::Coord(0,2,4) + }; #endif {// check coordToKey and keyToCoord used in CudaPointsToGrid @@ -2268,13 +2395,13 @@ TEST(TestNanoVDBCUDA, compareNodeOrdering) } } - GridHandle handle1, handle2; + nanovdb::GridHandle handle1, handle2; { - build::FloatGrid grid(0.0f); + nanovdb::tools::build::FloatGrid grid(0.0f); auto acc = grid.getAccessor(); for (int i=0; i(); EXPECT_TRUE(grid1); @@ -2299,13 +2426,13 @@ TEST(TestNanoVDBCUDA, compareNodeOrdering) } { - Coord *d_coords = nullptr; - cudaCheck(cudaMalloc(&d_coords, voxelCount * sizeof(Coord))); - cudaCheck(cudaMemcpy(d_coords, coords, voxelCount * sizeof(Coord), cudaMemcpyHostToDevice));// CPU -> GPU + nanovdb::Coord *d_coords = nullptr; + cudaCheck(cudaMalloc(&d_coords, voxelCount * sizeof(nanovdb::Coord))); + cudaCheck(cudaMemcpy(d_coords, coords, voxelCount * sizeof(nanovdb::Coord), cudaMemcpyHostToDevice));// CPU -> GPU #if 0 - auto cudaHandle = cudaVoxelsToGrid(d_coords, voxelCount); + auto cudaHandle = nanovdb::tools::cuda::voxelsToGrid(d_coords, voxelCount); #else - auto cudaHandle = cudaVoxelsToGrid(nanovdb::make_fancy(d_coords), voxelCount); + auto cudaHandle = nanovdb::tools::cuda::voxelsToGrid(nanovdb::make_fancy(d_coords), voxelCount); #endif cudaCheck(cudaFree(d_coords)); cudaHandle.deviceDownload(); @@ -2365,7 +2492,7 @@ template void test_ptr(const PtrT ptr) { using T = typename nanovdb::pointer_traits::element_type; - static const bool test = nanovdb::is_same::type>::value; + static const bool test = nanovdb::util::is_same::type>::value; EXPECT_TRUE(test); EXPECT_EQ(sizeof(float), nanovdb::pointer_traits::element_size); EXPECT_EQ(3.14f, *ptr); @@ -2380,34 +2507,34 @@ TEST(TestNanoVDBCUDA, fancy_ptr) EXPECT_EQ(sizeof(uint8_t), nanovdb::pointer_traits>::element_size); {// test raw pointer - bool test = nanovdb::is_same::element_type, float>::value; + bool test = nanovdb::util::is_same::element_type, float>::value; EXPECT_TRUE(test); - test = nanovdb::is_same::element_type, const float>::value; + test = nanovdb::util::is_same::element_type, const float>::value; EXPECT_TRUE(test); EXPECT_EQ(sizeof(float), nanovdb::pointer_traits::element_size); EXPECT_EQ(sizeof(float), nanovdb::pointer_traits::element_size); } {// test std::shared_ptr - bool test = nanovdb::is_same>::element_type, float>::value; + bool test = nanovdb::util::is_same>::element_type, float>::value; EXPECT_TRUE(test); - test = nanovdb::is_same>::element_type, const float>::value; + test = nanovdb::util::is_same>::element_type, const float>::value; EXPECT_TRUE(test); EXPECT_EQ(sizeof(float), nanovdb::pointer_traits>::element_size); EXPECT_EQ(sizeof(float), nanovdb::pointer_traits>::element_size); } {// test std::unique_ptr - bool test = nanovdb::is_same>::element_type, float>::value; + bool test = nanovdb::util::is_same>::element_type, float>::value; EXPECT_TRUE(test); - test = nanovdb::is_same>::element_type, const float>::value; + test = nanovdb::util::is_same>::element_type, const float>::value; EXPECT_TRUE(test); EXPECT_EQ(sizeof(float), nanovdb::pointer_traits>::element_size); EXPECT_EQ(sizeof(float), nanovdb::pointer_traits>::element_size); } {// test fancy_ptr - bool test = nanovdb::is_same>::element_type, const float>::value; + bool 
test = nanovdb::util::is_same>::element_type, const float>::value; EXPECT_TRUE(test); EXPECT_EQ(sizeof(float), nanovdb::pointer_traits>::element_size); - test = nanovdb::is_same>::element_type, const float>::value; + test = nanovdb::util::is_same>::element_type, const float>::value; EXPECT_TRUE(test); EXPECT_EQ(sizeof(float), nanovdb::pointer_traits>::element_size); } @@ -2426,13 +2553,13 @@ TEST(TestNanoVDBCUDA, CudaGridChecksum) const std::string s{"The quick brown fox jumps over the lazy dog"}; { // test CPU implementation of crc32 without a lookup table std::stringstream ss; - ss << std::hex << std::setw(8) << std::setfill('0') << nanovdb::crc32::checksum(s.c_str(), s.size()); + ss << std::hex << std::setw(8) << std::setfill('0') << nanovdb::util::crc32(s.c_str(), s.size()); EXPECT_EQ("414fa339", ss.str());// 414FA339 from https://rosettagit.org/drafts/crc-32/#c-1 } { // test CPU implementation of crc32 with a lookup table - auto lut = nanovdb::crc32::createLut(); + auto lut = nanovdb::util::createCrc32Lut(); std::stringstream ss; - ss << std::hex << std::setw(8) << std::setfill('0') << nanovdb::crc32::checksum(s.c_str(), s.size(), lut.get()); + ss << std::hex << std::setw(8) << std::setfill('0') << nanovdb::util::crc32(s.c_str(), s.size(), lut.get()); EXPECT_EQ("414fa339", ss.str());// 414FA339 from https://rosettagit.org/drafts/crc-32/#c-1 } {// test GPU implementation @@ -2441,7 +2568,7 @@ TEST(TestNanoVDBCUDA, CudaGridChecksum) cudaCheck(cudaMalloc((void**)&d_checksum, 4)); cudaCheck(cudaMalloc((void**)&d_str, s.size())); cudaCheck(cudaMemcpy(d_str, s.data(), s.size(), cudaMemcpyHostToDevice)); - nanovdb::crc32::checksumKernel<<<1, 1>>>((const uint8_t*)d_str, d_checksum, 1, s.size()); + nanovdb::util::cuda::crc32Kernel<<<1, 1>>>((const uint8_t*)d_str, d_checksum, 1, s.size()); cudaCheck(cudaMemcpy(&checksum, d_checksum, 4, cudaMemcpyDeviceToHost)); cudaCheck(cudaFree(d_str)); cudaCheck(cudaFree(d_checksum)); @@ -2449,7 +2576,7 @@ TEST(TestNanoVDBCUDA, CudaGridChecksum) ss << std::hex << std::setw(8) << std::setfill('0') << checksum; EXPECT_EQ("414fa339", ss.str());// 414FA339 from https://rosettagit.org/drafts/crc-32/#c-1 } - auto handle = nanovdb::createLevelSetSphere(100); + auto handle = nanovdb::tools::createLevelSetSphere(100); EXPECT_TRUE(handle.data()); auto *grid = handle.grid(); EXPECT_TRUE(grid); @@ -2458,38 +2585,40 @@ TEST(TestNanoVDBCUDA, CudaGridChecksum) #if 0// entire grid or just GridData+TreeData+RootData const size_t size = handle.size(); #else - const uint64_t size = grid->memUsage() + grid->tree().memUsage() + grid->tree().root().memUsage() - 16; + //const uint64_t size = grid->memUsage() + grid->tree().memUsage() + grid->tree().root().memUsage() - 16; + const uint64_t size = grid->memUsage() + grid->tree().memUsage() - 16; #endif //std::cerr << "Grid + tree + root data is " << size << " bytes\n"; - nanovdb::CpuTimer cpuTimer; - nanovdb::GpuTimer gpuTimer; + nanovdb::util::Timer cpuTimer; + nanovdb::util::cuda::Timer gpuTimer; + auto lut = nanovdb::util::createCrc32Lut(); + void *ptr = nanovdb::util::PtrAdd(handle.data(), 16); {//benchmark CPU version that uses a table //cpuTimer.start("CPU Tabled CRC of level set sphere"); - auto lut = nanovdb::crc32::createLut(); - checksum = nanovdb::crc32::checksum(handle.data()+16, size, lut.get()); + checksum = nanovdb::util::crc32(ptr, size, lut.get()); //cpuTimer.stop(); //std::cerr << checksum << std::endl; } {//benchmark CPU version that uses no table //cpuTimer.start("CPU Untabled CRC of level set sphere"); - auto 
checksum2 = nanovdb::crc32::checksum(handle.data()+16, size); + auto checksum2 = nanovdb::util::crc32(ptr, size); //cpuTimer.stop(); //std::cerr << checksum2 << std::endl; EXPECT_EQ(checksum, checksum2); } {//benchmark CPU version that uses table - //cpuTimer.start("CPU tabled crc32::CRC of level set sphere"); - auto lut = nanovdb::crc32::createLut(); - auto checksum2 = nanovdb::crc32::checksum(handle.data()+16, size, lut.get()); + //cpuTimer.start("CPU tabled util::CRC of level set sphere"); + auto checksum2 = nanovdb::util::crc32(ptr, size, lut.get()); //cpuTimer.stop(); //std::cerr << checksum2 << std::endl; EXPECT_EQ(checksum, checksum2); } uint32_t checksum2, *d_checksum; cudaCheck(cudaMalloc((void**)&d_checksum, 4)); + void *d_ptr = nanovdb::util::PtrAdd(handle.deviceData(), 16); {//benchmark GPU version that uses no table //gpuTimer.start("GPU Untabled CRC of level set sphere"); - nanovdb::crc32::checksumKernel<<<1, 1>>>(handle.deviceData()+16, d_checksum, 1, size); + nanovdb::util::cuda::crc32Kernel<<<1, 1>>>(d_ptr, d_checksum, 1, size); //gpuTimer.stop(); cudaCheck(cudaMemcpy(&checksum2, d_checksum, 4, cudaMemcpyDeviceToHost)); //std::cerr << checksum2 << std::endl; @@ -2497,38 +2626,38 @@ TEST(TestNanoVDBCUDA, CudaGridChecksum) } {//benchmark GPU version that uses no table //gpuTimer.start("GPU tabled CRC of level set sphere"); - uint32_t *d_lut = nanovdb::crc32::cudaCreateLut(); - nanovdb::crc32::checksumKernel<<<1, 1>>>(handle.deviceData()+16, d_checksum, 1, size, d_lut); + auto lut = nanovdb::util::cuda::createCrc32Lut(); + uint32_t *d_lut = lut.get(); + nanovdb::util::cuda::crc32Kernel<<<1, 1>>>(d_ptr, d_checksum, 1, size, d_lut); //gpuTimer.stop(); cudaCheck(cudaMemcpy(&checksum2, d_checksum, 4, cudaMemcpyDeviceToHost)); - cudaCheck(cudaFree(d_lut)); //std::cerr << checksum2 << std::endl; EXPECT_EQ(checksum, checksum2); } { //cpuTimer.start("CPU GridChecksum of level set sphere"); - nanovdb::GridChecksum cs; - cs(*grid); - checksum2 = cs.checksum(0);// only check the checksum of grid, tree and root data + nanovdb::Checksum cs = nanovdb::tools::evalChecksum(grid, nanovdb::CheckMode::Partial); + //cs(*grid); + //checksum2 = cs.checksum(0);// only check the checksum of grid, tree and root data //cpuTimer.stop(); //std::cerr << checksum2 << std::endl; - EXPECT_EQ(checksum, checksum2); + EXPECT_EQ(checksum, cs.head()); } - uint64_t fullChecksum; + nanovdb::Checksum fullChecksum; { //cpuTimer.start("CPU FULL cudaGridChecksum tabled CRC of level set sphere"); - nanovdb::updateChecksum(*handle.grid(), nanovdb::ChecksumMode::Full); + nanovdb::tools::updateChecksum(handle.grid(), nanovdb::CheckMode::Full); //cpuTimer.stop(); fullChecksum = handle.grid()->checksum(); - EXPECT_EQ(checksum, fullChecksum & 0xFFFFFFFF); + EXPECT_EQ(checksum, fullChecksum.head()); } { //gpuTimer.start("GPU FULL cudaGridChecksum tabled CRC of level set sphere"); - nanovdb::cudaGridChecksum(handle.deviceGrid(), nanovdb::ChecksumMode::Full); + nanovdb::tools::cuda::updateChecksum(handle.deviceGrid(), nanovdb::CheckMode::Full); //gpuTimer.stop(); - uint64_t fullChecksum2; + nanovdb::Checksum fullChecksum2; cudaCheck(cudaMemcpy(&fullChecksum2, (const uint8_t*)handle.deviceGrid() + 8, 8, cudaMemcpyDeviceToHost)); - EXPECT_EQ(checksum, fullChecksum2 & 0xFFFFFFFF); + EXPECT_EQ(checksum, fullChecksum2.head()); EXPECT_EQ(fullChecksum, fullChecksum2); } cudaCheck(cudaFree(d_checksum)); @@ -2539,7 +2668,7 @@ size_t countActiveVoxels(const nanovdb::NodeManager *d_mgr) { size_t count[2], *d_count; 
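// [editor's note] cudaLambdaKernel was renamed to nanovdb::util::cuda::lambdaKernel; it invokes
// the given __device__ lambda once per item index and internally guards against out-of-range
// thread ids. Hedged sketch (d_out and f are hypothetical):
//
//     nanovdb::util::cuda::lambdaKernel<<<blocks, threadsPerBlock>>>(numItems,
//         [=] __device__ (size_t i){ d_out[i] = f(i); });
//     cudaCheck(cudaGetLastError());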
cudaCheck(cudaMalloc((void**)&d_count, 2*sizeof(size_t))); - cudaLambdaKernel<<<1,1>>>(1, [=] __device__ (size_t){ + nanovdb::util::cuda::lambdaKernel<<<1,1>>>(1, [=] __device__ (size_t){ d_count[0] = 0; for (int i=0; ileafCount(); ++i) d_count[0] += d_mgr->leaf(i).valueMask().countOn(); for (int i=0; ilowerCount(); ++i) d_count[0] += d_mgr->lower(i).valueMask().countOn(); @@ -2555,7 +2684,7 @@ size_t countActiveVoxels(const nanovdb::NodeManager *d_mgr) TEST(TestNanoVDBCUDA, NodeManager) { - auto handle = nanovdb::createLevelSetSphere(100); + auto handle = nanovdb::tools::createLevelSetSphere(100); EXPECT_TRUE(handle.data()); auto *grid = handle.grid(); EXPECT_TRUE(grid); @@ -2563,7 +2692,7 @@ TEST(TestNanoVDBCUDA, NodeManager) auto *d_grid = handle.deviceGrid(); EXPECT_TRUE(d_grid); size_t count = 0; - nanovdb::CpuTimer cpuTimer; + nanovdb::util::Timer cpuTimer; { //cpuTimer.start("CPU NodeManager"); auto handle2 = nanovdb::createNodeManager<>(*grid); @@ -2573,10 +2702,10 @@ TEST(TestNanoVDBCUDA, NodeManager) count = mgr->grid().tree().activeVoxelCount(); } - nanovdb::GpuTimer gpuTimer; + nanovdb::util::cuda::Timer gpuTimer; { //gpuTimer.start("GPU NodeManager"); - auto handle2 = nanovdb::cudaCreateNodeManager(d_grid); + auto handle2 = nanovdb::cuda::createNodeManager(d_grid); //gpuTimer.stop(); auto *d_mgr = handle2.deviceMgr(); EXPECT_TRUE(d_mgr); @@ -2587,13 +2716,13 @@ TEST(TestNanoVDBCUDA, NodeManager) TEST(TestNanoVDBCUDA, GridStats) { using GridT = nanovdb::NanoGrid; - auto handle = nanovdb::createLevelSetSphere(100, + auto handle = nanovdb::tools::createLevelSetSphere(100, nanovdb::Vec3d(0), 1.0, 3.0, nanovdb::Vec3d(0), "test", - nanovdb::StatsMode::Disable); + nanovdb::tools::StatsMode::Disable); EXPECT_TRUE(handle.data()); GridT *grid = handle.grid(); EXPECT_TRUE(grid); @@ -2624,8 +2753,8 @@ TEST(TestNanoVDBCUDA, GridStats) EXPECT_EQ(n0, grid->tree().nodeCount(0)); } { - //nanovdb::CpuTimer cpuTimer("CPU gridStats: Default = Full"); - nanovdb::gridStats(*grid); + //nanovdb::util::Timer cpuTimer("CPU gridStats: Default = Full"); + nanovdb::tools::updateGridStats(grid); //cpuTimer.stop(); } {// check min/max using const iterators @@ -2674,8 +2803,8 @@ TEST(TestNanoVDBCUDA, GridStats) } { - //nanovdb::GpuTimer gpuTimer("GPU gridStats: Default = Full"); - nanovdb::cudaGridStats(d_grid); + //nanovdb::util::cuda::Timer gpuTimer("GPU gridStats: Default = Full"); + nanovdb::tools::cuda::updateGridStats(d_grid); //gpuTimer.stop(); } {// check bbox and stats of device grid @@ -2691,3 +2820,32 @@ TEST(TestNanoVDBCUDA, GridStats) EXPECT_EQ(grid->tree().root().stdDeviation(), data->mStdDevi); } }// GridStats + +TEST(TestNanoVDBCUDA, cudaIsValid) +{ + const auto mode = nanovdb::CheckMode::Full; + using GridT = nanovdb::NanoGrid; + auto handle = nanovdb::tools::createLevelSetSphere(100, + nanovdb::Vec3d(0), + 1.0, + 3.0, + nanovdb::Vec3d(0), + "test", + nanovdb::tools::StatsMode::Disable, + mode); + EXPECT_TRUE(handle.data()); + GridT *grid = handle.grid(); + EXPECT_TRUE(grid); + handle.deviceUpload(); + GridT *d_grid = handle.deviceGrid(); + EXPECT_TRUE(d_grid); + const bool verbose = false; + + EXPECT_TRUE(nanovdb::isValid(grid, mode, verbose)); + EXPECT_TRUE(nanovdb::tools::cuda::isValid(d_grid, mode, verbose)); + + grid->mGridType = nanovdb::GridType::Vec3f; + EXPECT_FALSE(nanovdb::isValid(grid, mode, verbose)); + handle.deviceUpload(); + EXPECT_FALSE(nanovdb::tools::cuda::isValid(d_grid, mode, verbose)); +}// cudaIsValid diff --git a/nanovdb/nanovdb/unittest/TestOpenVDB.cc 
b/nanovdb/nanovdb/unittest/TestOpenVDB.cc index e14792cb81..06b9da7f4c 100644 --- a/nanovdb/nanovdb/unittest/TestOpenVDB.cc +++ b/nanovdb/nanovdb/unittest/TestOpenVDB.cc @@ -7,22 +7,22 @@ #include // for FILE #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #if !defined(_MSC_VER) // does not compile in msvc c++ due to zero-sized arrays. #include -#include +#include #endif #include @@ -58,12 +58,14 @@ class TestOpenVDB : public ::testing::Test void SetUp() override { openvdb::initialize(); + mStr = new char[256]; // Code here will be called immediately after the constructor (right // before each test). } void TearDown() override { + delete [] mStr; // Code here will be called immediately after each test (right // before the destructor). } @@ -183,6 +185,7 @@ class TestOpenVDB : public ::testing::Test } openvdb::util::CpuTimer mTimer; + char *mStr; }; // TestOpenVDB // make -j && ./unittest/testOpenVDB --gtest_break_on_failure --gtest_filter="*getExtrema" @@ -190,7 +193,7 @@ TEST_F(TestOpenVDB, getExtrema) { using wBBoxT = openvdb::math::BBox; auto srcGrid = this->getSrcGrid(false, 0, 3);// level set of a bunny if available, else an octahedron - auto handle = nanovdb::createNanoGrid(*srcGrid, nanovdb::StatsMode::All); + auto handle = nanovdb::tools::createNanoGrid(*srcGrid, nanovdb::tools::StatsMode::All); EXPECT_TRUE(handle); auto* dstGrid = handle.grid(); EXPECT_TRUE(dstGrid); @@ -205,15 +208,15 @@ TEST_F(TestOpenVDB, getExtrema) const wBBoxT iBBox = wBBox.applyInverseMap(*indexToWorldMap); //std::cerr << "Query bbox: iBBox = " << iBBox << ", wBBox = " << wBBox << std::endl; - const nanovdb::CoordBBox bbox(nanovdb::Round(iBBox.min()), - nanovdb::Round(iBBox.max())); + const nanovdb::CoordBBox bbox(nanovdb::math::Round(iBBox.min()), + nanovdb::math::Round(iBBox.max())); //std::cerr << "Query index bbox = " << bbox << std::endl; //nanovdb::NodeManager mgr(*dstGrid); //std::cerr << "Root child nodes: " << mgr.nodeCount(2) << std::endl; //mTimer.start("getExtrema"); - nanovdb::Extrema ext1 = nanovdb::getExtrema(*dstGrid, bbox), ext2; + nanovdb::tools::Extrema ext1 = nanovdb::tools::getExtrema(*dstGrid, bbox), ext2; //mTimer.restart("naive approach"); for (auto it = bbox.begin(); it; ++it) ext2.add(dstAcc.getValue(*it)); //mTimer.stop(); @@ -242,9 +245,9 @@ TEST_F(TestOpenVDB, MapToNano) EXPECT_EQ(ijk2, nanovdb::Coord(1, 2, -4)); } {// Vec3f - constexpr bool test1 = nanovdb::is_same::type>::value; + constexpr bool test1 = nanovdb::util::is_same::type>::value; EXPECT_TRUE(test1); - constexpr bool test2 = nanovdb::is_same::type>::value; + constexpr bool test2 = nanovdb::util::is_same::type>::value; EXPECT_FALSE(test2); const openvdb::Vec3f xyz1(1, 2, -4); nanovdb::Vec3f xyz2(-2, 7, 9); @@ -253,9 +256,9 @@ TEST_F(TestOpenVDB, MapToNano) EXPECT_EQ(xyz2, nanovdb::Vec3f(1, 2, -4)); } {// Vec4d - constexpr bool test1 = nanovdb::is_same::type>::value; + constexpr bool test1 = nanovdb::util::is_same::type>::value; EXPECT_TRUE(test1); - constexpr bool test2 = nanovdb::is_same::type>::value; + constexpr bool test2 = nanovdb::util::is_same::type>::value; EXPECT_FALSE(test2); const openvdb::Vec4d xyz1(1, 2, -4, 7); nanovdb::Vec4d xyz2(-2, 7, 9, -4); @@ -264,9 +267,9 @@ TEST_F(TestOpenVDB, MapToNano) EXPECT_EQ(xyz2, nanovdb::Vec4d(1, 2, -4, 7)); } {// MaskValue - constexpr bool test1 = 
nanovdb::is_same::type>::value; + constexpr bool test1 = nanovdb::util::is_same::type>::value; EXPECT_TRUE(test1); - constexpr bool test2 = nanovdb::is_same::type>::value; + constexpr bool test2 = nanovdb::util::is_same::type>::value; EXPECT_FALSE(test2); EXPECT_EQ(sizeof(nanovdb::ValueMask), sizeof(openvdb::ValueMask)); } @@ -295,8 +298,8 @@ TEST_F(TestOpenVDB, BasicGrid) const std::string name("test name"); - EXPECT_EQ(nanovdb::AlignUp(8 + 8 + 2 + 2 + 4 + 8 + nanovdb::GridData::MaxNameSize + 48 + sizeof(nanovdb::Map) + 24 + 4 + 4 + 8 + 4), sizeof(GridT)); - EXPECT_EQ(nanovdb::AlignUp(4*8 + 2 * 4 * 3 + 8), sizeof(TreeT)); + EXPECT_EQ(nanovdb::math::AlignUp(8 + 8 + 2 + 2 + 4 + 8 + nanovdb::GridData::MaxNameSize + 48 + sizeof(nanovdb::Map) + 24 + 4 + 4 + 8 + 4), sizeof(GridT)); + EXPECT_EQ(nanovdb::math::AlignUp(4*8 + 2 * 4 * 3 + 8), sizeof(TreeT)); EXPECT_EQ(size_t(4*8 + 2 * 4 * 3 + 8), sizeof(TreeT));// should already be 32 byte aligned size_t bytes[9]; @@ -524,7 +527,7 @@ TEST_F(TestOpenVDB, BasicGrid) EXPECT_EQ(uint32_t(NANOVDB_PATCH_VERSION_NUMBER), grid->version().getPatch()); EXPECT_TRUE(grid->isValid()); EXPECT_EQ(grid->gridType(), nanovdb::GridType::Float); - EXPECT_EQ(grid->gridClass(), nanovdb::GridClass::Unknown); + EXPECT_EQ(grid->gridClass(),nanovdb::GridClass::Unknown); EXPECT_FALSE(grid->isLevelSet()); EXPECT_FALSE(grid->isFogVolume()); EXPECT_FALSE(grid->isStaggered()); @@ -553,12 +556,35 @@ TEST_F(TestOpenVDB, BasicGrid) } } // BaseGrid + +TEST_F(TestOpenVDB, MagicType) +{ + {// toMagic(uint64_t) + EXPECT_EQ( nanovdb::toMagic(NANOVDB_MAGIC_NUMB), nanovdb::MagicType::NanoVDB ); + EXPECT_EQ( nanovdb::toMagic(NANOVDB_MAGIC_GRID), nanovdb::MagicType::NanoGrid ); + EXPECT_EQ( nanovdb::toMagic(NANOVDB_MAGIC_FILE), nanovdb::MagicType::NanoFile ); + EXPECT_EQ( nanovdb::toMagic(NANOVDB_MAGIC_NODE), nanovdb::MagicType::NanoNode ); + EXPECT_EQ( nanovdb::toMagic(NANOVDB_MAGIC_FRAG), nanovdb::MagicType::NanoFrag ); + EXPECT_EQ( nanovdb::toMagic( 0x56444220UL), nanovdb::MagicType::OpenVDB ); + } + + {// toStr(MagicType) + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::MagicType::Unknown ), "unknown"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::MagicType::OpenVDB ), "openvdb"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::MagicType::NanoVDB ), "nanovdb"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::MagicType::NanoGrid ), "nanovdb::Grid"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::MagicType::NanoFile ), "nanovdb::File"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::MagicType::NanoNode ), "nanovdb::NodeManager"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr(mStr, nanovdb::MagicType::NanoFrag ), "fragmented nanovdb::Grid"), 0 ); + } +} + TEST_F(TestOpenVDB, OpenToNanoVDB_Empty) { { // empty grid openvdb::FloatGrid srcGrid(0.0f); auto srcAcc = srcGrid.getAccessor(); - auto handle = nanovdb::createNanoGrid(srcGrid); + auto handle = nanovdb::tools::createNanoGrid(srcGrid); EXPECT_TRUE(handle); auto* meta = handle.gridMetaData(); EXPECT_TRUE(meta); @@ -593,7 +619,7 @@ TEST_F(TestOpenVDB, OpenToNanoVDB_Basic1) srcAcc.setValue(openvdb::Coord(1, 2, 3), 1.0f); EXPECT_TRUE(srcAcc.isValueOn(openvdb::Coord(1, 2, 3))); EXPECT_EQ(1.0f, srcAcc.getValue(openvdb::Coord(1, 2, 3))); - auto handle = nanovdb::createNanoGrid(srcGrid, nanovdb::StatsMode::All); + auto handle = nanovdb::tools::createNanoGrid(srcGrid, nanovdb::tools::StatsMode::All); EXPECT_TRUE(handle); auto* meta = handle.gridMetaData(); EXPECT_TRUE(meta); @@ -632,13 +658,13 @@ 
TEST_F(TestOpenVDB, OpenToNanoVDB_Model) { auto srcGrid = this->getSrcGrid(false); //mTimer.start("Generating NanoVDB grid"); - auto handle = nanovdb::createNanoGrid(*srcGrid); + auto handle = nanovdb::tools::createNanoGrid(*srcGrid); //mTimer.start("Writing NanoVDB grid"); nanovdb::io::writeGrid("data/test.nvdb", handle, this->getCodec()); //mTimer.stop(); auto dstGrid = handle.grid(); - EXPECT_TRUE(nanovdb::isValid(dstGrid)); + EXPECT_TRUE(nanovdb::isAligned(dstGrid)); auto kernel = [&](const openvdb::CoordBBox& bbox) { using CoordT = const nanovdb::Coord; @@ -679,9 +705,9 @@ TEST_F(TestOpenVDB, OpenToNanoVDB_Fp4) EXPECT_EQ(2.0f, srcAcc.getValue(openvdb::Coord(-10, 20,-50))); EXPECT_EQ(3.0f, srcAcc.getValue(openvdb::Coord( 50,-12, 30))); - nanovdb::CreateNanoGrid converter(srcGrid); + nanovdb::tools::CreateNanoGrid converter(srcGrid); //converter.setVerbose(); - converter.setStats(nanovdb::StatsMode::All); + converter.setStats(nanovdb::tools::StatsMode::All); auto handle = converter.getHandle();// (srcGrid); EXPECT_TRUE(handle); @@ -733,7 +759,7 @@ TEST_F(TestOpenVDB, OpenToNanoVDB_Fp4) {// Model auto openGrid = this->getSrcGrid(false); const float tolerance = 0.5f*openGrid->voxelSize()[0]; - nanovdb::CreateNanoGrid converter(*openGrid); + nanovdb::tools::CreateNanoGrid converter(*openGrid); converter.enableDithering(); //converter.setVerbose(2); auto handle = converter.getHandle(); @@ -774,8 +800,8 @@ TEST_F(TestOpenVDB, OpenToNanoVDB_Fp8) EXPECT_EQ(2.0f, srcAcc.getValue(openvdb::Coord(-10, 20,-50))); EXPECT_EQ(3.0f, srcAcc.getValue(openvdb::Coord( 50,-12, 30))); - nanovdb::CreateNanoGrid converter(srcGrid); - converter.setStats(nanovdb::StatsMode::All); + nanovdb::tools::CreateNanoGrid converter(srcGrid); + converter.setStats(nanovdb::tools::StatsMode::All); auto handle = converter.getHandle(); EXPECT_TRUE(handle); @@ -816,7 +842,7 @@ TEST_F(TestOpenVDB, OpenToNanoVDB_Fp8) {// Model auto openGrid = this->getSrcGrid(false); const float tolerance = 0.05f*openGrid->voxelSize()[0]; - nanovdb::CreateNanoGrid converter(*openGrid); + nanovdb::tools::CreateNanoGrid converter(*openGrid); auto handle = converter.getHandle(); converter.enableDithering(); //converter.setVerbose(2); @@ -858,9 +884,9 @@ TEST_F(TestOpenVDB, OpenToNanoVDB_Fp16) EXPECT_EQ(2.0f, srcAcc.getValue(openvdb::Coord(-10, 20,-50))); EXPECT_EQ(3.0f, srcAcc.getValue(openvdb::Coord( 50,-12, 30))); - nanovdb::CreateNanoGrid converter(srcGrid); + nanovdb::tools::CreateNanoGrid converter(srcGrid); //converter.setVerbose(2); - converter.setStats(nanovdb::StatsMode::All); + converter.setStats(nanovdb::tools::StatsMode::All); auto handle = converter.getHandle(); EXPECT_TRUE(handle); @@ -902,7 +928,7 @@ TEST_F(TestOpenVDB, OpenToNanoVDB_Fp16) {// Model auto openGrid = this->getSrcGrid(false); const float tolerance = 0.005f*openGrid->voxelSize()[0]; - nanovdb::CreateNanoGrid converter(*openGrid); + nanovdb::tools::CreateNanoGrid converter(*openGrid); converter.enableDithering(); auto handle = converter.getHandle(); //converter.setVerbose(2); @@ -944,8 +970,8 @@ TEST_F(TestOpenVDB, OpenToNanoVDB_FpN) EXPECT_EQ(2.0f, srcAcc.getValue(openvdb::Coord(-10, 20,-50))); EXPECT_EQ(3.0f, srcAcc.getValue(openvdb::Coord( 50,-12, 30))); - nanovdb::CreateNanoGrid converter(srcGrid); - converter.setStats(nanovdb::StatsMode::All); + nanovdb::tools::CreateNanoGrid converter(srcGrid); + converter.setStats(nanovdb::tools::StatsMode::All); auto handle = converter.getHandle(); EXPECT_TRUE(handle); @@ -990,11 +1016,11 @@ TEST_F(TestOpenVDB, OpenToNanoVDB_FpN) 
#else auto openGrid = this->getSrcGrid(true, 1, 1);// FOG volume of Disney cloud or cube #endif - nanovdb::CreateNanoGrid converter(*openGrid); + nanovdb::tools::CreateNanoGrid converter(*openGrid); //converter.setVerbose(2); const float tolerance = 0.05f; - nanovdb::AbsDiff oracle(tolerance); + nanovdb::tools::AbsDiff oracle(tolerance); auto handle = converter.getHandle(oracle); auto* nanoGrid = handle.grid(); @@ -1014,13 +1040,13 @@ TEST_F(TestOpenVDB, OpenToNanoVDB_FpN) EXPECT_TRUE( oracle(exact, approx) ); } }; - nanovdb::forEach(openGrid->evalActiveVoxelBoundingBox(), kernel); + nanovdb::util::forEach(openGrid->evalActiveVoxelBoundingBox(), kernel); handle = nanovdb::io::readGrid("data/test_fpN.nvdb"); nanoGrid = handle.grid(); EXPECT_TRUE(nanoGrid); - nanovdb::forEach(openGrid->evalActiveVoxelBoundingBox(), kernel); + nanovdb::util::forEach(openGrid->evalActiveVoxelBoundingBox(), kernel); } } // OpenToNanoVDB_FpN @@ -1098,7 +1124,7 @@ TEST_F(TestOpenVDB, PointIndexGrid) EXPECT_EQ(pointCount, count); //mTimer.start("Generating NanoVDB grid from PointIndexGrid"); - auto handle = nanovdb::createNanoGrid(*srcGrid, nanovdb::StatsMode::All, nanovdb::ChecksumMode::Full); + auto handle = nanovdb::tools::createNanoGrid(*srcGrid, nanovdb::tools::StatsMode::All, nanovdb::CheckMode::Full); //mTimer.stop(); EXPECT_TRUE(handle); auto* meta = handle.gridMetaData(); @@ -1210,7 +1236,7 @@ TEST_F(TestOpenVDB, PointDataGridBasic) srcGrid->setName("PointDataGrid"); //mTimer.start("Generating NanoVDB grid from PointDataGrid"); - auto handle = nanovdb::createNanoGrid(*srcGrid); + auto handle = nanovdb::tools::createNanoGrid(*srcGrid); //mTimer.stop(); EXPECT_TRUE(handle); @@ -1242,7 +1268,7 @@ TEST_F(TestOpenVDB, PointDataGridBasic) // Create a read-only AttributeHandle. Position always uses Vec3f. openvdb::points::AttributeHandle positionHandle(leafIter->constAttributeArray("P")); openvdb::Coord ijkSrc(openvdb::Coord::min()); - nanovdb::Coord ijkDst(nanovdb::Maximum::value()); + nanovdb::Coord ijkDst(nanovdb::math::Maximum::value()); for (auto indexIter = leafIter->beginIndexOn(); indexIter; ++indexIter) { // Extract the local voxel-space position of the point relative to its occupying voxel ijk. const openvdb::Vec3f vxlSrc = positionHandle.get(*indexIter); @@ -1330,7 +1356,7 @@ TEST_F(TestOpenVDB, PointDataGridRandom) srcGrid->setName("PointDataGrid"); //mTimer.start("Generating NanoVDB grid from PointDataGrid"); - auto handle = nanovdb::createNanoGrid(*srcGrid); + auto handle = nanovdb::tools::createNanoGrid(*srcGrid); //mTimer.stop(); EXPECT_TRUE(handle); @@ -1354,7 +1380,7 @@ TEST_F(TestOpenVDB, PointDataGridRandom) // Create a read-only AttributeHandle. Position always uses Vec3f. openvdb::points::AttributeHandle positionHandle(leafIter->constAttributeArray("P")); openvdb::Coord ijkSrc(openvdb::Coord::min()); - nanovdb::Coord ijkDst(nanovdb::Maximum::value()); + nanovdb::Coord ijkDst(nanovdb::math::Maximum::value()); for (auto indexIter = leafIter->beginIndexOn(); indexIter; ++indexIter) { // Extract the local voxel-space position of the point relative to its occupying voxel ijk. 
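// [editor's note] By OpenVDB's point-data convention the stored "P" attribute is a voxel-local
// offset (each component in [-0.5, 0.5] relative to the voxel center), so the world-space
// position is recovered as indexToWorld(ijk + offset); the comparisons below rely on the NanoVDB
// copy preserving exactly this encoding.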
const openvdb::Vec3f vxlSrc = positionHandle.get(*indexIter); @@ -1429,7 +1455,7 @@ TEST_F(TestOpenVDB, CNanoVDB) { auto srcGrid = this->getSrcGrid(); //mTimer.start("Generating NanoVDB grid"); - auto handle = nanovdb::createNanoGrid(*srcGrid); + auto handle = nanovdb::tools::createNanoGrid(*srcGrid); //mTimer.stop(); EXPECT_TRUE(handle); EXPECT_TRUE(handle.data()); @@ -1460,7 +1486,7 @@ TEST_F(TestOpenVDB, CNanoVDBTrilinear) { auto srcGrid = this->getSrcGrid(); //mTimer.start("Generating NanoVDB grid"); - auto handle = nanovdb::createNanoGrid(*srcGrid); + auto handle = nanovdb::tools::createNanoGrid(*srcGrid); //mTimer.stop(); EXPECT_TRUE(handle); EXPECT_TRUE(handle.data()); @@ -1502,7 +1528,7 @@ TEST_F(TestOpenVDB, CNanoVDBTrilinearStencil) { auto srcGrid = this->getSrcGrid(); //mTimer.start("Generating NanoVDB grid"); - auto handle = nanovdb::createNanoGrid(*srcGrid); + auto handle = nanovdb::tools::createNanoGrid(*srcGrid); //mTimer.stop(); EXPECT_TRUE(handle); EXPECT_TRUE(handle.data()); @@ -1543,13 +1569,13 @@ TEST_F(TestOpenVDB, CNanoVDBTrilinearStencil) TEST_F(TestOpenVDB, NanoToOpenVDB_BuildGrid) {// test build::Grid -> NanoVDB -> OpenVDB - nanovdb::build::Grid buildGrid(0.0f, "test", nanovdb::GridClass::LevelSet); + nanovdb::tools::build::Grid buildGrid(0.0f, "test", nanovdb::GridClass::LevelSet); auto buildAcc = buildGrid.getAccessor(); buildAcc.setValue(nanovdb::Coord(1, 2, 3), 1.0f); buildAcc.setValue(nanovdb::Coord(2, -2, 9), 2.0f); EXPECT_EQ(1.0f, buildAcc.getValue(nanovdb::Coord(1, 2, 3))); EXPECT_EQ(2.0f, buildAcc.getValue(nanovdb::Coord(2, -2, 9))); - auto handle = nanovdb::createNanoGrid(buildGrid); + auto handle = nanovdb::tools::createNanoGrid(buildGrid); EXPECT_TRUE(handle); auto* meta = handle.gridMetaData(); EXPECT_TRUE(meta); @@ -1565,7 +1591,7 @@ TEST_F(TestOpenVDB, NanoToOpenVDB_BuildGrid) EXPECT_EQ(1.0f, nanoAcc.getValue(nanovdb::Coord(1, 2, 3))); EXPECT_EQ(2.0f, nanoAcc.getValue(nanovdb::Coord(2, -2, 9))); - auto openGrid = nanovdb::nanoToOpenVDB(*nanoGrid); + auto openGrid = nanovdb::tools::nanoToOpenVDB(*nanoGrid); EXPECT_TRUE(openGrid); auto openAcc = openGrid->getAccessor(); EXPECT_EQ(1.0f, openAcc.getValue(openvdb::Coord(1, 2, 3))); @@ -1594,7 +1620,7 @@ TEST_F(TestOpenVDB, NanoToOpenVDB) //std::cerr << "Grid name: " << srcGrid->gridName() << std::endl; //mTimer.start("Deserializing NanoVDB grid"); - auto dstGrid = nanovdb::nanoToOpenVDB(*srcGrid); + auto dstGrid = nanovdb::tools::nanoToOpenVDB(*srcGrid); //mTimer.stop(); EXPECT_TRUE(dstGrid); @@ -1678,13 +1704,13 @@ TEST_F(TestOpenVDB, MultiFile) grid.setName("Int32 grid"); grid.tree().setValue(openvdb::Coord(-256), 10); EXPECT_EQ(1u, grid.activeVoxelCount()); - handles.push_back(nanovdb::createNanoGrid(grid)); + handles.push_back(nanovdb::tools::createNanoGrid(grid)); } { // 2: add an empty int32_t grid openvdb::Int32Grid grid(-4); grid.setName("Int32 grid, empty"); EXPECT_EQ(0u, grid.activeVoxelCount()); - handles.push_back(nanovdb::createNanoGrid(grid)); + handles.push_back(nanovdb::tools::createNanoGrid(grid)); } { // 3: add a ValueMask grid openvdb::MaskGrid grid(false); @@ -1698,7 +1724,7 @@ TEST_F(TestOpenVDB, MultiFile) grid.tree().evalActiveVoxelBoundingBox(bbox); //std::cerr << bbox << std::endl; EXPECT_EQ(openvdb::CoordBBox(min, max), bbox); - handles.push_back(nanovdb::createNanoGrid(grid)); + handles.push_back(nanovdb::tools::createNanoGrid(grid)); } { // 4: add a bool grid openvdb::BoolGrid grid(false); @@ -1707,7 +1733,7 @@ TEST_F(TestOpenVDB, MultiFile) EXPECT_EQ(1u, 
grid.activeVoxelCount()); grid.tree().setValue(openvdb::Coord( 10, 450, 90), true); EXPECT_EQ(2u, grid.activeVoxelCount()); - handles.push_back(nanovdb::createNanoGrid(grid)); + handles.push_back(nanovdb::tools::createNanoGrid(grid)); } { // 5: add a Vec3f grid openvdb::Vec3fGrid grid(openvdb::Vec3f(0.0f, 0.0f, -1.0f)); @@ -1716,7 +1742,7 @@ TEST_F(TestOpenVDB, MultiFile) EXPECT_EQ(0u, grid.activeVoxelCount()); grid.tree().setValue(openvdb::Coord(-256), openvdb::Vec3f(1.0f, 0.0f, 0.0f)); EXPECT_EQ(1u, grid.activeVoxelCount()); - handles.push_back(nanovdb::createNanoGrid(grid)); + handles.push_back(nanovdb::tools::createNanoGrid(grid)); } { // 6: add a Vec4f grid using OpenVDBVec4fGrid = openvdb::Grid::Type>; @@ -1727,7 +1753,7 @@ TEST_F(TestOpenVDB, MultiFile) EXPECT_EQ(0u, grid.activeVoxelCount()); grid.tree().setValue(openvdb::Coord(-256), openvdb::Vec4f(1.0f, 0.0f, 0.0f, 0.0f)); EXPECT_EQ(1u, grid.activeVoxelCount()); - handles.push_back(nanovdb::createNanoGrid(grid)); + handles.push_back(nanovdb::tools::createNanoGrid(grid)); OpenVDBVec4fGrid::unregisterGrid(); } { // 7: add an int64_t grid @@ -1735,7 +1761,7 @@ TEST_F(TestOpenVDB, MultiFile) grid.setName("Int64 grid"); grid.tree().setValue(openvdb::Coord(0), 10); EXPECT_EQ(1u, grid.activeVoxelCount()); - handles.push_back(nanovdb::createNanoGrid(grid)); + handles.push_back(nanovdb::tools::createNanoGrid(grid)); } for (int i = 0; i < 10; ++i) {// 8 -> 17 const float radius = 100.0f; @@ -1743,7 +1769,7 @@ TEST_F(TestOpenVDB, MultiFile) const openvdb::Vec3f center(i * 10.0f, 0.0f, 0.0f); auto srcGrid = openvdb::tools::createLevelSetSphere(radius, center, voxelSize, width); srcGrid->setName("Level set sphere at (" + std::to_string(i * 10) + ",0,0)"); - handles.push_back(nanovdb::createNanoGrid(*srcGrid)); + handles.push_back(nanovdb::tools::createNanoGrid(*srcGrid)); } { // 18: add a double grid openvdb::DoubleGrid grid(0.0); @@ -1751,7 +1777,7 @@ TEST_F(TestOpenVDB, MultiFile) grid.setGridClass(openvdb::GRID_FOG_VOLUME); grid.tree().setValue(openvdb::Coord(6000), 1.0); EXPECT_EQ(1u, grid.activeVoxelCount()); - handles.push_back(nanovdb::createNanoGrid(grid)); + handles.push_back(nanovdb::tools::createNanoGrid(grid)); } nanovdb::io::writeGrids("data/multi.nvdb", handles, this->getCodec()); @@ -1798,15 +1824,15 @@ TEST_F(TestOpenVDB, MultiFile) EXPECT_EQ(1u, tree.nodeCount(2)); auto mgrHandle = nanovdb::createNodeManager(*grid); auto *mgr = mgrHandle.mgr(); - EXPECT_TRUE(nanovdb::isValid(mgr)); + EXPECT_TRUE(nanovdb::isAligned(mgr)); const auto& leaf = mgr->leaf(0); - EXPECT_TRUE(nanovdb::isValid(&leaf)); + EXPECT_TRUE(nanovdb::isAligned(&leaf)); EXPECT_EQ(bbox, leaf.bbox()); const auto& node1 = mgr->lower(0); - EXPECT_TRUE(nanovdb::isValid(&node1)); + EXPECT_TRUE(nanovdb::isAligned(&node1)); EXPECT_EQ(bbox, node1.bbox()); const auto& node2 = mgr->upper(0); - EXPECT_TRUE(nanovdb::isValid(&node2)); + EXPECT_TRUE(nanovdb::isAligned(&node2)); EXPECT_EQ(bbox, node2.bbox()); EXPECT_FALSE(grid->isLevelSet()); EXPECT_FALSE(grid->isFogVolume()); @@ -2050,9 +2076,9 @@ TEST_F(TestOpenVDB, LongGridName) EXPECT_EQ(1u, srcGrid.activeVoxelCount()); const bool isLong = length > limit; #if 1 - auto handle = nanovdb::createNanoGrid(srcGrid); + auto handle = nanovdb::tools::createNanoGrid(srcGrid); #else - nanovdb::CreateNanoGrid converter(srcGrid); + nanovdb::tools::CreateNanoGrid converter(srcGrid); auto handle = converter.getHandle(); #endif auto* dstGrid = handle.grid(); @@ -2092,8 +2118,8 @@ TEST_F(TestOpenVDB, LevelSetFiles) 
foundModels.push_back(fileName.substr(pos, fileName.size() - pos - 4 )); //mTimer.restart("Generating NanoVDB grid"); - //auto handle = nanovdb::createNanoGrid(*srcGrid, nanovdb::StatsMode::All, nanovdb::ChecksumMode::Partial); - auto handle = nanovdb::createNanoGrid(*srcGrid, nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Disable); + //auto handle = nanovdb::tools::createNanoGrid(*srcGrid, nanovdb::tools::StatsMode::All, nanovdb::CheckMode::Partial); + auto handle = nanovdb::tools::createNanoGrid(*srcGrid, nanovdb::tools::StatsMode::BBox, nanovdb::CheckMode::Disable); //mTimer.restart("Writing NanoVDB grid"); nanovdb::io::writeGrid(os, handle, this->getCodec()); @@ -2170,7 +2196,7 @@ TEST_F(TestOpenVDB, FogFiles) foundModels.push_back(fileName.substr(pos, fileName.size() - pos - 4 )); //mTimer.restart("Generating NanoVDB grid"); - auto handle = nanovdb::createNanoGrid(*srcGrid, nanovdb::StatsMode::All, nanovdb::ChecksumMode::Partial); + auto handle = nanovdb::tools::createNanoGrid(*srcGrid, nanovdb::tools::StatsMode::All, nanovdb::CheckMode::Partial); //mTimer.restart("Writing NanoVDB grid"); nanovdb::io::writeGrid(os, handle, this->getCodec()); @@ -2245,7 +2271,7 @@ TEST_F(TestOpenVDB, PointFiles) EXPECT_TRUE(positionIndex != openvdb::points::AttributeSet::INVALID_POS); //mTimer.restart("Generating NanoVDB grid from PointDataGrid"); - auto handle = nanovdb::createNanoGrid(*srcGrid); + auto handle = nanovdb::tools::createNanoGrid(*srcGrid); //mTimer.restart("Writing NanoVDB grid"); nanovdb::io::writeGrid(os, handle, this->getCodec()); @@ -2268,7 +2294,7 @@ TEST_F(TestOpenVDB, PointFiles) // Create a read-only AttributeHandle. Position always uses Vec3f. openvdb::points::AttributeHandle positionHandle(leafIter->constAttributeArray("P")); openvdb::Coord ijkSrc(openvdb::Coord::min()); - nanovdb::Coord ijkDst(nanovdb::Maximum::value()); + nanovdb::Coord ijkDst(nanovdb::math::Maximum::value()); for (auto indexIter = leafIter->beginIndexOn(); indexIter; ++indexIter) { // Extract the index-space position of the point relative to its occupying voxel ijk. const openvdb::Vec3f vxlSrc = positionHandle.get(*indexIter); @@ -2326,7 +2352,7 @@ TEST_F(TestOpenVDB, Trilinear) acc.setValue(ijk, trilinear(srcGrid->indexToWorld(ijk))); } //mTimer.restart("Generating NanoVDB grid"); - auto handle = nanovdb::createNanoGrid(*srcGrid); + auto handle = nanovdb::tools::createNanoGrid(*srcGrid); //mTimer.restart("Writing NanoVDB grid"); nanovdb::io::writeGrid("data/tmp.nvdb", handle); //mTimer.stop(); @@ -2349,11 +2375,11 @@ TEST_F(TestOpenVDB, Trilinear) //std::cerr << "Trilinear: exact = " << exact << ", approx = " << approx << std::endl; auto dstAcc = dstGrid->getAccessor(); - auto sampler0 = nanovdb::createSampler<0>(dstAcc); + auto sampler0 = nanovdb::math::createSampler<0>(dstAcc); //std::cerr << "0'th order: v = " << sampler0(ijk) << std::endl; EXPECT_EQ(approx, sampler0(ijk)); - auto sampler1 = nanovdb::createSampler<1>(dstAcc); // faster since it's using an accessor!!! + auto sampler1 = nanovdb::math::createSampler<1>(dstAcc); // faster since it's using an accessor!!! 
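// [editor's note] The template argument of math::createSampler selects the interpolation order:
// 0 = nearest voxel, 1 = trilinear, 2 = triquadratic, 3 = tricubic; constructing the sampler from
// an accessor (rather than the tree) lets repeated lookups reuse cached nodes. Sketch:
//
//     auto acc = dstGrid->getAccessor();
//     auto s1  = nanovdb::math::createSampler<1>(acc);
//     float v  = s1(nanovdb::Vec3d(1.5, 2.25, -3.75));// interpolated at an index-space position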
//std::cerr << "1'th order: v = " << sampler1(ijk) << std::endl; EXPECT_EQ(exact, sampler1(ijk)); @@ -2367,8 +2393,8 @@ TEST_F(TestOpenVDB, Trilinear) EXPECT_NEAR(6.7f, gradWorld[1], 1e-5); EXPECT_NEAR(-3.5f, gradWorld[2], 1e-5); - nanovdb::SampleFromVoxels, 3> sampler3(dstGrid->tree()); - //auto sampler3 = nanovdb::createSampler<3>( dstAcc ); + nanovdb::math::SampleFromVoxels, 3> sampler3(dstGrid->tree()); + //auto sampler3 = nanovdb::math::createSampler<3>( dstAcc ); //std::cerr << "3'rd order: v = " << sampler3(ijk) << std::endl; EXPECT_EQ(exact, sampler3(ijk)); } // Trilinear @@ -2392,7 +2418,7 @@ TEST_F(TestOpenVDB, Triquadratic) acc.setValue(ijk, triquadratic(srcGrid->indexToWorld(ijk))); } //mTimer.restart("Generating NanoVDB grid"); - auto handle = nanovdb::createNanoGrid(*srcGrid); + auto handle = nanovdb::tools::createNanoGrid(*srcGrid); //mTimer.restart("Writing NanoVDB grid"); nanovdb::io::writeGrid("data/tmp.nvdb", handle); //mTimer.stop(); @@ -2414,21 +2440,21 @@ TEST_F(TestOpenVDB, Triquadratic) //std::cerr << "Trilinear: exact = " << exact << ", approx = " << approx << std::endl; auto dstAcc = dstGrid->getAccessor(); - auto sampler0 = nanovdb::createSampler<0>(dstAcc); + auto sampler0 = nanovdb::math::createSampler<0>(dstAcc); //std::cerr << "0'th order: v = " << sampler0(ijk) << std::endl; EXPECT_NEAR(approx, sampler0(ijk), 1e-6); - auto sampler1 = nanovdb::createSampler<1>(dstAcc); + auto sampler1 = nanovdb::math::createSampler<1>(dstAcc); //std::cerr << "1'rd order: nanovdb = " << sampler1(ijk) << ", openvdb: " << openvdb::tools::Sampler<1>::sample(srcGrid->tree(), ijk) << std::endl; EXPECT_NE(exact, sampler1(ijk)); // it's non-linear EXPECT_NEAR(sampler1(ijk), openvdb::tools::Sampler<1>::sample(srcGrid->tree(), ijk), 1e-6); - auto sampler2 = nanovdb::createSampler<2>(dstAcc); + auto sampler2 = nanovdb::math::createSampler<2>(dstAcc); //std::cerr << "2'rd order: nanovdb = " << sampler2(ijk) << ", openvdb: " << openvdb::tools::Sampler<2>::sample(srcGrid->tree(), ijk) << std::endl; EXPECT_NEAR(sampler2(ijk), openvdb::tools::Sampler<2>::sample(srcGrid->tree(), ijk), 1e-6); EXPECT_NEAR(exact, sampler2(ijk), 1e-5); // it's a 2nd order polynomial - auto sampler3 = nanovdb::createSampler<3>(dstAcc); + auto sampler3 = nanovdb::math::createSampler<3>(dstAcc); //std::cerr << "3'rd order: v = " << sampler3(ijk) << std::endl; EXPECT_NEAR(exact, sampler3(ijk), 1e-4); // it's a 2nd order polynomial } // Triquadratic @@ -2451,7 +2477,7 @@ TEST_F(TestOpenVDB, Tricubic) acc.setValue(ijk, tricubic(srcGrid->indexToWorld(ijk))); } //mTimer.restart("Generating NanoVDB grid"); - auto handle = nanovdb::createNanoGrid(*srcGrid); + auto handle = nanovdb::tools::createNanoGrid(*srcGrid); //mTimer.restart("Writing NanoVDB grid"); nanovdb::io::writeGrid("data/tmp.nvdb", handle); //mTimer.stop(); @@ -2473,21 +2499,21 @@ TEST_F(TestOpenVDB, Tricubic) //std::cerr << "Trilinear: exact = " << exact << ", approx = " << approx << std::endl; auto dstAcc = dstGrid->getAccessor(); - auto sampler0 = nanovdb::createSampler<0>(dstAcc); + auto sampler0 = nanovdb::math::createSampler<0>(dstAcc); //std::cerr << "0'th order: v = " << sampler0(ijk) << std::endl; EXPECT_NEAR(approx, sampler0(ijk), 1e-6); - auto sampler1 = nanovdb::createSampler<1>(dstAcc); + auto sampler1 = nanovdb::math::createSampler<1>(dstAcc); //std::cerr << "1'rd order: nanovdb = " << sampler1(ijk) << ", openvdb: " << openvdb::tools::Sampler<1>::sample(srcGrid->tree(), ijk) << std::endl; EXPECT_NE(exact, sampler1(ijk)); // it's non-linear 
         EXPECT_NEAR(sampler1(ijk), openvdb::tools::Sampler<1>::sample(srcGrid->tree(), ijk), 1e-6);
-        auto sampler2 = nanovdb::createSampler<2>(dstAcc);
+        auto sampler2 = nanovdb::math::createSampler<2>(dstAcc);
         //std::cerr << "2nd order: nanovdb = " << sampler2(ijk) << ", openvdb: " << openvdb::tools::Sampler<2>::sample(srcGrid->tree(), ijk) << std::endl;
         EXPECT_NEAR(sampler2(ijk), openvdb::tools::Sampler<2>::sample(srcGrid->tree(), ijk), 1e-6);
         EXPECT_NE(exact, sampler2(ijk)); // it's a 3rd order polynomial
-        auto sampler3 = nanovdb::createSampler<3>(dstAcc);
+        auto sampler3 = nanovdb::math::createSampler<3>(dstAcc);
         //std::cerr << "3rd order: v = " << sampler3(ijk) << std::endl;
         EXPECT_NEAR(exact, sampler3(ijk), 1e-4); // it's a 3rd order polynomial
 } // Tricubic
@@ -2495,7 +2521,7 @@ TEST_F(TestOpenVDB, Tricubic)
 TEST_F(TestOpenVDB, GridValidator)
 {
     auto srcGrid = this->getSrcGrid();
-    auto handle = nanovdb::createNanoGrid(*srcGrid, nanovdb::StatsMode::All, nanovdb::ChecksumMode::Full);
+    auto handle = nanovdb::tools::createNanoGrid(*srcGrid, nanovdb::tools::StatsMode::All, nanovdb::CheckMode::Full);
     //mTimer.stop();
     EXPECT_TRUE(handle);
     EXPECT_TRUE(handle.data());
@@ -2503,34 +2529,34 @@ TEST_F(TestOpenVDB, GridValidator)
     EXPECT_TRUE(grid);
     //mTimer.start("isValid - detailed");
-    EXPECT_TRUE(nanovdb::isValid(*grid, true, true));
+    EXPECT_TRUE(nanovdb::tools::isValid(grid, nanovdb::CheckMode::Full, true));
     //mTimer.stop();
     //mTimer.start("isValid - not detailed");
-    EXPECT_TRUE(nanovdb::isValid(*grid, false, true));
+    EXPECT_TRUE(nanovdb::tools::isValid(grid, nanovdb::CheckMode::Partial, true));
     //mTimer.stop();
     //mTimer.start("Fast CRC");
-    auto fastChecksum = nanovdb::checksum(*grid, nanovdb::ChecksumMode::Full);
+    auto fastChecksum = nanovdb::tools::evalChecksum(grid, nanovdb::CheckMode::Full);
     //mTimer.stop();
-    EXPECT_EQ(fastChecksum, nanovdb::checksum(*grid, nanovdb::ChecksumMode::Full));
+    EXPECT_EQ(fastChecksum, nanovdb::tools::evalChecksum(grid, nanovdb::CheckMode::Full));
     auto* leaf = grid->tree().getFirstLeaf();
-    EXPECT_TRUE(nanovdb::isValid(leaf));
+    EXPECT_TRUE(nanovdb::isAligned(leaf));
     leaf->data()->mValues[512 >> 1] += 0.00001f; // slightly modify a single voxel value
-    EXPECT_NE(fastChecksum, nanovdb::checksum(*grid, nanovdb::ChecksumMode::Full));
-    EXPECT_FALSE(nanovdb::isValid(*grid, true, false));
+    EXPECT_NE(fastChecksum, nanovdb::tools::evalChecksum(grid, nanovdb::CheckMode::Full));
+    EXPECT_FALSE(nanovdb::tools::isValid(grid, nanovdb::CheckMode::Full, false));
     leaf->data()->mValues[512 >> 1] -= 0.00001f; // change back the single voxel value to its original value
-    EXPECT_EQ(fastChecksum, nanovdb::checksum(*grid, nanovdb::ChecksumMode::Full));
-    EXPECT_TRUE(nanovdb::isValid(*grid, true, true));
+    EXPECT_EQ(fastChecksum, nanovdb::tools::evalChecksum(grid, nanovdb::CheckMode::Full));
+    EXPECT_TRUE(nanovdb::tools::isValid(grid, nanovdb::CheckMode::Full, true));
     leaf->data()->mValueMask.toggle(512 >> 1); // change a single bit in a value mask
-    EXPECT_NE(fastChecksum, nanovdb::checksum(*grid, nanovdb::ChecksumMode::Full));
-    EXPECT_FALSE(nanovdb::isValid(*grid, true, false));
+    EXPECT_NE(fastChecksum, nanovdb::tools::evalChecksum(grid, nanovdb::CheckMode::Full));
+    EXPECT_FALSE(nanovdb::tools::isValid(grid, nanovdb::CheckMode::Full, false));
 } // GridValidator

 TEST_F(TestOpenVDB, BenchmarkHostBuffer)
@@ -2564,8 +2590,8 @@ TEST_F(TestOpenVDB, DenseIndexGrid)
     // read openvdb::FloatGrid
     auto srcGrid = this->getSrcGrid(false, 0, 0);// level set of a dragon if available, else an octahedron
     auto& srcTree = srcGrid->tree();
-    nanovdb::CreateNanoGrid builder(*srcGrid);
-    builder.setStats(nanovdb::StatsMode::All);
+    nanovdb::tools::CreateNanoGrid builder(*srcGrid);
+    builder.setStats(nanovdb::tools::StatsMode::All);
     // openvdb::FloatGrid -> nanovdb::FloatGrid
     auto handle = builder.getHandle();
     EXPECT_TRUE(handle);
@@ -2601,7 +2627,7 @@ TEST_F(TestOpenVDB, DenseIndexGrid)
     }
     //mTimer.stop();
     auto *idxLeaf0 = idxGrid->tree().getFirstNode<0>();
-    nanovdb::forEach(nanovdb::Range1D(0,idxGrid->tree().nodeCount(0)),[&](const nanovdb::Range1D &r){
+    nanovdb::util::forEach(nanovdb::util::Range1D(0,idxGrid->tree().nodeCount(0)),[&](const nanovdb::util::Range1D &r){
         auto fltAcc = fltGrid->getAccessor();// NOT thread-safe!
         for (auto i=r.begin(); i!=r.end(); ++i){
             auto *idxLeaf = idxLeaf0 + i;
@@ -2623,7 +2649,7 @@ TEST_F(TestOpenVDB, SparseIndexGrid)
     auto srcGrid = this->getSrcGrid(false, 0, 0);// level set of a dragon if available, else an octahedron
     // openvdb::FloatGrid -> nanovdb::IndexGrid
-    nanovdb::CreateNanoGrid builder(*srcGrid);
+    nanovdb::tools::CreateNanoGrid builder(*srcGrid);
     //mTimer.start("Create IndexGrid");
    auto handle2 = builder.getHandle(1u, false, false);
     //mTimer.stop();
@@ -2651,25 +2677,25 @@ TEST_F(TestOpenVDB, BuildNodeManager)
 {
     {// test NodeManager with build::Grid
-        using GridT = nanovdb::build::Grid<float>;
+        using GridT = nanovdb::tools::build::Grid<float>;
         GridT grid(0.0f);
-        nanovdb::build::NodeManager mgr(grid);
+        nanovdb::tools::build::NodeManager mgr(grid);
         using TreeT = GridT::TreeType;
-        static const bool test = nanovdb::is_same<nanovdb::NodeTrait<TreeT,0>::type, TreeT::LeafNodeType>::value;
+        static const bool test = nanovdb::util::is_same<nanovdb::NodeTrait<TreeT,0>::type, TreeT::LeafNodeType>::value;
         EXPECT_TRUE(test);
     }
     {// test NodeManager with openvdb::Grid
         using GridT = openvdb::FloatGrid;
         GridT grid(0.0f);
-        nanovdb::build::NodeManager mgr(grid);
+        nanovdb::tools::build::NodeManager mgr(grid);
         using TreeT = GridT::TreeType;
-        static const bool test = nanovdb::is_same<nanovdb::NodeTrait<TreeT,0>::type, TreeT::LeafNodeType>::value;
+        static const bool test = nanovdb::util::is_same<nanovdb::NodeTrait<TreeT,0>::type, TreeT::LeafNodeType>::value;
         EXPECT_TRUE(test);
     }
     {// test NodeTrait on nanovdb::Grid
         using GridT = nanovdb::NanoGrid<float>;
         using TreeT = GridT::TreeType;
-        static const bool test = nanovdb::is_same<nanovdb::NodeTrait<TreeT,0>::type, TreeT::LeafNodeType>::value;
+        static const bool test = nanovdb::util::is_same<nanovdb::NodeTrait<TreeT,0>::type, TreeT::LeafNodeType>::value;
         EXPECT_TRUE(test);
     }
 }// BuildNodeManager
@@ -2693,7 +2719,7 @@ TEST_F(TestOpenVDB, Benchmark_OpenVDB_PointIndexGrid)
 {
     const double voxelSize = 0.5;
-    nanovdb::CpuTimer timer("Generate sphere with points");
+    nanovdb::util::Timer timer("Generate sphere with points");
     auto pointsHandle = nanovdb::createPointSphere(8, 100.0, nanovdb::Vec3d(0.0), voxelSize);
     timer.stop();
@@ -2726,7 +2752,7 @@ TEST_F(TestOpenVDB, Benchmark_OpenVDB_PointDataGrid)
 {
     const double voxelSize = 0.5;
-    nanovdb::CpuTimer timer("Generate sphere with points");
+    nanovdb::util::Timer timer("Generate sphere with points");
     auto pointsHandle = nanovdb::createPointSphere(8, 100.0, nanovdb::Vec3d(0.0), voxelSize);
     timer.stop();
diff --git a/nanovdb/nanovdb/util/CpuTimer.h b/nanovdb/nanovdb/util/CpuTimer.h
index 44bf155287..af1ac90d77 100644
--- a/nanovdb/nanovdb/util/CpuTimer.h
+++ b/nanovdb/nanovdb/util/CpuTimer.h
@@ -1,83 +1,6 @@
 // Copyright Contributors to the OpenVDB Project
 // SPDX-License-Identifier: MPL-2.0
-/// @file CpuTimer.h
-///
-/// @author Ken Museth
-///
-/// @brief A simple timing class (in case openvdb::util::CpuTimer is unavailable)
-
-#ifndef NANOVDB_CPU_TIMER_H_HAS_BEEN_INCLUDED
-#define NANOVDB_CPU_TIMER_H_HAS_BEEN_INCLUDED
-
-#include <iostream>
-#include <chrono>
-
-namespace nanovdb {
-
-class CpuTimer
-{
-    std::chrono::high_resolution_clock::time_point mStart;
-public:
-    /// @brief Default constructor
-    CpuTimer() {}
-
-    /// @brief Constructor that starts the timer
-    /// @param msg string message to be printed when timer is started
-    /// @param os output stream for the message above
-    CpuTimer(const std::string &msg, std::ostream& os = std::cerr) {this->start(msg, os);}
-
-    /// @brief Start the timer
-    /// @param msg string message to be printed when timer is started
-    /// @param os output stream for the message above
-    void start(const std::string &msg, std::ostream& os = std::cerr)
-    {
-        os << msg << " ... " << std::flush;
-        mStart = std::chrono::high_resolution_clock::now();
-    }
-
-    /// @brief elapsed time (since start) in milliseconds
-    template <typename AccuracyT = std::chrono::milliseconds>
-    auto elapsed()
-    {
-        auto end = std::chrono::high_resolution_clock::now();
-        return std::chrono::duration_cast<AccuracyT>(end - mStart).count();
-    }
-
-    /// @brief stop the timer
-    /// @tparam AccuracyT Template parameter defining the accuracy of the reported times
-    /// @param os output stream for the message above
-    template<typename AccuracyT = std::chrono::milliseconds>
-    void stop(std::ostream& os = std::cerr)
-    {
-        auto end = std::chrono::high_resolution_clock::now();
-        auto diff = std::chrono::duration_cast<AccuracyT>(end - mStart).count();
-        os << "completed in " << diff;
-        if (std::is_same<AccuracyT, std::chrono::microseconds>::value) {// resolved at compile-time
-            os << " microseconds" << std::endl;
-        } else if (std::is_same<AccuracyT, std::chrono::milliseconds>::value) {
-            os << " milliseconds" << std::endl;
-        } else if (std::is_same<AccuracyT, std::chrono::seconds>::value) {
-            os << " seconds" << std::endl;
-        } else {
-            os << " unknown time unit" << std::endl;
-        }
-    }
-
-    /// @brief stop and start the timer
-    /// @tparam AccuracyT Template parameter defining the accuracy of the reported times
-    /// @param msg string message to be printed when timer is started
-    /// @param os output stream for the message above
-    template<typename AccuracyT = std::chrono::milliseconds>
-    void restart(const std::string &msg, std::ostream& os = std::cerr)
-    {
-        this->stop();
-        this->start(msg, os);
-    }
-
-};// CpuTimer
-
-} // namespace nanovdb
-
-#endif // NANOVDB_CPU_TIMER_HAS_BEEN_INCLUDED
+#include <nanovdb/util/Util.h>// for NANOVDB_DEPRECATED_HEADER
+#include <nanovdb/util/Timer.h>
+NANOVDB_DEPRECATED_HEADER("Include nanovdb/util/Timer.h instead.")
diff --git a/nanovdb/nanovdb/util/CreateNanoGrid.h b/nanovdb/nanovdb/util/CreateNanoGrid.h
index d126594187..eeef8ab71b 100644
--- a/nanovdb/nanovdb/util/CreateNanoGrid.h
+++ b/nanovdb/nanovdb/util/CreateNanoGrid.h
@@ -1,2065 +1,6 @@
 // Copyright Contributors to the OpenVDB Project
 // SPDX-License-Identifier: MPL-2.0
-/*!
-    \file CreateNanoGrid.h
-
-    \author Ken Museth
-
-    \date June 26, 2020
-
-    \note In the examples below we assume that @c srcGrid is an existing grid of type
-          SrcGridT = @c openvdb::FloatGrid, @c nanovdb::FloatGrid or @c nanovdb::build::FloatGrid.
-
-    \brief Convert any grid to a nanovdb grid of the same type, e.g. float->float
-    \code
-    auto handle = nanovdb::createNanoGrid(srcGrid);
-    auto *dstGrid = handle.grid<float>();
-    \endcode
-
-    \brief Convert a grid to a nanovdb grid of a different type, e.g.
float->half - \code - auto handle = nanovdb::createNanoGrid(srcGrid); - auto *dstGrid = handle.grid(); - \endcode - - \brief Convert a grid to a nanovdb grid of the same type but using a CUDA buffer - \code - auto handle = nanovdb::createNanoGrid(srcGrid); - auto *dstGrid = handle.grid(); - \endcode - - \brief Create a nanovdb grid that indices values in an existing source grid of any type. - If DstBuildT = nanovdb::ValueIndex both active and in-active values are indexed - and if DstBuildT = nanovdb::ValueOnIndex only active values are indexed. - \code - using DstBuildT = nanovdb::ValueIndex;// index both active an inactive values - auto handle = nanovdb::createNanoGridSrcGridT,DstBuildT>(srcGrid,0,false,false);//no blind data, tile values or stats - auto *dstGrid = handle.grid(); - \endcode - - \brief Create a NanoVDB grid from scratch - \code -#if defined(NANOVDB_USE_OPENVDB) && !defined(__CUDACC__) - using SrcGridT = openvdb::FloatGrid; -#else - using SrcGridT = nanovdb::build::FloatGrid; -#endif - SrcGridT srcGrid(0.0f);// create an empty source grid - auto srcAcc = srcGrid.getAccessor();// create an accessor - srcAcc.setValue(nanovdb::Coord(1,2,3), 1.0f);// set a voxel value - - auto handle = nanovdb::createNanoGrid(srcGrid);// convert source grid to a grid handle - auto dstGrid = handle.grid();// get a pointer to the destination grid - \endcode - - \brief Convert a base-pointer to an openvdb grid, denoted srcGrid, to a nanovdb - grid of the same type, e.g. float -> float or openvdb::Vec3f -> nanovdb::Vec3f - \code - auto handle = nanovdb::openToNanoVDB(*srcGrid);// convert source grid to a grid handle - auto dstGrid = handle.grid();// get a pointer to the destination grid - \endcode - - \brief Converts any existing grid to a NanoVDB grid, for example: - nanovdb::build::Grid -> nanovdb::Grid - nanovdb::Grid -> nanovdb::Grid - nanovdb::Grid -> nanovdb::Grid - openvdb::Grid -> nanovdb::Grid - openvdb::Grid -> nanovdb::Grid - openvdb::Grid -> nanovdb::Grid - openvdb::Grid -> nanovdb::Grid - - \note This files replaces GridBuilder.h, IndexGridBuilder.h and OpenToNanoVDB.h -*/ - -#ifndef NANOVDB_CREATE_NANOGRID_H_HAS_BEEN_INCLUDED -#define NANOVDB_CREATE_NANOGRID_H_HAS_BEEN_INCLUDED - -#if defined(NANOVDB_USE_OPENVDB) && !defined(__CUDACC__) -#include -#include -#include -#endif - -#include "GridBuilder.h" -#include "NodeManager.h" -#include "GridHandle.h" -#include "GridStats.h" -#include "GridChecksum.h" -#include "Range.h" -#include "Invoke.h" -#include "ForEach.h" -#include "Reduce.h" -#include "PrefixSum.h" -#include "DitherLUT.h"// for nanovdb::DitherLUT - -#include -#include -#include -#include // for memcpy -#include - -namespace nanovdb { - -// Forward declarations (defined below) -template class CreateNanoGrid; -class AbsDiff; -template struct MapToNano; - -//================================================================================================ - -#if defined(NANOVDB_USE_OPENVDB) && !defined(__CUDACC__) -/// @brief Forward declaration of free-standing function that converts an OpenVDB GridBase into a NanoVDB GridHandle -/// @tparam BufferT Type of the buffer used to allocate the destination grid -/// @param base Shared pointer to a base openvdb grid to be converted -/// @param sMode Mode for computing statistics of the destination grid -/// @param cMode Mode for computing checksums of the destination grid -/// @param verbose Mode of verbosity -/// @return Handle to the destination NanoGrid -template -GridHandle -openToNanoVDB(const openvdb::GridBase::Ptr& base, - 
StatsMode sMode = StatsMode::Default, - ChecksumMode cMode = ChecksumMode::Default, - int verbose = 0); -#endif - -//================================================================================================ - -/// @brief Freestanding function that creates a NanoGrid from any source grid -/// @tparam SrcGridT Type of in input (source) grid, e.g. openvdb::Grid or nanovdb::Grid -/// @tparam DstBuildT Type of values in the output (destination) nanovdb Grid, e.g. float or nanovdb::Fp16 -/// @tparam BufferT Type of the buffer used ti allocate the destination grid -/// @param srcGrid Input (source) grid to be converted -/// @param sMode Mode for computing statistics of the destination grid -/// @param cMode Mode for computing checksums of the destination grid -/// @param verbose Mode of verbosity -/// @param buffer Instance of a buffer used for allocation -/// @return Handle to the destination NanoGrid -template::type, - typename BufferT = HostBuffer> -typename disable_if::is_index || BuildTraits::is_Fp, GridHandle>::type -createNanoGrid(const SrcGridT &srcGrid, - StatsMode sMode = StatsMode::Default, - ChecksumMode cMode = ChecksumMode::Default, - int verbose = 0, - const BufferT &buffer = BufferT()); - -//================================================================================================ - -/// @brief Freestanding function that creates a NanoGrid or NanoGrid from any source grid -/// @tparam SrcGridT Type of in input (source) grid, e.g. openvdb::Grid or nanovdb::Grid -/// @tparam DstBuildT If ValueIndex all (active and inactive) values are indexed and if -/// it is ValueOnIndex only active values are indexed. -/// @tparam BufferT BufferT Type of the buffer used ti allocate the destination grid -/// @param channels If non-zero the values (active or all) in @c srcGrid are encoded as blind -/// data in the output index grid. @c channels indicates the number of copies -/// of these blind data -/// @param includeStats If true all tree nodes will includes indices for stats, i.e. min/max/avg/std-div -/// @param includeTiles If false on values in leaf nodes are indexed -/// @param verbose Mode of verbosity -/// @param buffer Instance of a buffer used for allocation -/// @return Handle to the destination NanoGrid where T = ValueIndex or ValueOnIndex -template::type, - typename BufferT = HostBuffer> -typename enable_if::is_index, GridHandle>::type -createNanoGrid(const SrcGridT &srcGrid, - uint32_t channels = 0u, - bool includeStats = true, - bool includeTiles = true, - int verbose = 0, - const BufferT &buffer = BufferT()); - -//================================================================================================ - -/// @brief Freestanding function to create a NanoGrid from any source grid -/// @tparam SrcGridT Type of in input (source) grid, e.g. openvdb::Grid or nanovdb::Grid -/// @tparam DstBuildT = FpN, i.e. variable bit-width of the output grid -/// @tparam OracleT Type of the oracle used to determine the local bit-width, i.e. N in FpN -/// @tparam BufferT Type of the buffer used to allocate the destination grid -/// @param srcGrid Input (source) grid to be converted -/// @param ditherOn switch to enable or disable dithering of quantization error -/// @param sMode Mode for computing statistics of the destination grid -/// @param cMode Mode for computing checksums of the destination grid -/// @param verbose Mode of verbosity -/// @param oracle Instance of a oracle used to determine the local bit-width, i.e. 
N in FpN -/// @param buffer Instance of a buffer used for allocation -/// @return Handle to the destination NanoGrid -template::type, - typename OracleT = AbsDiff, - typename BufferT = HostBuffer> -typename enable_if::value, GridHandle>::type -createNanoGrid(const SrcGridT &srcGrid, - StatsMode sMode = StatsMode::Default, - ChecksumMode cMode = ChecksumMode::Default, - bool ditherOn = false, - int verbose = 0, - const OracleT &oracle = OracleT(), - const BufferT &buffer = BufferT()); - -//================================================================================================ - -/// @brief Freestanding function to create a NanoGrid from any source grid, X=4,8,16 -/// @tparam SrcGridT Type of in input (source) grid, e.g. openvdb::Grid or nanovdb::Grid -/// @tparam DstBuildT = Fp4, Fp8 or Fp16, i.e. quantization bit-width of the output grid -/// @tparam BufferT Type of the buffer used to allocate the destination grid -/// @param srcGrid Input (source) grid to be converted -/// @param ditherOn switch to enable or disable dithering of quantization error -/// @param sMode Mode for computing statistics of the destination grid -/// @param cMode Mode for computing checksums of the destination grid -/// @param verbose Mode of verbosity -/// @param buffer Instance of a buffer used for allocation -/// @return Handle to the destination NanoGrid -template::type, - typename BufferT = HostBuffer> -typename enable_if::is_FpX, GridHandle>::type -createNanoGrid(const SrcGridT &srcGrid, - StatsMode sMode = StatsMode::Default, - ChecksumMode cMode = ChecksumMode::Default, - bool ditherOn = false, - int verbose = 0, - const BufferT &buffer = BufferT()); - -//================================================================================================ - -/// @brief Compression oracle based on absolute difference -class AbsDiff -{ - float mTolerance;// absolute error tolerance -public: - /// @note The default value of -1 means it's un-initialized! - AbsDiff(float tolerance = -1.0f) : mTolerance(tolerance) {} - AbsDiff(const AbsDiff&) = default; - ~AbsDiff() = default; - operator bool() const {return mTolerance>=0.0f;} - void init(nanovdb::GridClass gClass, float background) { - if (gClass == GridClass::LevelSet) { - static const float halfWidth = 3.0f; - mTolerance = 0.1f * background / halfWidth;// range of ls: [-3dx; 3dx] - } else if (gClass == GridClass::FogVolume) { - mTolerance = 0.01f;// range of FOG volumes: [0;1] - } else { - mTolerance = 0.0f; - } - } - void setTolerance(float tolerance) { mTolerance = tolerance; } - float getTolerance() const { return mTolerance; } - /// @brief Return true if the approximate value is within the accepted - /// absolute error bounds of the exact value. - /// - /// @details Required member method - bool operator()(float exact, float approx) const - { - return Abs(exact - approx) <= mTolerance; - } -};// AbsDiff - -inline std::ostream& operator<<(std::ostream& os, const AbsDiff& diff) -{ - os << "Absolute tolerance: " << diff.getTolerance(); - return os; -} - -//================================================================================================ - -/// @brief Compression oracle based on relative difference -class RelDiff -{ - float mTolerance;// relative error tolerance -public: - /// @note The default value of -1 means it's un-initialized! 
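/// @par Example
/// A minimal usage sketch for the FpN overload of createNanoGrid declared above, assuming
/// @c srcGrid is an existing openvdb::FloatGrid (the 1% tolerance below is illustrative only):
/// \code
/// nanovdb::RelDiff oracle(0.01f);// accept at most 1% relative error per voxel
/// auto handle = nanovdb::createNanoGrid<openvdb::FloatGrid, nanovdb::FpN>(
///     srcGrid, nanovdb::StatsMode::Default, nanovdb::ChecksumMode::Default,
///     /*ditherOn=*/true, /*verbose=*/0, oracle);
/// auto *dstGrid = handle.grid<nanovdb::FpN>();// grid with per-leaf adaptive bit-widths
/// \endcode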
- RelDiff(float tolerance = -1.0f) : mTolerance(tolerance) {} - RelDiff(const RelDiff&) = default; - ~RelDiff() = default; - operator bool() const {return mTolerance>=0.0f;} - void setTolerance(float tolerance) { mTolerance = tolerance; } - float getTolerance() const { return mTolerance; } - /// @brief Return true if the approximate value is within the accepted - /// relative error bounds of the exact value. - /// - /// @details Required member method - bool operator()(float exact, float approx) const - { - return Abs(exact - approx)/Max(Abs(exact), Abs(approx)) <= mTolerance; - } -};// RelDiff - -inline std::ostream& operator<<(std::ostream& os, const RelDiff& diff) -{ - os << "Relative tolerance: " << diff.getTolerance(); - return os; -} - -//================================================================================================ - -/// @brief The NodeAccessor provides a uniform API for accessing nodes got NanoVDB, OpenVDB and build Grids -/// -/// @note General implementation that works with nanovdb::build::Grid -template -class NodeAccessor -{ -public: - static constexpr bool IS_OPENVDB = false; - static constexpr bool IS_NANOVDB = false; - using BuildType = typename GridT::BuildType; - using ValueType = typename GridT::ValueType; - using GridType = GridT; - using TreeType = typename GridT::TreeType; - using RootType = typename TreeType::RootNodeType; - template - using NodeType = typename NodeTrait::type; - NodeAccessor(const GridT &grid) : mMgr(const_cast(grid)) {} - const GridType& grid() const {return mMgr.grid();} - const TreeType& tree() const {return mMgr.tree();} - const RootType& root() const {return mMgr.root();} - uint64_t nodeCount(int level) const { return mMgr.nodeCount(level); } - template - const NodeType& node(uint32_t i) const {return mMgr.template node(i); } - const std::string& getName() const {return this->grid().getName();}; - bool hasLongGridName() const {return this->grid().getName().length() >= GridData::MaxNameSize;} - const nanovdb::Map& map() const {return this->grid().map();} - GridClass gridClass() const {return this->grid().gridClass();} -private: - build::NodeManager mMgr; -};// NodeAccessor - -//================================================================================================ - -/// @brief Template specialization for nanovdb::Grid which is special since its NodeManage -/// uses a handle in order to support node access on the GPU! 
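/// @par Example
/// All NodeAccessor specializations expose the same interface, so generic code can walk
/// any of the supported grid types uniformly. A minimal sketch, assuming @c grid is an
/// existing nanovdb::NanoGrid<float>:
/// \code
/// nanovdb::NodeAccessor<nanovdb::NanoGrid<float>> acc(grid);
/// for (uint64_t i = 0, n = acc.nodeCount(0); i < n; ++i) {
///     const auto &leaf = acc.node<0>(i);// i'th node at tree level 0, i.e. a leaf node
/// }
/// \endcode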
-template -class NodeAccessor< NanoGrid > -{ -public: - static constexpr bool IS_OPENVDB = false; - static constexpr bool IS_NANOVDB = true; - using BuildType = BuildT; - using BufferType = HostBuffer; - using GridType = NanoGrid; - using ValueType = typename GridType::ValueType; - using TreeType = typename GridType::TreeType; - using RootType = typename TreeType::RootType; - template - using NodeType = typename NodeTrait::type; - NodeAccessor(const GridType &grid) - : mHandle(createNodeManager(grid)) - , mMgr(*(mHandle.template mgr())) {} - const GridType& grid() const {return mMgr.grid();} - const TreeType& tree() const {return mMgr.tree();} - const RootType& root() const {return mMgr.root();} - uint64_t nodeCount(int level) const { return mMgr.nodeCount(level); } - template - const NodeType& node(uint32_t i) const {return mMgr.template node(i); } - std::string getName() const {return std::string(this->grid().gridName());}; - bool hasLongGridName() const {return this->grid().hasLongGridName();} - const nanovdb::Map& map() const {return this->grid().map();} - GridClass gridClass() const {return this->grid().gridClass();} -private: - NodeManagerHandle mHandle; - const NodeManager &mMgr; -};// NodeAccessor - -//================================================================================================ - -/// @brief Trait that maps any type to the corresponding nanovdb type -/// @tparam T Type to be mapped -template -struct MapToNano { using type = T; }; - -#if defined(NANOVDB_USE_OPENVDB) && !defined(__CUDACC__) - -template<> -struct MapToNano {using type = nanovdb::ValueMask;}; -template -struct MapToNano>{using type = nanovdb::Vec3;}; -template -struct MapToNano>{using type = nanovdb::Vec4;}; -template<> -struct MapToNano {using type = uint32_t;}; -template<> -struct MapToNano {using type = uint32_t;}; - -/// Templated Grid with default 32->16->8 configuration -template -using OpenLeaf = openvdb::tree::LeafNode; -template -using OpenLower = openvdb::tree::InternalNode,4>; -template -using OpenUpper = openvdb::tree::InternalNode,5>; -template -using OpenRoot = openvdb::tree::RootNode>; -template -using OpenTree = openvdb::tree::Tree>; -template -using OpenGrid = openvdb::Grid>; - -//================================================================================================ - -/// @brief Template specialization for openvdb::Grid -template -class NodeAccessor> -{ -public: - static constexpr bool IS_OPENVDB = true; - static constexpr bool IS_NANOVDB = false; - using BuildType = BuildT; - using GridType = OpenGrid; - using ValueType = typename GridType::ValueType; - using TreeType = OpenTree; - using RootType = OpenRoot; - template - using NodeType = typename NodeTrait::type; - NodeAccessor(const GridType &grid) : mMgr(const_cast(grid)) { - const auto mat4 = this->grid().transform().baseMap()->getAffineMap()->getMat4(); - mMap.set(mat4, mat4.inverse()); - } - const GridType& grid() const {return mMgr.grid();} - const TreeType& tree() const {return mMgr.tree();} - const RootType& root() const {return mMgr.root();} - uint64_t nodeCount(int level) const { return mMgr.nodeCount(level); } - template - const NodeType& node(uint32_t i) const {return mMgr.template node(i); } - std::string getName() const { return this->grid().getName(); }; - bool hasLongGridName() const {return this->grid().getName().length() >= GridData::MaxNameSize;} - const nanovdb::Map& map() const {return mMap;} - GridClass gridClass() const { - switch (this->grid().getGridClass()) { - case openvdb::GRID_LEVEL_SET: - if 
(!is_floating_point::value) OPENVDB_THROW(openvdb::ValueError, "processGrid: Level sets are expected to be floating point types"); - return GridClass::LevelSet; - case openvdb::GRID_FOG_VOLUME: - return GridClass::FogVolume; - case openvdb::GRID_STAGGERED: - return GridClass::Staggered; - default: - return GridClass::Unknown; - } - } -private: - build::NodeManager mMgr; - nanovdb::Map mMap; -};// NodeAccessor> - -//================================================================================================ - -/// @brief Template specialization for openvdb::tools::PointIndexGrid -template <> -class NodeAccessor -{ -public: - static constexpr bool IS_OPENVDB = true; - static constexpr bool IS_NANOVDB = false; - using BuildType = openvdb::PointIndex32; - using GridType = openvdb::tools::PointIndexGrid; - using TreeType = openvdb::tools::PointIndexTree; - using RootType = typename TreeType::RootNodeType; - using ValueType = typename GridType::ValueType; - template - using NodeType = typename NodeTrait::type; - NodeAccessor(const GridType &grid) : mMgr(const_cast(grid)) { - const auto mat4 = this->grid().transform().baseMap()->getAffineMap()->getMat4(); - mMap.set(mat4, mat4.inverse()); - } - const GridType& grid() const {return mMgr.grid();} - const TreeType& tree() const {return mMgr.tree();} - const RootType& root() const {return mMgr.root();} - uint64_t nodeCount(int level) const { return mMgr.nodeCount(level); } - template - const NodeType& node(uint32_t i) const {return mMgr.template node(i); } - std::string getName() const { return this->grid().getName(); }; - bool hasLongGridName() const {return this->grid().getName().length() >= GridData::MaxNameSize;} - const nanovdb::Map& map() const {return mMap;} - GridClass gridClass() const {return GridClass::PointIndex;} -private: - build::NodeManager mMgr; - nanovdb::Map mMap; -};// NodeAccessor - -//================================================================================================ - -// @brief Template specialization for openvdb::points::PointDataGrid -template <> -class NodeAccessor -{ -public: - static constexpr bool IS_OPENVDB = true; - static constexpr bool IS_NANOVDB = false; - using BuildType = openvdb::PointDataIndex32; - using GridType = openvdb::points::PointDataGrid; - using TreeType = openvdb::points::PointDataTree; - using RootType = typename TreeType::RootNodeType; - using ValueType = typename GridType::ValueType; - template - using NodeType = typename NodeTrait::type; - NodeAccessor(const GridType &grid) : mMgr(const_cast(grid)) { - const auto mat4 = this->grid().transform().baseMap()->getAffineMap()->getMat4(); - mMap.set(mat4, mat4.inverse()); - } - const GridType& grid() const {return mMgr.grid();} - const TreeType& tree() const {return mMgr.tree();} - const RootType& root() const {return mMgr.root();} - uint64_t nodeCount(int level) const { return mMgr.nodeCount(level); } - template - const NodeType& node(uint32_t i) const {return mMgr.template node(i); } - std::string getName() const { return this->grid().getName(); }; - bool hasLongGridName() const {return this->grid().getName().length() >= GridData::MaxNameSize;} - const nanovdb::Map& map() const {return mMap;} - GridClass gridClass() const {return GridClass::PointData;} -private: - build::NodeManager mMgr; - nanovdb::Map mMap; -};// NodeAccessor - -#endif// NANOVDB_USE_OPENVDB - -//================================================================================================ - -/// @brief Creates any nanovdb Grid from any source grid (certain 
combinations are obviously not allowed) -template -class CreateNanoGrid -{ -public: - // SrcGridT can be either openvdb::Grid, nanovdb::Grid or nanovdb::build::Grid - using SrcNodeAccT = NodeAccessor; - using SrcBuildT = typename SrcNodeAccT::BuildType; - using SrcValueT = typename SrcNodeAccT::ValueType; - using SrcTreeT = typename SrcNodeAccT::TreeType; - using SrcRootT = typename SrcNodeAccT::RootType; - template - using SrcNodeT = typename NodeTrait::type; - - /// @brief Constructor from a source grid - /// @param srcGrid Source grid of type SrcGridT - CreateNanoGrid(const SrcGridT &srcGrid); - - /// @brief Constructor from a source node accessor (defined above) - /// @param srcNodeAcc Source node accessor of type SrcNodeAccT - CreateNanoGrid(const SrcNodeAccT &srcNodeAcc); - - /// @brief Set the level of verbosity - /// @param mode level of verbosity, mode=0 means quiet - void setVerbose(int mode = 1) { mVerbose = mode; } - - /// @brief Enable or disable dithering, i.e. randomization of the quantization error. - /// @param on enable or disable dithering - /// @warning Dithering only has an affect when DstBuildT = {Fp4, Fp8, Fp16, FpN} - void enableDithering(bool on = true) { mDitherOn = on; } - - /// @brief Set the mode used for computing statistics of the destination grid - /// @param mode specify the mode of statistics - void setStats(StatsMode mode = StatsMode::Default) { mStats = mode; } - - /// @brief Set the mode used for computing checksums of the destination grid - /// @param mode specify the mode of checksum - void setChecksum(ChecksumMode mode = ChecksumMode::Default) { mChecksum = mode; } - - /// @brief Converts the source grid into a nanovdb grid with the specified destination build type - /// @tparam DstBuildT build type of the destination, output, grid - /// @tparam BufferT Type of the buffer used for allocating the destination grid - /// @param buffer instance of the buffer use for allocation - /// @return Return an instance of a GridHandle (invoking move semantics) - /// @note This version is when DstBuildT != {FpN, ValueIndex, ValueOnIndex} - template::type, typename BufferT = HostBuffer> - typename disable_if::value || - BuildTraits::is_index, GridHandle>::type - getHandle(const BufferT &buffer = BufferT()); - - /// @brief Converts the source grid into a nanovdb grid with variable bit quantization - /// @tparam DstBuildT FpN, i.e. the destination grid uses variable bit quantization - /// @tparam OracleT Type of oracle used to determine the N in FpN - /// @tparam BufferT Type of the buffer used for allocating the destination grid - /// @param oracle Instance of the oracle used to determine the N in FpN - /// @param buffer instance of the buffer use for allocation - /// @return Return an instance of a GridHandle (invoking move semantics) - /// @note This version assumes DstBuildT == FpN - template::type, typename OracleT = AbsDiff, typename BufferT = HostBuffer> - typename enable_if::value, GridHandle>::type - getHandle(const OracleT &oracle = OracleT(), - const BufferT &buffer = BufferT()); - - /// @brief Converts the source grid into a nanovdb grid with indices to external arrays of values - /// @tparam DstBuildT ValueIndex or ValueOnIndex, i.e. index all or just active values - /// @tparam BufferT Type of the buffer used for allocating the destination grid - /// @param channels Number of copies of values encoded as blind data in the destination grid - /// @param includeStats Specify if statics should be indexed - /// @param includeTiles Specify if tile values, i.e. 
non-leaf-node-values, should be indexed - /// @param buffer instance of the buffer use for allocation - /// @return Return an instance of a GridHandle (invoking move semantics) - template::type, typename BufferT = HostBuffer> - typename enable_if::is_index, GridHandle>::type - getHandle(uint32_t channels = 0u, - bool includeStats = true, - bool includeTiles = true, - const BufferT &buffer = BufferT()); - - /// @brief Add blind data to the destination grid - /// @param name String name of the blind data - /// @param dataSemantic Semantics of the blind data - /// @param dataClass Class of the blind data - /// @param dataType Type of the blind data - /// @param count Element count of the blind data - /// @param size Size of each element of the blind data - /// @return Return the index used to access the blind data - uint64_t addBlindData(const std::string& name, - GridBlindDataSemantic dataSemantic, - GridBlindDataClass dataClass, - GridType dataType, - size_t count, size_t size) - { - const size_t order = mBlindMetaData.size(); - mBlindMetaData.emplace(name, dataSemantic, dataClass, dataType, order, count, size); - return order; - } - - /// @brief This method only has affect when getHandle was called with DstBuildT = ValueIndex or ValueOnIndex - /// @return Return the number of indexed values. If called before getHandle was called with - /// DstBuildT = ValueIndex or ValueOnIndex the return value is zero. Else it is a value larger than zero. - uint64_t valueCount() const {return mValIdx[0].empty() ? 0u : mValIdx[0].back();} - - /// @brief Copy values from the source grid into a provided buffer - /// @tparam DstBuildT Must be ValueIndex or ValueOnIndex, i.e. a index grid - /// @param buffer point in which to write values - template - typename enable_if::is_index>::type - copyValues(SrcValueT *buffer); - -private: - - // ========================================================= - - template - typename enable_if::value&&LEVEL==0), typename NodeTrait, LEVEL>::type*>::type - dstNode(uint64_t i) const { - static_assert(LEVEL==0 || LEVEL==1 || LEVEL==2, "Expected LEVEL== {0,1,2}"); - using NodeT = typename NodeTrait, LEVEL>::type; - return PtrAdd(mBufferPtr, mOffset[5-LEVEL]) + i; - } - template - typename enable_if::value && LEVEL==0, NanoLeaf*>::type - dstNode(uint64_t i) const {return PtrAdd>(mBufferPtr, mCodec[i].offset);} - - template NanoRoot* dstRoot() const {return PtrAdd>(mBufferPtr, mOffset.root);} - template NanoTree* dstTree() const {return PtrAdd>(mBufferPtr, mOffset.tree);} - template NanoGrid* dstGrid() const {return PtrAdd>(mBufferPtr, mOffset.grid);} - GridBlindMetaData* dstMeta(uint32_t i) const { return PtrAdd(mBufferPtr, mOffset.meta) + i;}; - - // ========================================================= - - template - typename disable_if::value || BuildTraits::is_index>::type - preProcess(); - - template - typename enable_if::is_index>::type - preProcess(uint32_t channels); - - template - typename enable_if::value>::type - preProcess(OracleT oracle); - - // ========================================================= - - // Below are private methods use to serialize nodes into NanoVDB - template - GridHandle initHandle(const BufferT& buffer); - - // ========================================================= - - template - inline typename enable_if::is_index>::type - postProcess(uint32_t channels); - - template - inline typename disable_if::is_index>::type - postProcess(); - - // ======================================================== - - template - typename 
disable_if::is_special>::type - processLeafs(); - - template - typename enable_if::is_index>::type - processLeafs(); - - template - typename enable_if::is_FpX>::type - processLeafs(); - - template - typename enable_if::value>::type - processLeafs(); - - template - typename enable_if::value>::type - processLeafs(); - - template - typename enable_if::value>::type - processLeafs(); - - // ========================================================= - - template - typename enable_if::is_index>::type - processInternalNodes(); - - template - typename enable_if::is_index>::type - processInternalNodes(); - - // ========================================================= - - template - typename enable_if::is_index>::type - processRoot(); - - template - typename enable_if::is_index>::type - processRoot(); - - // ========================================================= - - template - void processTree(); - - template - void processGrid(); - - template - typename enable_if::is_index, uint64_t>::type - countTileValues(uint64_t valueCount); - - template - typename enable_if::is_index, uint64_t>::type - countValues(); - -#if defined(NANOVDB_USE_OPENVDB) && !defined(__CUDACC__) - template - typename disable_if::value || - is_same::value, uint64_t>::type - countPoints() const; - - template - typename enable_if::value || - is_same::value, uint64_t>::type - countPoints() const; - - template - typename enable_if::value>::type - copyPointAttribute(size_t attIdx, AttT *attPtr); -#else - uint64_t countPoints() const {return 0u;} -#endif - - uint8_t* mBufferPtr;// pointer to the beginning of the destination nanovdb grid buffer - struct BufferOffsets { - uint64_t grid, tree, root, upper, lower, leaf, meta, blind, size; - uint64_t operator[](int i) const { return *(reinterpret_cast(this)+i); } - } mOffset; - int mVerbose; - uint64_t mLeafNodeSize;// non-trivial when DstBuiltT = FpN - - std::unique_ptr mSrcNodeAccPtr;// placeholder for potential local instance - const SrcNodeAccT &mSrcNodeAcc; - struct BlindMetaData; // forward declaration - std::set mBlindMetaData; // sorted according to BlindMetaData.order - struct Codec { float min, max; uint64_t offset; uint8_t log2; };// used for adaptive bit-rate quantization - std::unique_ptr mCodec;// defines a codec per leaf node when DstBuildT = FpN - StatsMode mStats; - ChecksumMode mChecksum; - bool mDitherOn, mIncludeStats, mIncludeTiles; - std::vector mValIdx[3];// store id of first value in node -}; // CreateNanoGrid - -//================================================================================================ - -template -CreateNanoGrid::CreateNanoGrid(const SrcGridT &srcGrid) - : mVerbose(0) - , mSrcNodeAccPtr(new SrcNodeAccT(srcGrid)) - , mSrcNodeAcc(*mSrcNodeAccPtr) - , mStats(StatsMode::Default) - , mChecksum(ChecksumMode::Default) - , mDitherOn(false) - , mIncludeStats(true) - , mIncludeTiles(true) -{ -} - -//================================================================================================ - -template -CreateNanoGrid::CreateNanoGrid(const SrcNodeAccT &srcNodeAcc) - : mVerbose(0) - , mSrcNodeAccPtr(nullptr) - , mSrcNodeAcc(srcNodeAcc) - , mStats(StatsMode::Default) - , mChecksum(ChecksumMode::Default) - , mDitherOn(false) - , mIncludeStats(true) - , mIncludeTiles(true) -{ -} - -//================================================================================================ - -template -struct CreateNanoGrid::BlindMetaData -{ - BlindMetaData(const std::string& name,// name + used to derive GridBlindDataSemantic - const std::string& type,// used 
to derive GridType of blind data - GridBlindDataClass dataClass, - size_t i, size_t valueCount, size_t valueSize) - : metaData(reinterpret_cast(new char[sizeof(GridBlindMetaData)])) - , order(i)// sorted id of meta data - , size(AlignUp(valueCount * valueSize)) - { - std::memset(metaData, 0, sizeof(GridBlindMetaData));// zero out all meta data - if (name.length()>=GridData::MaxNameSize) throw std::runtime_error("blind data name exceeds limit"); - std::memcpy(metaData->mName, name.c_str(), name.length() + 1); - metaData->mValueCount = valueCount; - metaData->mSemantic = BlindMetaData::mapToSemantics(name); - metaData->mDataClass = dataClass; - metaData->mDataType = BlindMetaData::mapToType(type); - metaData->mValueSize = valueSize; - NANOVDB_ASSERT(metaData->isValid()); - } - BlindMetaData(const std::string& name,// only name - GridBlindDataSemantic dataSemantic, - GridBlindDataClass dataClass, - GridType dataType, - size_t i, size_t valueCount, size_t valueSize) - : metaData(reinterpret_cast(new char[sizeof(GridBlindMetaData)])) - , order(i)// sorted id of meta data - , size(AlignUp(valueCount * valueSize)) - { - std::memset(metaData, 0, sizeof(GridBlindMetaData));// zero out all meta data - if (name.length()>=GridData::MaxNameSize) throw std::runtime_error("blind data name exceeds character limit"); - std::memcpy(metaData->mName, name.c_str(), name.length() + 1); - metaData->mValueCount = valueCount; - metaData->mSemantic = dataSemantic; - metaData->mDataClass = dataClass; - metaData->mDataType = dataType; - metaData->mValueSize = valueSize; - NANOVDB_ASSERT(metaData->isValid()); - } - ~BlindMetaData(){ delete [] reinterpret_cast(metaData); } - bool operator<(const BlindMetaData& other) const { return order < other.order; } // required by std::set - static GridType mapToType(const std::string& name) - { - GridType type = GridType::Unknown; - if ("uint32_t" == name) { - type = GridType::UInt32; - } else if ("float" == name) { - type = GridType::Float; - } else if ("vec3s"== name) { - type = GridType::Vec3f; - } else if ("int32" == name) { - type = GridType::Int32; - } else if ("int64" == name) { - type = GridType::Int64; - } - return type; - } - static GridBlindDataSemantic mapToSemantics(const std::string& name) - { - GridBlindDataSemantic semantic = GridBlindDataSemantic::Unknown; - if ("P" == name) { - semantic = GridBlindDataSemantic::PointPosition; - } else if ("V" == name) { - semantic = GridBlindDataSemantic::PointVelocity; - } else if ("Cd" == name) { - semantic = GridBlindDataSemantic::PointColor; - } else if ("N" == name) { - semantic = GridBlindDataSemantic::PointNormal; - } else if ("id" == name) { - semantic = GridBlindDataSemantic::PointId; - } - return semantic; - } - GridBlindMetaData *metaData; - const size_t order, size; -}; // CreateNanoGrid::BlindMetaData - -//================================================================================================ - -template -template -typename disable_if::value || - BuildTraits::is_index, GridHandle>::type -CreateNanoGrid::getHandle(const BufferT& pool) -{ - this->template preProcess(); - auto handle = this->template initHandle(pool); - this->template postProcess(); - return handle; -} // CreateNanoGrid::getHandle - -//================================================================================================ - -template -template -typename enable_if::value, GridHandle>::type -CreateNanoGrid::getHandle(const OracleT& oracle, const BufferT& pool) -{ - this->template preProcess(oracle); - auto handle = this->template 
initHandle(pool); - this->template postProcess(); - return handle; -} // CreateNanoGrid::getHandle - -//================================================================================================ - -template -template -typename enable_if::is_index, GridHandle>::type -CreateNanoGrid::getHandle(uint32_t channels, - bool includeStats, - bool includeTiles, - const BufferT &pool) -{ - mIncludeStats = includeStats; - mIncludeTiles = includeTiles; - this->template preProcess(channels); - auto handle = this->template initHandle(pool); - this->template postProcess(channels); - return handle; -}// CreateNanoGrid::getHandle - -//================================================================================================ - -template -template -GridHandle CreateNanoGrid::initHandle(const BufferT& pool) -{ - mOffset.grid = 0;// grid is always stored at the start of the buffer! - mOffset.tree = NanoGrid::memUsage(); // grid ends and tree begins - mOffset.root = mOffset.tree + NanoTree::memUsage(); // tree ends and root node begins - mOffset.upper = mOffset.root + NanoRoot::memUsage(mSrcNodeAcc.root().getTableSize()); // root node ends and upper internal nodes begin - mOffset.lower = mOffset.upper + NanoUpper::memUsage()*mSrcNodeAcc.nodeCount(2); // upper internal nodes ends and lower internal nodes begin - mOffset.leaf = mOffset.lower + NanoLower::memUsage()*mSrcNodeAcc.nodeCount(1); // lower internal nodes ends and leaf nodes begin - mOffset.meta = mOffset.leaf + mLeafNodeSize;// leaf nodes end and blind meta data begins - mOffset.blind = mOffset.meta + sizeof(GridBlindMetaData)*mBlindMetaData.size(); // meta data ends and blind data begins - mOffset.size = mOffset.blind;// end of buffer - for (const auto& b : mBlindMetaData) mOffset.size += b.size; // accumulate all the blind data - - auto buffer = BufferT::create(mOffset.size, &pool); - mBufferPtr = buffer.data(); - - // Concurrent processing of all tree levels! - invoke( [&](){this->template processLeafs();}, - [&](){this->template processInternalNodes();}, - [&](){this->template processInternalNodes();}, - [&](){this->template processRoot();}, - [&](){this->template processTree();}, - [&](){this->template processGrid();} ); - - return GridHandle(std::move(buffer)); -} // CreateNanoGrid::initHandle - -//================================================================================================ - -template -template -inline typename disable_if::value || BuildTraits::is_index>::type -CreateNanoGrid::preProcess() -{ - if (const uint64_t pointCount = this->countPoints()) { -#if defined(NANOVDB_USE_OPENVDB) && !defined(__CUDACC__) - if constexpr(is_same::value) { - if (!mBlindMetaData.empty()) throw std::runtime_error("expected no blind meta data"); - this->addBlindData("index", - GridBlindDataSemantic::PointId, - GridBlindDataClass::IndexArray, - GridType::UInt32, - pointCount, - sizeof(uint32_t)); - } else if constexpr(is_same::value) { - if (!mBlindMetaData.empty()) throw std::runtime_error("expected no blind meta data"); - auto &srcLeaf = mSrcNodeAcc.template node<0>(0); - const auto& attributeSet = srcLeaf.attributeSet(); - const auto& descriptor = attributeSet.descriptor(); - const auto& nameMap = descriptor.map(); - for (auto it = nameMap.begin(); it != nameMap.end(); ++it) { - const size_t index = it->second; - auto& attArray = srcLeaf.constAttributeArray(index); - mBlindMetaData.emplace(it->first, // name used to derive semantics - descriptor.valueType(index), // type - it->first == "id" ? 
GridBlindDataClass::IndexArray : GridBlindDataClass::AttributeArray, // class - index, // order - pointCount, // element count - attArray.valueTypeSize()); // element size - } - } -#endif// end NANOVDB_USE_OPENVDB - } - if (mSrcNodeAcc.hasLongGridName()) { - this->addBlindData("grid name", - GridBlindDataSemantic::Unknown, - GridBlindDataClass::GridName, - GridType::Unknown, - mSrcNodeAcc.getName().length() + 1, 1); - } - mLeafNodeSize = mSrcNodeAcc.nodeCount(0)*NanoLeaf::DataType::memUsage(); -}// CreateNanoGrid::preProcess - -//================================================================================================ - -template -template -inline typename enable_if::value>::type -CreateNanoGrid::preProcess(OracleT oracle) -{ - static_assert(is_same::value, "preProcess: expected SrcValueT == float"); - - const size_t leafCount = mSrcNodeAcc.nodeCount(0); - if (leafCount==0) { - mLeafNodeSize = 0u; - return; - } - mCodec.reset(new Codec[leafCount]); - - if constexpr(is_same::value) { - if (!oracle) oracle.init(mSrcNodeAcc.gridClass(), mSrcNodeAcc.root().background()); - } - - DitherLUT lut(mDitherOn); - forEach(0, leafCount, 4, [&](const Range1D &r) { - for (auto i=r.begin(); i!=r.end(); ++i) { - const auto &srcLeaf = mSrcNodeAcc.template node<0>(i); - float &min = mCodec[i].min = std::numeric_limits::max(); - float &max = mCodec[i].max = -min; - for (int j=0; j<512; ++j) { - float v = srcLeaf.getValue(j); - if (vmax) max = v; - } - const float range = max - min; - uint8_t &logBitWidth = mCodec[i].log2 = 0;// 0,1,2,3,4 => 1,2,4,8,16 bits - while (range > 0.0f && logBitWidth < 4u) { - const uint32_t mask = (uint32_t(1) << (uint32_t(1) << logBitWidth)) - 1u; - const float encode = mask/range; - const float decode = range/mask; - int j = 0; - do { - const float exact = srcLeaf.getValue(j);//data[j];// exact value - const uint32_t code = uint32_t(encode*(exact - min) + lut(j)); - const float approx = code * decode + min;// approximate value - j += oracle(exact, approx) ? 1 : 513; - } while(j < 512); - if (j == 512) break; - ++logBitWidth; - } - } - }); - - auto getOffset = [&](size_t i){ - --i; - return mCodec[i].offset + NanoLeaf::DataType::memUsage(1u << mCodec[i].log2); - }; - mCodec[0].offset = NanoGrid::memUsage() + - NanoTree::memUsage() + - NanoRoot::memUsage(mSrcNodeAcc.root().getTableSize()) + - NanoUpper::memUsage()*mSrcNodeAcc.nodeCount(2) + - NanoLower::memUsage()*mSrcNodeAcc.nodeCount(1); - for (size_t i=1; iaddBlindData("grid name", - GridBlindDataSemantic::Unknown, - GridBlindDataClass::GridName, - GridType::Unknown, - mSrcNodeAcc.getName().length() + 1, 1); - } -}// CreateNanoGrid::preProcess - -//================================================================================================ - -template -template -inline typename enable_if::is_index, uint64_t>::type -CreateNanoGrid::countTileValues(uint64_t valueCount) -{ - const uint64_t stats = mIncludeStats ? 
4u : 0u;// minimum, maximum, average, and deviation - mValIdx[LEVEL].clear(); - mValIdx[LEVEL].resize(mSrcNodeAcc.nodeCount(LEVEL) + 1, stats);// minimum 1 entry - forEach(1, mValIdx[LEVEL].size(), 8, [&](const Range1D& r){ - for (auto i = r.begin(); i!=r.end(); ++i) { - auto &srcNode = mSrcNodeAcc.template node(i-1); - if constexpr(BuildTraits::is_onindex) {// resolved at compile time - mValIdx[LEVEL][i] += srcNode.getValueMask().countOn(); - } else { - static const uint64_t maxTileCount = uint64_t(1u) << 3*srcNode.LOG2DIM; - mValIdx[LEVEL][i] += maxTileCount - srcNode.getChildMask().countOn(); - } - } - }); - mValIdx[LEVEL][0] = valueCount; - for (size_t i=1; i - -//================================================================================================ - -template -template -inline typename enable_if::is_index, uint64_t>::type -CreateNanoGrid::countValues() -{ - const uint64_t stats = mIncludeStats ? 4u : 0u;// minimum, maximum, average, and deviation - uint64_t valueCount = 1u;// offset 0 corresponds to the background value - if (mIncludeTiles) { - if constexpr(BuildTraits::is_onindex) { - for (auto it = mSrcNodeAcc.root().cbeginValueOn(); it; ++it) ++valueCount; - } else { - for (auto it = mSrcNodeAcc.root().cbeginValueAll(); it; ++it) ++valueCount; - } - valueCount += stats;// optionally append stats for the root node - valueCount = countTileValues(valueCount); - valueCount = countTileValues(valueCount); - } - mValIdx[0].clear(); - mValIdx[0].resize(mSrcNodeAcc.nodeCount(0) + 1, 512u + stats);// minimum 1 entry - if constexpr(BuildTraits::is_onindex) { - forEach(1, mValIdx[0].size(), 8, [&](const Range1D& r) { - for (auto i = r.begin(); i != r.end(); ++i) { - mValIdx[0][i] = stats; - mValIdx[0][i] += mSrcNodeAcc.template node<0>(i-1).getValueMask().countOn(); - } - }); - } - mValIdx[0][0] = valueCount; - prefixSum(mValIdx[0], true);// inclusive prefix sum - return mValIdx[0].back(); -}// CreateNanoGrid::countValues() - -//================================================================================================ - -template -template -inline typename enable_if::is_index>::type -CreateNanoGrid::preProcess(uint32_t channels) -{ - const uint64_t valueCount = this->template countValues(); - mLeafNodeSize = mSrcNodeAcc.nodeCount(0)*NanoLeaf::DataType::memUsage(); - - uint32_t order = mBlindMetaData.size(); - for (uint32_t i=0; i()), - GridBlindDataClass::AttributeArray, - order++, - valueCount, - sizeof(SrcValueT)); - } - if (mSrcNodeAcc.hasLongGridName()) { - this->addBlindData("grid name", - GridBlindDataSemantic::Unknown, - GridBlindDataClass::GridName, - GridType::Unknown, - mSrcNodeAcc.getName().length() + 1, 1); - } -}// preProcess - -//================================================================================================ - -template -template -inline typename disable_if::is_special>::type -CreateNanoGrid::processLeafs() -{ - using DstDataT = typename NanoLeaf::DataType; - using DstValueT = typename DstDataT::ValueType; - static_assert(DstDataT::FIXED_SIZE, "Expected destination LeafNode to have fixed size"); - forEach(0, mSrcNodeAcc.nodeCount(0), 8, [&](const Range1D& r) { - auto *dstData = this->template dstNode(r.begin())->data(); - for (auto i = r.begin(); i != r.end(); ++i, ++dstData) { - auto &srcLeaf = mSrcNodeAcc.template node<0>(i); - if (DstDataT::padding()>0u) { - // Cast to void* to avoid compiler warning about missing trivial copy-assignment - std::memset(reinterpret_cast(dstData), 0, DstDataT::memUsage()); - } else { - dstData->mBBoxDif[0] = 
dstData->mBBoxDif[1] = dstData->mBBoxDif[2] = 0u; - dstData->mFlags = 0u;// enable rendering, no bbox, no stats - dstData->mMinimum = dstData->mMaximum = typename DstDataT::ValueType(); - dstData->mAverage = dstData->mStdDevi = 0; - } - dstData->mBBoxMin = srcLeaf.origin(); // copy origin of node - dstData->mValueMask = srcLeaf.getValueMask(); // copy value mask - DstValueT *dst = dstData->mValues; - if constexpr(is_same::value && SrcNodeAccT::IS_OPENVDB) { - const SrcValueT *src = srcLeaf.buffer().data(); - for (auto *end = dst + 512u; dst != end; dst += 4, src += 4) { - dst[0] = src[0]; // copy *all* voxel values in sets of four, i.e. loop-unrolling - dst[1] = src[1]; - dst[2] = src[2]; - dst[3] = src[3]; - } - } else { - for (uint32_t j=0; j<512u; ++j) *dst++ = static_cast(srcLeaf.getValue(j)); - } - } - }); -} // CreateNanoGrid::processLeafs - -//================================================================================================ - -template -template -inline typename enable_if::is_index>::type -CreateNanoGrid::processLeafs() -{ - using DstDataT = typename NanoLeaf::DataType; - static_assert(DstDataT::FIXED_SIZE, "Expected destination LeafNode to have fixed size"); - static_assert(DstDataT::padding()==0u, "Expected leaf nodes to have no padding"); - - forEach(0, mSrcNodeAcc.nodeCount(0), 8, [&](const Range1D& r) { - const uint8_t flags = mIncludeStats ? 16u : 0u;// 4th bit indicates stats - DstDataT *dstData = this->template dstNode(r.begin())->data();// fixed size - for (auto i = r.begin(); i != r.end(); ++i, ++dstData) { - auto &srcLeaf = mSrcNodeAcc.template node<0>(i); - dstData->mBBoxMin = srcLeaf.origin(); // copy origin of node - dstData->mBBoxDif[0] = dstData->mBBoxDif[1] = dstData->mBBoxDif[2] = 0u; - dstData->mFlags = flags; - dstData->mValueMask = srcLeaf.getValueMask(); // copy value mask - dstData->mOffset = mValIdx[0][i]; - if constexpr(BuildTraits::is_onindex) { - const uint64_t *w = dstData->mValueMask.words(); -#ifdef USE_OLD_VALUE_ON_INDEX - int32_t sum = CountOn(*w++); - uint8_t *p = reinterpret_cast(&dstData->mPrefixSum), *q = p + 7; - for (int j=0; j<7; ++j) { - *p++ = sum & 255u; - *q |= (sum >> 8) << j; - sum += CountOn(*w++); - } -#else - uint64_t &prefixSum = dstData->mPrefixSum, sum = CountOn(*w++); - prefixSum = sum; - for (int n = 9; n < 55; n += 9) {// n=i*9 where i=1,2,..6 - sum += CountOn(*w++); - prefixSum |= sum << n;// each pre-fixed sum is encoded in 9 bits - } -#endif - } else { - dstData->mPrefixSum = 0u; - } - if constexpr(BuildTraits::is_indexmask) dstData->mMask = dstData->mValueMask; - } - }); -} // CreateNanoGrid::processLeafs - -//================================================================================================ - -template -template -inline typename enable_if::value>::type -CreateNanoGrid::processLeafs() -{ - using DstDataT = typename NanoLeaf::DataType; - static_assert(DstDataT::FIXED_SIZE, "Expected destination LeafNode to have fixed size"); - forEach(0, mSrcNodeAcc.nodeCount(0), 8, [&](const Range1D& r) { - auto *dstData = this->template dstNode(r.begin())->data(); - for (auto i = r.begin(); i != r.end(); ++i, ++dstData) { - auto &srcLeaf = mSrcNodeAcc.template node<0>(i); - if (DstDataT::padding()>0u) { - // Cast to void* to avoid compiler warning about missing trivial copy-assignment - std::memset(reinterpret_cast(dstData), 0, DstDataT::memUsage()); - } else { - dstData->mBBoxDif[0] = dstData->mBBoxDif[1] = dstData->mBBoxDif[2] = 0u; - dstData->mFlags = 0u;// enable rendering, no bbox, no stats - 
dstData->mPadding[0] = dstData->mPadding[1] = 0u; - } - dstData->mBBoxMin = srcLeaf.origin(); // copy origin of node - dstData->mValueMask = srcLeaf.getValueMask(); // copy value mask - } - }); -} // CreateNanoGrid::processLeafs - -//================================================================================================ - -template -template -inline typename enable_if::value>::type -CreateNanoGrid::processLeafs() -{ - using DstDataT = typename NanoLeaf::DataType; - static_assert(DstDataT::FIXED_SIZE, "Expected destination LeafNode to have fixed size"); - forEach(0, mSrcNodeAcc.nodeCount(0), 8, [&](const Range1D& r) { - auto *dstData = this->template dstNode(r.begin())->data(); - for (auto i = r.begin(); i != r.end(); ++i, ++dstData) { - auto &srcLeaf = mSrcNodeAcc.template node<0>(i); - if (DstDataT::padding()>0u) { - // Cast to void* to avoid compiler warning about missing trivial copy-assignment - std::memset(reinterpret_cast(dstData), 0, DstDataT::memUsage()); - } else { - dstData->mBBoxDif[0] = dstData->mBBoxDif[1] = dstData->mBBoxDif[2] = 0u; - dstData->mFlags = 0u;// enable rendering, no bbox, no stats - } - dstData->mBBoxMin = srcLeaf.origin(); // copy origin of node - dstData->mValueMask = srcLeaf.getValueMask(); // copy value mask - if constexpr(!is_same::value) { - for (int j=0; j<512; ++j) dstData->mValues.set(j, static_cast(srcLeaf.getValue(j))); - } else if constexpr(SrcNodeAccT::IS_OPENVDB) { - dstData->mValues = *reinterpret_cast*>(srcLeaf.buffer().data()); - } else if constexpr(SrcNodeAccT::IS_NANOVDB) { - dstData->mValues = srcLeaf.data()->mValues; - } else {// build::Leaf - dstData->mValues = srcLeaf.mValues; // copy value mask - } - } - }); -} // CreateNanoGrid::processLeafs - -//================================================================================================ - -template -template -inline typename enable_if::is_FpX>::type -CreateNanoGrid::processLeafs() -{ - using DstDataT = typename NanoLeaf::DataType; - static_assert(DstDataT::FIXED_SIZE, "Expected destination LeafNode to have fixed size"); - using ArrayT = typename DstDataT::ArrayType; - static_assert(is_same::value, "Expected ValueT == float"); - using FloatT = typename std::conditional=16, double, float>::type;// 16 compression and higher requires double - static constexpr FloatT UNITS = FloatT((1 << DstDataT::bitWidth()) - 1);// # of unique non-zero values - DitherLUT lut(mDitherOn); - - forEach(0, mSrcNodeAcc.nodeCount(0), 8, [&](const Range1D& r) { - auto *dstData = this->template dstNode(r.begin())->data(); - for (auto i = r.begin(); i != r.end(); ++i, ++dstData) { - auto &srcLeaf = mSrcNodeAcc.template node<0>(i); - if (DstDataT::padding()>0u) { - // Cast to void* to avoid compiler warning about missing trivial copy-assignment - std::memset(reinterpret_cast(dstData), 0, DstDataT::memUsage()); - } else { - dstData->mFlags = dstData->mBBoxDif[2] = dstData->mBBoxDif[1] = dstData->mBBoxDif[0] = 0u; - dstData->mDev = dstData->mAvg = dstData->mMax = dstData->mMin = 0u; - } - dstData->mBBoxMin = srcLeaf.origin(); // copy origin of node - dstData->mValueMask = srcLeaf.getValueMask(); // copy value mask - // compute extrema values - float min = std::numeric_limits::max(), max = -min; - for (uint32_t j=0; j<512u; ++j) { - const float v = srcLeaf.getValue(j); - if (v < min) min = v; - if (v > max) max = v; - } - dstData->init(min, max, DstDataT::bitWidth()); - // perform quantization relative to the values in the current leaf node - const FloatT encode = UNITS/(max-min); - uint32_t offset = 0; - 
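            // The quantize lambda below visits voxels in scan-line order: each call rescales
            // the next value from the leaf's [min,max] range onto the integer code space
            // [0,UNITS] and adds a per-voxel dither offset to decorrelate the rounding error.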
auto quantize = [&]()->ArrayT{ - const ArrayT tmp = static_cast(encode * (srcLeaf.getValue(offset) - min) + lut(offset)); - ++offset; - return tmp; - }; - auto *code = reinterpret_cast(dstData->mCode); - if (is_same::value) {// resolved at compile-time - for (uint32_t j=0; j<128u; ++j) { - auto tmp = quantize(); - *code++ = quantize() << 4 | tmp; - tmp = quantize(); - *code++ = quantize() << 4 | tmp; - } - } else { - for (uint32_t j=0; j<128u; ++j) { - *code++ = quantize(); - *code++ = quantize(); - *code++ = quantize(); - *code++ = quantize(); - } - } - } - }); -} // CreateNanoGrid::processLeafs - -//================================================================================================ - -template -template -inline typename enable_if::value>::type -CreateNanoGrid::processLeafs() -{ - static_assert(is_same::value, "Expected SrcValueT == float"); - DitherLUT lut(mDitherOn); - forEach(0, mSrcNodeAcc.nodeCount(0), 8, [&](const Range1D& r) { - for (auto i = r.begin(); i != r.end(); ++i) { - auto &srcLeaf = mSrcNodeAcc.template node<0>(i); - auto *dstData = this->template dstNode(i)->data(); - dstData->mBBoxMin = srcLeaf.origin(); // copy origin of node - dstData->mBBoxDif[0] = dstData->mBBoxDif[1] = dstData->mBBoxDif[2] = 0u; - const uint8_t logBitWidth = mCodec[i].log2; - dstData->mFlags = logBitWidth << 5;// pack logBitWidth into 3 MSB of mFlag - dstData->mValueMask = srcLeaf.getValueMask(); // copy value mask - const float min = mCodec[i].min, max = mCodec[i].max; - dstData->init(min, max, uint8_t(1) << logBitWidth); - // perform quantization relative to the values in the current leaf node - uint32_t offset = 0; - float encode = 0.0f; - auto quantize = [&]()->uint8_t{ - const uint8_t tmp = static_cast(encode * (srcLeaf.getValue(offset) - min) + lut(offset)); - ++offset; - return tmp; - }; - auto *dst = reinterpret_cast(dstData+1); - switch (logBitWidth) { - case 0u: {// 1 bit - encode = 1.0f/(max - min); - for (int j=0; j<64; ++j) { - uint8_t a = 0; - for (int k=0; k<8; ++k) a |= quantize() << k; - *dst++ = a; - } - } - break; - case 1u: {// 2 bits - encode = 3.0f/(max - min); - for (int j=0; j<128; ++j) { - auto a = quantize(); - a |= quantize() << 2; - a |= quantize() << 4; - *dst++ = quantize() << 6 | a; - } - } - break; - case 2u: {// 4 bits - encode = 15.0f/(max - min); - for (int j=0; j<128; ++j) { - auto a = quantize(); - *dst++ = quantize() << 4 | a; - a = quantize(); - *dst++ = quantize() << 4 | a; - } - } - break; - case 3u: {// 8 bits - encode = 255.0f/(max - min); - for (int j=0; j<128; ++j) { - *dst++ = quantize(); - *dst++ = quantize(); - *dst++ = quantize(); - *dst++ = quantize(); - } - } - break; - default: {// 16 bits - special implementation using higher bit-precision - auto *dst = reinterpret_cast(dstData+1); - const double encode = 65535.0/(max - min);// note that double is required! 
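
Both branches above write their codes straight into the memory that follows the fixed-size leaf header, addressed as dstData+1. A minimal sketch of that trailing-payload layout, using hypothetical types:

    #include <cstdint>
    #include <cstdlib>
    #include <new>

    // Fixed-size header followed in memory by a variable-width code array,
    // addressed as header + 1: the layout used by the FpN leaf above.
    struct LeafHeader {
        float   minV, quantum;  // dequantization: v = minV + code * quantum
        uint8_t logBitWidth;    // codes are (1 << logBitWidth) bits wide
    };

    inline uint8_t* payload(LeafHeader* h) {
        return reinterpret_cast<uint8_t*>(h + 1); // first byte after the header
    }

    int main()
    {
        const size_t codeBytes = (512u * 8u) / 8u;  // 8-bit case: 512 one-byte codes
        void* mem = std::malloc(sizeof(LeafHeader) + codeBytes);
        auto* leaf = new (mem) LeafHeader{0.0f, 1.0f / 255.0f, 3u};
        payload(leaf)[0] = 255u;                    // first voxel's code
        std::free(mem);
    }
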
- for (int j=0; j<128; ++j) { - *dst++ = uint16_t(encode * (srcLeaf.getValue(offset) - min) + lut(offset)); ++offset; - *dst++ = uint16_t(encode * (srcLeaf.getValue(offset) - min) + lut(offset)); ++offset; - *dst++ = uint16_t(encode * (srcLeaf.getValue(offset) - min) + lut(offset)); ++offset; - *dst++ = uint16_t(encode * (srcLeaf.getValue(offset) - min) + lut(offset)); ++offset; - } - } - }// end switch - } - });// kernel -} // CreateNanoGrid::processLeafs - -//================================================================================================ - -template -template -inline typename enable_if::is_index>::type -CreateNanoGrid::processInternalNodes() -{ - using DstNodeT = typename NanoNode::type; - using DstValueT = typename DstNodeT::ValueType; - using DstChildT = typename NanoNode::type; - static_assert(LEVEL == 1 || LEVEL == 2, "Expected internal node"); - - const uint64_t nodeCount = mSrcNodeAcc.nodeCount(LEVEL); - if (nodeCount > 0) {// compute and temporarily encode IDs of child nodes - uint64_t childCount = 0; - auto *dstData = this->template dstNode(0)->data(); - for (uint64_t i=0; i(i).getChildMask().countOn(); - } - } - - forEach(0, nodeCount, 4, [&](const Range1D& r) { - auto *dstData = this->template dstNode(r.begin())->data(); - for (auto i = r.begin(); i != r.end(); ++i, ++dstData) { - auto &srcNode = mSrcNodeAcc.template node(i); - uint64_t childID = dstData->mFlags; - if (DstNodeT::DataType::padding()>0u) { - // Cast to void* to avoid compiler warning about missing trivial copy-assignment - std::memset(reinterpret_cast(dstData), 0, DstNodeT::memUsage()); - } else { - dstData->mFlags = 0;// enable rendering, no bbox, no stats - dstData->mMinimum = dstData->mMaximum = typename DstNodeT::ValueType(); - dstData->mAverage = dstData->mStdDevi = 0; - } - dstData->mBBox[0] = srcNode.origin(); // copy origin of node - dstData->mValueMask = srcNode.getValueMask(); // copy value mask - dstData->mChildMask = srcNode.getChildMask(); // copy child mask - for (auto it = srcNode.cbeginChildAll(); it; ++it) { - SrcValueT value{}; // default initialization - if (it.probeChild(value)) { - DstChildT *dstChild = this->template dstNode(childID++);// might be Leaf - dstData->setChild(it.pos(), dstChild); - } else { - dstData->setValue(it.pos(), static_cast(value)); - } - } - } - }); -} // CreateNanoGrid::processInternalNodes - -//================================================================================================ - -template -template -inline typename enable_if::is_index>::type -CreateNanoGrid::processInternalNodes() -{ - using DstNodeT = typename NanoNode::type; - using DstChildT = typename NanoNode::type; - static_assert(LEVEL == 1 || LEVEL == 2, "Expected internal node"); - static_assert(DstNodeT::DataType::padding()==0u, "Expected internal nodes to have no padding"); - - const uint64_t nodeCount = mSrcNodeAcc.nodeCount(LEVEL); - if (nodeCount > 0) {// compute and temporarily encode IDs of child nodes - uint64_t childCount = 0; - auto *dstData = this->template dstNode(0)->data(); - for (uint64_t i=0; i(i).getChildMask().countOn(); - } - } - - forEach(0, nodeCount, 4, [&](const Range1D& r) { - auto *dstData = this->template dstNode(r.begin())->data(); - for (auto i = r.begin(); i != r.end(); ++i, ++dstData) { - auto &srcNode = mSrcNodeAcc.template node(i); - uint64_t childID = dstData->mFlags; - dstData->mFlags = 0u; - dstData->mBBox[0] = srcNode.origin(); // copy origin of node - dstData->mValueMask = srcNode.getValueMask(); // copy value mask - dstData->mChildMask = 
srcNode.getChildMask(); // copy child mask - uint64_t n = mIncludeTiles ? mValIdx[LEVEL][i] : 0u; - for (auto it = srcNode.cbeginChildAll(); it; ++it) { - SrcValueT value; - if (it.probeChild(value)) { - DstChildT *dstChild = this->template dstNode(childID++);// might be Leaf - dstData->setChild(it.pos(), dstChild); - } else { - uint64_t m = 0u; - if (mIncludeTiles && !((BuildTraits::is_onindex) && dstData->mValueMask.isOff(it.pos()))) m = n++; - dstData->setValue(it.pos(), m); - } - } - if (mIncludeTiles && mIncludeStats) {// stats are always placed after the tile values - dstData->mMinimum = n++; - dstData->mMaximum = n++; - dstData->mAverage = n++; - dstData->mStdDevi = n++; - } else {// if not tiles or stats set stats to the background offset - dstData->mMinimum = 0u; - dstData->mMaximum = 0u; - dstData->mAverage = 0u; - dstData->mStdDevi = 0u; - } - } - }); -} // CreateNanoGrid::processInternalNodes - -//================================================================================================ - -template -template -inline typename enable_if::is_index>::type -CreateNanoGrid::processRoot() -{ - using DstRootT = NanoRoot; - using DstValueT = typename DstRootT::ValueType; - auto &srcRoot = mSrcNodeAcc.root(); - auto *dstData = this->template dstRoot()->data(); - const uint32_t tableSize = srcRoot.getTableSize(); - // Cast to void* to avoid compiler warning about missing trivial copy-assignment - if (DstRootT::DataType::padding()>0) std::memset(reinterpret_cast(dstData), 0, DstRootT::memUsage(tableSize)); - dstData->mTableSize = tableSize; - dstData->mMinimum = dstData->mMaximum = dstData->mBackground = srcRoot.background(); - dstData->mBBox = CoordBBox(); // // set to an empty bounding box - if (tableSize==0) return; - auto *dstChild = this->template dstNode(0);// fixed size and linear in memory - auto *dstTile = dstData->tile(0);// fixed size and linear in memory - for (auto it = srcRoot.cbeginChildAll(); it; ++it, ++dstTile) { - SrcValueT value; - if (it.probeChild(value)) { - dstTile->setChild(it.getCoord(), dstChild++, dstData); - } else { - dstTile->setValue(it.getCoord(), it.isValueOn(), static_cast(value)); - } - } -} // CreateNanoGrid::processRoot - -//================================================================================================ - -template -template -inline typename enable_if::is_index>::type -CreateNanoGrid::processRoot() -{ - using DstRootT = NanoRoot; - auto &srcRoot = mSrcNodeAcc.root(); - auto *dstData = this->template dstRoot()->data(); - const uint32_t tableSize = srcRoot.getTableSize(); - // Cast to void* to avoid compiler warning about missing trivial copy-assignment - if (DstRootT::DataType::padding()>0) std::memset(reinterpret_cast(dstData), 0, DstRootT::memUsage(tableSize)); - dstData->mTableSize = tableSize; - dstData->mBackground = 0u; - uint64_t valueCount = 0u;// the first entry is always the background value - dstData->mBBox = CoordBBox(); // set to an empty/invalid bounding box - - if (tableSize>0) { - auto *dstChild = this->template dstNode(0);// fixed size and linear in memory - auto *dstTile = dstData->tile(0);// fixed size and linear in memory - for (auto it = srcRoot.cbeginChildAll(); it; ++it, ++dstTile) { - SrcValueT tmp; - if (it.probeChild(tmp)) { - dstTile->setChild(it.getCoord(), dstChild++, dstData); - } else { - dstTile->setValue(it.getCoord(), it.isValueOn(), 0u); - if (mIncludeTiles && !((BuildTraits::is_onindex) && !dstTile->state)) dstTile->value = ++valueCount; - } - } - } - if (mIncludeTiles && mIncludeStats) {// 
stats are always placed after the tile values - dstData->mMinimum = ++valueCount; - dstData->mMaximum = ++valueCount; - dstData->mAverage = ++valueCount; - dstData->mStdDevi = ++valueCount; - } else if (dstData->padding()==0) { - dstData->mMinimum = 0u; - dstData->mMaximum = 0u; - dstData->mAverage = 0u; - dstData->mStdDevi = 0u; - } -} // CreateNanoGrid::processRoot - -//================================================================================================ - -template -template -void CreateNanoGrid::processTree() -{ - const uint64_t nodeCount[3] = {mSrcNodeAcc.nodeCount(0), mSrcNodeAcc.nodeCount(1), mSrcNodeAcc.nodeCount(2)}; - auto *dstTree = this->template dstTree(); - auto *dstData = dstTree->data(); - dstData->setRoot( this->template dstRoot() ); - - dstData->setFirstNode(nodeCount[2] ? this->template dstNode(0) : nullptr); - dstData->setFirstNode(nodeCount[1] ? this->template dstNode(0) : nullptr); - dstData->setFirstNode(nodeCount[0] ? this->template dstNode(0) : nullptr); - - dstData->mNodeCount[0] = static_cast(nodeCount[0]); - dstData->mNodeCount[1] = static_cast(nodeCount[1]); - dstData->mNodeCount[2] = static_cast(nodeCount[2]); - - // Count number of active leaf level tiles - dstData->mTileCount[0] = reduce(Range1D(0,nodeCount[1]), uint32_t(0), [&](Range1D &r, uint32_t sum){ - for (auto i=r.begin(); i!=r.end(); ++i) sum += mSrcNodeAcc.template node<1>(i).getValueMask().countOn(); - return sum;}, std::plus()); - - // Count number of active lower internal node tiles - dstData->mTileCount[1] = reduce(Range1D(0,nodeCount[2]), uint32_t(0), [&](Range1D &r, uint32_t sum){ - for (auto i=r.begin(); i!=r.end(); ++i) sum += mSrcNodeAcc.template node<2>(i).getValueMask().countOn(); - return sum;}, std::plus()); - - // Count number of active upper internal node tiles - dstData->mTileCount[2] = 0; - for (auto it = mSrcNodeAcc.root().cbeginValueOn(); it; ++it) dstData->mTileCount[2] += 1; - - // Count number of active voxels - dstData->mVoxelCount = reduce(Range1D(0, nodeCount[0]), uint64_t(0), [&](Range1D &r, uint64_t sum){ - for (auto i=r.begin(); i!=r.end(); ++i) sum += mSrcNodeAcc.template node<0>(i).getValueMask().countOn(); - return sum;}, std::plus()); - - dstData->mVoxelCount += uint64_t(dstData->mTileCount[0]) << 9;// = 3 * 3 - dstData->mVoxelCount += uint64_t(dstData->mTileCount[1]) << 21;// = 3 * (3+4) - dstData->mVoxelCount += uint64_t(dstData->mTileCount[2]) << 36;// = 3 * (3+4+5) - -} // CreateNanoGrid::processTree - -//================================================================================================ - -template -template -void CreateNanoGrid::processGrid() -{ - auto* dstData = this->template dstGrid()->data(); - dstData->init({GridFlags::IsBreadthFirst}, mOffset.size, mSrcNodeAcc.map(), - mapToGridType(), mapToGridClass(mSrcNodeAcc.gridClass())); - dstData->mBlindMetadataCount = static_cast(mBlindMetaData.size()); - dstData->mData1 = this->valueCount(); - - std::memset(dstData->mGridName, '\0', GridData::MaxNameSize);//overwrite mGridName - strncpy(dstData->mGridName, mSrcNodeAcc.getName().c_str(), GridData::MaxNameSize-1); - if (mSrcNodeAcc.hasLongGridName()) dstData->setLongGridNameOn();// grid name is long so store it as blind data - - // Partially process blind meta data - they will be complete in postProcess - if (mBlindMetaData.size()>0) { - auto *metaData = this->dstMeta(0); - dstData->mBlindMetadataOffset = PtrDiff(metaData, dstData); - dstData->mBlindMetadataCount = static_cast(mBlindMetaData.size()); - char *blindData = PtrAdd(mBufferPtr, 
mOffset.blind); - for (const auto &b : mBlindMetaData) { - std::memcpy(metaData, b.metaData, sizeof(GridBlindMetaData)); - metaData->setBlindData(blindData);// sets metaData.mOffset - if (metaData->mDataClass == GridBlindDataClass::GridName) strcpy(blindData, mSrcNodeAcc.getName().c_str()); - ++metaData; - blindData += b.size; - } - mBlindMetaData.clear(); - } -} // CreateNanoGrid::processGrid - -//================================================================================================ - -template -template -inline typename disable_if::is_index>::type -CreateNanoGrid::postProcess() -{ - if constexpr(is_same::value) mCodec.reset(); - auto *dstGrid = this->template dstGrid(); - gridStats(*dstGrid, mStats); -#if defined(NANOVDB_USE_OPENVDB) && !defined(__CUDACC__) - auto *metaData = this->dstMeta(0); - if constexpr(is_same::value || - is_same::value) { - static_assert(is_same::value, "expected DstBuildT==uint32_t"); - auto *dstData0 = this->template dstNode(0)->data(); - dstData0->mMinimum = 0; // start of prefix sum - dstData0->mMaximum = dstData0->mValues[511u]; - for (uint32_t i=1, n=mSrcNodeAcc.nodeCount(0); imMinimum = dstData0->mMinimum + dstData0->mMaximum; - dstData1->mMaximum = dstData1->mValues[511u]; - dstData0 = dstData1; - } - for (size_t i = 0, n = dstGrid->blindDataCount(); i < n; ++i, ++metaData) { - if constexpr(is_same::value) { - if (metaData->mDataClass != GridBlindDataClass::IndexArray) continue; - if (metaData->mDataType == GridType::UInt32) { - uint32_t *blindData = const_cast(metaData->template getBlindData()); - forEach(0, mSrcNodeAcc.nodeCount(0), 16, [&](const auto& r) { - auto *dstData = this->template dstNode(r.begin())->data(); - for (auto j = r.begin(); j != r.end(); ++j, ++dstData) { - uint32_t* p = blindData + dstData->mMinimum; - for (uint32_t idx : mSrcNodeAcc.template node<0>(j).indices()) *p++ = idx; - } - }); - } - } else {// if constexpr(is_same::value) - if (metaData->mDataClass != GridBlindDataClass::AttributeArray) continue; - if (auto *blindData = dstGrid->template getBlindData(i)) { - this->template copyPointAttribute(i, blindData); - } else if (auto *blindData = dstGrid->template getBlindData(i)) { - this->template copyPointAttribute(i, reinterpret_cast(blindData)); - } else if (auto *blindData = dstGrid->template getBlindData(i)) { - this->template copyPointAttribute(i, blindData); - } else if (auto *blindData = dstGrid->template getBlindData(i)) { - this->template copyPointAttribute(i, blindData); - } else { - std::cerr << "unsupported point attribute \"" << toStr(metaData->mDataType) << "\"\n"; - } - }// if - }// loop - } else { // if - (void)metaData; - } -#endif - updateChecksum(*dstGrid, mChecksum); -}// CreateNanoGrid::postProcess - -//================================================================================================ - -template -template -inline typename enable_if::is_index>::type -CreateNanoGrid::postProcess(uint32_t channels) -{ - const std::string typeName = toStr(mapToGridType()); - const uint64_t valueCount = this->valueCount(); - auto *dstGrid = this->template dstGrid(); - for (uint32_t i=0; ifindBlindData(name.c_str()); - if (j<0) throw std::runtime_error("missing " + name); - auto *metaData = this->dstMeta(j);// partially set in processGrid - metaData->mDataClass = GridBlindDataClass::ChannelArray; - metaData->mDataType = mapToGridType(); - SrcValueT *blindData = const_cast(metaData->template getBlindData()); - if (i>0) {// concurrent copy from previous channel - nanovdb::forEach(0,valueCount,1024,[&](const 
nanovdb::Range1D &r){ - SrcValueT *dst=blindData+r.begin(), *end=dst+r.size(), *src=dst-valueCount; - while(dst!=end) *dst++ = *src++; - }); - } else { - this->template copyValues(blindData); - } - }// loop over channels - gridStats(*(this->template dstGrid()), std::min(StatsMode::BBox, mStats)); - updateChecksum(*dstGrid, mChecksum); -}// CreateNanoGrid::postProcess - -//================================================================================================ - -template -template -typename enable_if::is_index>::type -CreateNanoGrid::copyValues(SrcValueT *buffer) -{// copy values from the source grid into the provided buffer - assert(mBufferPtr && buffer); - using StatsT = typename FloatTraits::FloatType; - - if (this->valueCount()==0) this->template countValues(); - - auto copyNodeValues = [&](const auto &node, SrcValueT *v) { - if constexpr(BuildTraits::is_onindex) { - for (auto it = node.cbeginValueOn(); it; ++it) *v++ = *it; - } else { - for (auto it = node.cbeginValueAll(); it; ++it) *v++ = *it; - } - if (mIncludeStats) { - if constexpr(SrcNodeAccT::IS_NANOVDB) {// resolved at compile time - *v++ = node.minimum(); - *v++ = node.maximum(); - if constexpr(is_same::value) { - *v++ = node.average(); - *v++ = node.stdDeviation(); - } else {// eg when SrcValueT=Vec3f and StatsT=float - *v++ = SrcValueT(node.average()); - *v++ = SrcValueT(node.stdDeviation()); - } - } else {// openvdb and nanovdb::build::Grid have no stats - *v++ = buffer[0];// background - *v++ = buffer[0];// background - *v++ = buffer[0];// background - *v++ = buffer[0];// background - } - } - };// copyNodeValues - - const SrcRootT &root = mSrcNodeAcc.root(); - buffer[0] = root.background();// Value array always starts with the background value - if (mIncludeTiles) { - copyNodeValues(root, buffer + 1u); - forEach(0, mSrcNodeAcc.nodeCount(2), 1, [&](const Range1D& r) { - for (auto i = r.begin(); i!=r.end(); ++i) { - copyNodeValues(mSrcNodeAcc.template node<2>(i), buffer + mValIdx[2][i]); - } - }); - forEach(0, mSrcNodeAcc.nodeCount(1), 1, [&](const Range1D& r) { - for (auto i = r.begin(); i!=r.end(); ++i) { - copyNodeValues(mSrcNodeAcc.template node<1>(i), buffer + mValIdx[1][i]); - } - }); - } - forEach(0, mSrcNodeAcc.nodeCount(0), 4, [&](const Range1D& r) { - for (auto i = r.begin(); i!=r.end(); ++i) { - copyNodeValues(mSrcNodeAcc.template node<0>(i), buffer + mValIdx[0][i]); - } - }); -}// CreateNanoGrid::copyValues - - -//================================================================================================ - -#if defined(NANOVDB_USE_OPENVDB) && !defined(__CUDACC__) - -template -template -typename disable_if::value || - is_same::value, uint64_t>::type -CreateNanoGrid::countPoints() const -{ - static_assert(is_same::value, "expected default template parameter"); - return 0u; -}// CreateNanoGrid::countPoints - -template -template -typename enable_if::value || - is_same::value, uint64_t>::type -CreateNanoGrid::countPoints() const -{ - static_assert(is_same::value, "expected default template parameter"); - return reduce(0, mSrcNodeAcc.nodeCount(0), 8, uint64_t(0), [&](auto &r, uint64_t sum) { - for (auto i=r.begin(); i!=r.end(); ++i) sum += mSrcNodeAcc.template node<0>(i).getLastValue(); - return sum;}, std::plus()); -}// CreateNanoGrid::countPoints - -template -template -typename enable_if::value>::type -CreateNanoGrid::copyPointAttribute(size_t attIdx, AttT *attPtr) -{ - static_assert(std::is_same::value, "Expected default parameter"); - using HandleT = openvdb::points::AttributeHandle; - forEach(0, 
mSrcNodeAcc.nodeCount(0), 16, [&](const auto& r) { - auto *dstData = this->template dstNode(r.begin())->data(); - for (auto i = r.begin(); i != r.end(); ++i, ++dstData) { - auto& srcLeaf = mSrcNodeAcc.template node<0>(i); - HandleT handle(srcLeaf.constAttributeArray(attIdx)); - AttT *p = attPtr + dstData->mMinimum; - for (auto iter = srcLeaf.beginIndexOn(); iter; ++iter) *p++ = handle.get(*iter); - } - }); -}// CreateNanoGrid::copyPointAttribute - -#endif - -//================================================================================================ - -template -typename disable_if::is_index || BuildTraits::is_Fp, GridHandle>::type -createNanoGrid(const SrcGridT &srcGrid, - StatsMode sMode, - ChecksumMode cMode, - int verbose, - const BufferT &buffer) -{ - CreateNanoGrid converter(srcGrid); - converter.setStats(sMode); - converter.setChecksum(cMode); - converter.setVerbose(verbose); - return converter.template getHandle(buffer); -}// createNanoGrid - -//================================================================================================ - -template -typename enable_if::is_index, GridHandle>::type -createNanoGrid(const SrcGridT &srcGrid, - uint32_t channels, - bool includeStats, - bool includeTiles, - int verbose, - const BufferT &buffer) -{ - CreateNanoGrid converter(srcGrid); - converter.setVerbose(verbose); - return converter.template getHandle(channels, includeStats, includeTiles, buffer); -} - -//================================================================================================ - -template -typename enable_if::value, GridHandle>::type -createNanoGrid(const SrcGridT &srcGrid, - StatsMode sMode, - ChecksumMode cMode, - bool ditherOn, - int verbose, - const OracleT &oracle, - const BufferT &buffer) -{ - CreateNanoGrid converter(srcGrid); - converter.setStats(sMode); - converter.setChecksum(cMode); - converter.enableDithering(ditherOn); - converter.setVerbose(verbose); - return converter.template getHandle(oracle, buffer); -}// createNanoGrid - -//================================================================================================ - -template -typename enable_if::is_FpX, GridHandle>::type -createNanoGrid(const SrcGridT &srcGrid, - StatsMode sMode, - ChecksumMode cMode, - bool ditherOn, - int verbose, - const BufferT &buffer) -{ - CreateNanoGrid converter(srcGrid); - converter.setStats(sMode); - converter.setChecksum(cMode); - converter.enableDithering(ditherOn); - converter.setVerbose(verbose); - return converter.template getHandle(buffer); -}// createNanoGrid - -//================================================================================================ - -#if defined(NANOVDB_USE_OPENVDB) && !defined(__CUDACC__) -template -GridHandle -openToNanoVDB(const openvdb::GridBase::Ptr& base, - StatsMode sMode, - ChecksumMode cMode, - int verbose) -{ - // We need to define these types because they are not defined in OpenVDB - using openvdb_Vec4fTree = typename openvdb::tree::Tree4::Type; - using openvdb_Vec4dTree = typename openvdb::tree::Tree4::Type; - using openvdb_Vec4fGrid = openvdb::Grid; - using openvdb_Vec4dGrid = openvdb::Grid; - using openvdb_UInt32Grid = openvdb::Grid; - - if (auto grid = openvdb::GridBase::grid(base)) { - return createNanoGrid(*grid, sMode, cMode, verbose); - } else if (auto grid = openvdb::GridBase::grid(base)) { - return createNanoGrid(*grid, sMode, cMode, verbose); - } else if (auto grid = openvdb::GridBase::grid(base)) { - return createNanoGrid(*grid, sMode, cMode, verbose); - } else if (auto grid = 
openvdb::GridBase::grid(base)) { - return createNanoGrid(*grid, sMode, cMode, verbose); - } else if (auto grid = openvdb::GridBase::grid(base)) { - return createNanoGrid(*grid, sMode, cMode, verbose); - } else if (auto grid = openvdb::GridBase::grid(base)) { - return createNanoGrid(*grid, sMode, cMode, verbose); - } else if (auto grid = openvdb::GridBase::grid(base)) { - return createNanoGrid(*grid, sMode, cMode, verbose); - } else if (auto grid = openvdb::GridBase::grid(base)) { - return createNanoGrid(*grid, sMode, cMode, verbose); - } else if (auto grid = openvdb::GridBase::grid(base)) { - return createNanoGrid(*grid, sMode, cMode, verbose); - } else if (auto grid = openvdb::GridBase::grid(base)) { - return createNanoGrid(*grid, sMode, cMode, verbose); - } else if (auto grid = openvdb::GridBase::grid(base)) { - return createNanoGrid(*grid, sMode, cMode, verbose); - } else if (auto grid = openvdb::GridBase::grid(base)) { - return createNanoGrid(*grid, sMode, cMode, verbose); - } else if (auto grid = openvdb::GridBase::grid(base)) { - return createNanoGrid(*grid, sMode, cMode, verbose); - } else { - OPENVDB_THROW(openvdb::RuntimeError, "Unrecognized OpenVDB grid type"); - } -}// openToNanoVDB -#endif - -} // namespace nanovdb - -#endif // NANOVDB_CREATE_NANOGRID_H_HAS_BEEN_INCLUDED +#include // for NANOVDB_DEPRECATED_HEADER +#include +NANOVDB_DEPRECATED_HEADER("Include nanovdb/tools/CreateNanoGrid.h instead.") diff --git a/nanovdb/nanovdb/util/DitherLUT.h b/nanovdb/nanovdb/util/DitherLUT.h index 69c3b33031..270f82d378 100644 --- a/nanovdb/nanovdb/util/DitherLUT.h +++ b/nanovdb/nanovdb/util/DitherLUT.h @@ -1,185 +1,6 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -// -/// @author Jeff Lait -/// -/// @date May 13, 2021 -/// -/// @file DitherLUT.h -/// -/// @brief Defines look up table to do dithering of 8^3 leaf nodes. -#ifndef NANOVDB_DITHERLUT_HAS_BEEN_INCLUDED -#define NANOVDB_DITHERLUT_HAS_BEEN_INCLUDED - -#include // for __hostdev__, Vec3, Min, Max, Pow2, Pow3, Pow4 - -namespace nanovdb { - -class DitherLUT -{ - const bool mEnable; -public: - /// @brief Constructor with an optional scaling factor for the dithering - __hostdev__ DitherLUT(bool enable = true) : mEnable(enable) {} - - /// @brief Retrieves dither threshold for an offset within an 8^3 leaf nodes. - /// - /// @param offset into the lookup table of size 512 - __hostdev__ float operator()(const int offset) - { - -// This table was generated with -/************** - -static constexpr inline uint32 -SYSwang_inthash(uint32 key) -{ - // From http://www.concentric.net/~Ttwang/tech/inthash.htm - key += ~(key << 16); - key ^= (key >> 5); - key += (key << 3); - key ^= (key >> 13); - key += ~(key << 9); - key ^= (key >> 17); - return key; -} - -static void -ut_initDitherR(float *pattern, float offset, - int x, int y, int z, int res, int goalres) -{ - // These offsets are designed to maximize the difference between - // dither values in nearby voxels within a given 2x2x2 cell, without - // producing axis-aligned artifacts. The are organized in row-major - // order. - static const float theDitherOffset[] = {0,4,6,2,5,1,3,7}; - static const float theScale = 0.125F; - int key = (((z << res) + y) << res) + x; - - if (res == goalres) - { - pattern[key] = offset; - return; - } - - // Randomly flip (on each axis) the dithering patterns used by the - // subcells. This key is xor'd with the subcell index below before - // looking up in the dither offset list. 
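
Downstream of this construction, each voxel's offset simply indexes the finished table, and the returned threshold replaces the usual 0.5 rounding constant during quantization; a small sketch of that consumption (function name mine):

    #include <cstdint>

    // Quantize one leaf voxel with an ordered-dither threshold t in [0,1).
    // With t fixed at 0.5 this is plain round-to-nearest; varying t per
    // voxel turns quantization banding into high-frequency noise.
    inline uint8_t quantize(float v, float minV, float scale, float t)
    {
        return static_cast<uint8_t>(scale * (v - minV) + t);
    }

The 0.5f fallback in the operator's return statement further below is exactly this degenerate round-to-nearest case, used when dithering is disabled.
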
- key = SYSwang_inthash(key) & 7; - - x <<= 1; - y <<= 1; - z <<= 1; - - offset *= theScale; - for (int i = 0; i < 8; i++) - ut_initDitherR(pattern, offset+theDitherOffset[i ^ key]*theScale, - x+(i&1), y+((i&2)>>1), z+((i&4)>>2), res+1, goalres); -} - -// This is a compact algorithm that accomplishes essentially the same thing -// as ut_initDither() above. We should eventually switch to use this and -// clean the dead code. -static fpreal32 * -ut_initDitherRecursive(int goalres) -{ - const int nfloat = 1 << (goalres*3); - float *pattern = new float[nfloat]; - ut_initDitherR(pattern, 1.0F, 0, 0, 0, 0, goalres); - - // This has built an even spacing from 1/nfloat to 1.0. - // however, our dither pattern should be 1/(nfloat+1) to nfloat/(nfloat+1) - // So we do a correction here. Note that the earlier calculations are - // done with powers of 2 so are exact, so it does make sense to delay - // the renormalization to this pass. - float correctionterm = nfloat / (nfloat+1.0F); - for (int i = 0; i < nfloat; i++) - pattern[i] *= correctionterm; - return pattern; -} - - theDitherMatrix = ut_initDitherRecursive(3); - - for (int i = 0; i < 512/8; i ++) - { - for (int j = 0; j < 8; j ++) - std::cout << theDitherMatrix[i*8+j] << "f, "; - std::cout << std::endl; - } - - **************/ - static const float LUT[512] = - { - 0.14425f, 0.643275f, 0.830409f, 0.331384f, 0.105263f, 0.604289f, 0.167641f, 0.666667f, - 0.892788f, 0.393762f, 0.0818713f, 0.580897f, 0.853801f, 0.354776f, 0.916179f, 0.417154f, - 0.612086f, 0.11306f, 0.79922f, 0.300195f, 0.510721f, 0.0116959f, 0.947368f, 0.448343f, - 0.362573f, 0.861598f, 0.0506823f, 0.549708f, 0.261209f, 0.760234f, 0.19883f, 0.697856f, - 0.140351f, 0.639376f, 0.576998f, 0.0779727f, 0.522417f, 0.0233918f, 0.460039f, 0.959064f, - 0.888889f, 0.389864f, 0.327485f, 0.826511f, 0.272904f, 0.77193f, 0.709552f, 0.210526f, - 0.483431f, 0.982456f, 0.296296f, 0.795322f, 0.116959f, 0.615984f, 0.0545809f, 0.553606f, - 0.732943f, 0.233918f, 0.545809f, 0.0467836f, 0.865497f, 0.366472f, 0.803119f, 0.304094f, - 0.518519f, 0.0194932f, 0.45614f, 0.955166f, 0.729045f, 0.230019f, 0.54191f, 0.042885f, - 0.269006f, 0.768031f, 0.705653f, 0.206628f, 0.479532f, 0.978558f, 0.292398f, 0.791423f, - 0.237817f, 0.736842f, 0.424951f, 0.923977f, 0.136452f, 0.635478f, 0.323587f, 0.822612f, - 0.986355f, 0.487329f, 0.674464f, 0.175439f, 0.88499f, 0.385965f, 0.573099f, 0.0740741f, - 0.51462f, 0.0155945f, 0.202729f, 0.701754f, 0.148148f, 0.647174f, 0.834308f, 0.335283f, - 0.265107f, 0.764133f, 0.951267f, 0.452242f, 0.896686f, 0.397661f, 0.08577f, 0.584795f, - 0.8577f, 0.358674f, 0.920078f, 0.421053f, 0.740741f, 0.241715f, 0.678363f, 0.179337f, - 0.109162f, 0.608187f, 0.17154f, 0.670565f, 0.491228f, 0.990253f, 0.42885f, 0.927875f, - 0.0662768f, 0.565302f, 0.62768f, 0.128655f, 0.183236f, 0.682261f, 0.744639f, 0.245614f, - 0.814815f, 0.315789f, 0.378168f, 0.877193f, 0.931774f, 0.432749f, 0.495127f, 0.994152f, - 0.0350877f, 0.534113f, 0.97076f, 0.471735f, 0.214425f, 0.71345f, 0.526316f, 0.0272904f, - 0.783626f, 0.2846f, 0.222222f, 0.721248f, 0.962963f, 0.463938f, 0.276803f, 0.775828f, - 0.966862f, 0.467836f, 0.405458f, 0.904483f, 0.0701754f, 0.569201f, 0.881092f, 0.382066f, - 0.218324f, 0.717349f, 0.654971f, 0.155945f, 0.818713f, 0.319688f, 0.132554f, 0.631579f, - 0.0623782f, 0.561404f, 0.748538f, 0.249513f, 0.912281f, 0.413255f, 0.974659f, 0.475634f, - 0.810916f, 0.311891f, 0.499025f, 0.998051f, 0.163743f, 0.662768f, 0.226121f, 0.725146f, - 0.690058f, 0.191033f, 0.00389864f, 0.502924f, 0.557505f, 
0.0584795f, 0.120858f, 0.619883f, - 0.440546f, 0.939571f, 0.752437f, 0.253411f, 0.307992f, 0.807018f, 0.869396f, 0.37037f, - 0.658869f, 0.159844f, 0.346979f, 0.846004f, 0.588694f, 0.0896686f, 0.152047f, 0.651072f, - 0.409357f, 0.908382f, 0.596491f, 0.0974659f, 0.339181f, 0.838207f, 0.900585f, 0.401559f, - 0.34308f, 0.842105f, 0.779727f, 0.280702f, 0.693957f, 0.194932f, 0.25731f, 0.756335f, - 0.592593f, 0.0935673f, 0.0311891f, 0.530214f, 0.444444f, 0.94347f, 0.506823f, 0.00779727f, - 0.68616f, 0.187135f, 0.124756f, 0.623782f, 0.288499f, 0.787524f, 0.350877f, 0.849903f, - 0.436647f, 0.935673f, 0.873294f, 0.374269f, 0.538012f, 0.0389864f, 0.60039f, 0.101365f, - 0.57115f, 0.0721248f, 0.758285f, 0.259259f, 0.719298f, 0.220273f, 0.532164f, 0.0331384f, - 0.321637f, 0.820663f, 0.00974659f, 0.508772f, 0.469786f, 0.968811f, 0.282651f, 0.781676f, - 0.539961f, 0.0409357f, 0.727096f, 0.22807f, 0.500975f, 0.00194932f, 0.563353f, 0.0643275f, - 0.290448f, 0.789474f, 0.477583f, 0.976608f, 0.251462f, 0.750487f, 0.31384f, 0.812865f, - 0.94152f, 0.442495f, 0.879142f, 0.380117f, 0.37232f, 0.871345f, 0.309942f, 0.808967f, - 0.192982f, 0.692008f, 0.130604f, 0.62963f, 0.621832f, 0.122807f, 0.559454f, 0.0604289f, - 0.660819f, 0.161793f, 0.723197f, 0.224172f, 0.403509f, 0.902534f, 0.840156f, 0.341131f, - 0.411306f, 0.910331f, 0.473684f, 0.97271f, 0.653021f, 0.153996f, 0.0916179f, 0.590643f, - 0.196881f, 0.695906f, 0.384016f, 0.883041f, 0.0955166f, 0.594542f, 0.157895f, 0.65692f, - 0.945419f, 0.446394f, 0.633528f, 0.134503f, 0.844055f, 0.345029f, 0.906433f, 0.407407f, - 0.165692f, 0.664717f, 0.103314f, 0.602339f, 0.126706f, 0.625731f, 0.189084f, 0.688109f, - 0.91423f, 0.415205f, 0.851852f, 0.352827f, 0.875244f, 0.376218f, 0.937622f, 0.438596f, - 0.317739f, 0.816764f, 0.255361f, 0.754386f, 0.996101f, 0.497076f, 0.933723f, 0.434698f, - 0.567251f, 0.0682261f, 0.504873f, 0.00584795f, 0.247563f, 0.746589f, 0.185185f, 0.684211f, - 0.037037f, 0.536062f, 0.0994152f, 0.598441f, 0.777778f, 0.278752f, 0.465887f, 0.964912f, - 0.785575f, 0.28655f, 0.847953f, 0.348928f, 0.0292398f, 0.528265f, 0.7154f, 0.216374f, - 0.39961f, 0.898636f, 0.961014f, 0.461988f, 0.0487329f, 0.547758f, 0.111111f, 0.610136f, - 0.649123f, 0.150097f, 0.212476f, 0.711501f, 0.797271f, 0.298246f, 0.859649f, 0.360624f, - 0.118908f, 0.617934f, 0.0565302f, 0.555556f, 0.329435f, 0.82846f, 0.516569f, 0.0175439f, - 0.867446f, 0.368421f, 0.805068f, 0.306043f, 0.578947f, 0.079922f, 0.267057f, 0.766082f, - 0.270955f, 0.76998f, 0.707602f, 0.208577f, 0.668616f, 0.169591f, 0.606238f, 0.107212f, - 0.520468f, 0.0214425f, 0.45809f, 0.957115f, 0.419103f, 0.918129f, 0.356725f, 0.855751f, - 0.988304f, 0.489279f, 0.426901f, 0.925926f, 0.450292f, 0.949318f, 0.512671f, 0.0136452f, - 0.239766f, 0.738791f, 0.676413f, 0.177388f, 0.699805f, 0.20078f, 0.263158f, 0.762183f, - 0.773879f, 0.274854f, 0.337232f, 0.836257f, 0.672515f, 0.173489f, 0.734893f, 0.235867f, - 0.0253411f, 0.524366f, 0.586745f, 0.0877193f, 0.423002f, 0.922027f, 0.48538f, 0.984405f, - 0.74269f, 0.243665f, 0.680312f, 0.181287f, 0.953216f, 0.454191f, 0.1423f, 0.641326f, - 0.493177f, 0.992203f, 0.430799f, 0.929825f, 0.204678f, 0.703704f, 0.890838f, 0.391813f, - 0.894737f, 0.395712f, 0.0838207f, 0.582846f, 0.0448343f, 0.54386f, 0.231969f, 0.730994f, - 0.146199f, 0.645224f, 0.832359f, 0.333333f, 0.793372f, 0.294347f, 0.980507f, 0.481481f, - 0.364522f, 0.863548f, 0.80117f, 0.302144f, 0.824561f, 0.325536f, 0.138402f, 0.637427f, - 0.614035f, 0.11501f, 0.0526316f, 0.551657f, 0.0760234f, 0.575049f, 0.88694f, 0.387914f, - }; - 
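
Taking the generator comment at face value, the 512 entries should be a permutation of k/513 for k = 1..512: evenly spaced thresholds with the degenerate values 0 and 1 excluded (the smallest entry above, 0.00194932f, is 1/513). A quick self-check under that assumption:

    #include <algorithm>
    #include <cassert>
    #include <cmath>

    // Verify the dither table is a permutation of k/513, k = 1..512.
    inline void verifyDitherLUT(const float lut[512])
    {
        float sorted[512];
        std::copy(lut, lut + 512, sorted);
        std::sort(sorted, sorted + 512);
        for (int k = 0; k < 512; ++k)
            assert(std::fabs(sorted[k] - float(k + 1) / 513.0f) < 1e-6f);
    }
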
-        return mEnable ? LUT[offset & 511] : 0.5f;// branch prediction should optimize this!
-    }
-}; // DitherLUT class
-
-} // end nanovdb namespace
-
-#endif // NANOVDB_DITHERLUT_HAS_BEEN_INCLUDED
+#include <nanovdb/util/Util.h> // for NANOVDB_DEPRECATED_HEADER
+#include <nanovdb/math/DitherLUT.h>
+NANOVDB_DEPRECATED_HEADER("Include nanovdb/math/DitherLUT.h instead.")
diff --git a/nanovdb/nanovdb/util/ForEach.h b/nanovdb/nanovdb/util/ForEach.h
index fcd8eae15b..f4c20f2bce 100644
--- a/nanovdb/nanovdb/util/ForEach.h
+++ b/nanovdb/nanovdb/util/ForEach.h
@@ -2,7 +2,7 @@
 // SPDX-License-Identifier: MPL-2.0
 
 /*!
-    \file ForEach.h
+    \file nanovdb/util/ForEach.h
 
     \author Ken Museth
 
@@ -11,10 +11,10 @@
     \brief A unified wrapper for tbb::parallel_for and a naive std::thread fallback
 */
 
-#ifndef NANOVDB_FOREACH_H_HAS_BEEN_INCLUDED
-#define NANOVDB_FOREACH_H_HAS_BEEN_INCLUDED
+#ifndef NANOVDB_UTIL_FOREACH_H_HAS_BEEN_INCLUDED
+#define NANOVDB_UTIL_FOREACH_H_HAS_BEEN_INCLUDED
 
-#include "Range.h"// for Range1D
+#include <nanovdb/util/Range.h>// for Range1D
 
 #ifdef NANOVDB_USE_TBB
 #include <tbb/parallel_for.h>
@@ -26,6 +26,8 @@
 
 namespace nanovdb {
 
+namespace util {
+
 /// @brief simple wrapper for tbb::parallel_for with a naive std fallback
 ///
 /// @param range Range, CoordBBox, tbb::blocked_range, blocked_range2D, or blocked_range3D.
@@ -83,6 +85,32 @@ inline void forEach(const ContainerT<T...> &c, size_t grainSize, const FuncT& fu
     forEach(Range1D(0, c.size(), grainSize), func);
 }
 
+}// namespace util
+
+/// @brief Simple wrapper for the function defined above
+template <typename FuncT>
+[[deprecated("Use nanovdb::util::forEach instead")]]
+inline void forEach(size_t begin, size_t end, size_t grainSize, const FuncT& func)
+{
+    util::forEach(util::Range1D(begin, end, grainSize), func);
+}
+
+/// @brief Simple wrapper for the function defined above, which works with std::containers
+template <template <typename...> class ContainerT, typename... T, typename FuncT>
+[[deprecated("Use nanovdb::util::forEach instead")]]
+inline void forEach(const ContainerT<T...> &c, const FuncT& func)
+{
+    util::forEach(util::Range1D(0, c.size(), 1), func);
+}
+
+/// @brief Simple wrapper for the function defined above, which works with std::containers
+template <template <typename...> class ContainerT, typename... T, typename FuncT>
+[[deprecated("Use nanovdb::util::forEach instead")]]
+inline void forEach(const ContainerT<T...> &c, size_t grainSize, const FuncT& func)
+{
+    util::forEach(util::Range1D(0, c.size(), grainSize), func);
+}
+
 }// namespace nanovdb
 
-#endif // NANOVDB_FOREACH_H_HAS_BEEN_INCLUDED
+#endif // NANOVDB_UTIL_FOREACH_H_HAS_BEEN_INCLUDED
diff --git a/nanovdb/nanovdb/util/GridBuilder.h b/nanovdb/nanovdb/util/GridBuilder.h
index 30fba27f94..bc1ce63eb5 100644
--- a/nanovdb/nanovdb/util/GridBuilder.h
+++ b/nanovdb/nanovdb/util/GridBuilder.h
@@ -1,2314 +1,6 @@
 // Copyright Contributors to the OpenVDB Project
 // SPDX-License-Identifier: MPL-2.0
-/*!
-    \file GridBuilder.h
-
-    \author Ken Museth
-
-    \date June 26, 2020
-
-    \brief This file defines a minimum set of tree nodes and tools that
-           can be used (instead of OpenVDB) to build nanovdb grids on the CPU.
-*/ - -#ifndef NANOVDB_GRID_BUILDER_H_HAS_BEEN_INCLUDED -#define NANOVDB_GRID_BUILDER_H_HAS_BEEN_INCLUDED - -#include - -#include -#include -#include // for stringstream -#include -#include // for memcpy -#include -#include -#include - -#include -#include "Range.h" -#include "ForEach.h" - -namespace nanovdb { - -namespace build { - -// ----------------------------> Froward decelerations of random access methods <-------------------------------------- - -template struct GetValue; -template struct SetValue; -template struct TouchLeaf; -template struct GetState; -template struct ProbeValue; - -// ----------------------------> RootNode <-------------------------------------- - -template -struct RootNode -{ - using ValueType = typename ChildT::ValueType; - using BuildType = typename ChildT::BuildType; - using ChildNodeType = ChildT; - using LeafNodeType = typename ChildT::LeafNodeType; - static constexpr uint32_t LEVEL = 1 + ChildT::LEVEL; // level 0 = leaf - struct Tile { - Tile(ChildT* c = nullptr) : child(c) {} - Tile(const ValueType& v, bool s) : child(nullptr), value(v), state(s) {} - bool isChild() const { return child!=nullptr; } - bool isValue() const { return child==nullptr; } - bool isActive() const { return child==nullptr && state; } - ChildT* child; - ValueType value; - bool state; - }; - using MapT = std::map; - MapT mTable; - ValueType mBackground; - - Tile* probeTile(const Coord &ijk) { - auto iter = mTable.find(CoordToKey(ijk)); - return iter == mTable.end() ? nullptr : &(iter->second); - } - - const Tile* probeTile(const Coord &ijk) const { - auto iter = mTable.find(CoordToKey(ijk)); - return iter == mTable.end() ? nullptr : &(iter->second); - } - - class ChildIterator - { - const RootNode *mParent; - typename MapT::const_iterator mIter; - public: - ChildIterator() : mParent(nullptr), mIter() {} - ChildIterator(const RootNode *parent) : mParent(parent), mIter(parent->mTable.begin()) { - while (mIter!=parent->mTable.end() && mIter->second.child==nullptr) ++mIter; - } - ChildIterator& operator=(const ChildIterator&) = default; - ChildT& operator*() const {NANOVDB_ASSERT(*this); return *mIter->second.child;} - ChildT* operator->() const {NANOVDB_ASSERT(*this); return mIter->second.child;} - Coord getOrigin() const { NANOVDB_ASSERT(*this); return mIter->first;} - Coord getCoord() const { NANOVDB_ASSERT(*this); return mIter->first;} - operator bool() const {return mParent && mIter!=mParent->mTable.end();} - ChildIterator& operator++() { - NANOVDB_ASSERT(mParent); - ++mIter; - while (mIter!=mParent->mTable.end() && mIter->second.child==nullptr) ++mIter; - return *this; - } - ChildIterator operator++(int) { - auto tmp = *this; - ++(*this); - return tmp; - } - uint32_t pos() const { - NANOVDB_ASSERT(mParent); - return uint32_t(std::distance(mParent->mTable.begin(), mIter)); - } - }; // Member class ChildIterator - - ChildIterator cbeginChild() const {return ChildIterator(this);} - ChildIterator cbeginChildOn() const {return ChildIterator(this);}// match openvdb - - class ValueIterator - { - const RootNode *mParent; - typename MapT::const_iterator mIter; - public: - ValueIterator() : mParent(nullptr), mIter() {} - ValueIterator(const RootNode *parent) : mParent(parent), mIter(parent->mTable.begin()) { - while (mIter!=parent->mTable.end() && mIter->second.child!=nullptr) ++mIter; - } - ValueIterator& operator=(const ValueIterator&) = default; - ValueType operator*() const {NANOVDB_ASSERT(*this); return mIter->second.value;} - bool isActive() const {NANOVDB_ASSERT(*this); return 
mIter->second.state;} - Coord getOrigin() const { NANOVDB_ASSERT(*this); return mIter->first;} - Coord getCoord() const { NANOVDB_ASSERT(*this); return mIter->first;} - operator bool() const {return mParent && mIter!=mParent->mTable.end();} - ValueIterator& operator++() { - NANOVDB_ASSERT(mParent); - ++mIter; - while (mIter!=mParent->mTable.end() && mIter->second.child!=nullptr) ++mIter; - return *this;; - } - ValueIterator operator++(int) { - auto tmp = *this; - ++(*this); - return tmp; - } - uint32_t pos() const { - NANOVDB_ASSERT(mParent); - return uint32_t(std::distance(mParent->mTable.begin(), mIter)); - } - }; // Member class ValueIterator - - ValueIterator beginValue() {return ValueIterator(this);} - ValueIterator cbeginValueAll() const {return ValueIterator(this);} - - class ValueOnIterator - { - const RootNode *mParent; - typename MapT::const_iterator mIter; - public: - ValueOnIterator() : mParent(nullptr), mIter() {} - ValueOnIterator(const RootNode *parent) : mParent(parent), mIter(parent->mTable.begin()) { - while (mIter!=parent->mTable.end() && (mIter->second.child!=nullptr || !mIter->second.state)) ++mIter; - } - ValueOnIterator& operator=(const ValueOnIterator&) = default; - ValueType operator*() const {NANOVDB_ASSERT(*this); return mIter->second.value;} - Coord getOrigin() const { NANOVDB_ASSERT(*this); return mIter->first;} - Coord getCoord() const { NANOVDB_ASSERT(*this); return mIter->first;} - operator bool() const {return mParent && mIter!=mParent->mTable.end();} - ValueOnIterator& operator++() { - NANOVDB_ASSERT(mParent); - ++mIter; - while (mIter!=mParent->mTable.end() && (mIter->second.child!=nullptr || !mIter->second.state)) ++mIter; - return *this;; - } - ValueOnIterator operator++(int) { - auto tmp = *this; - ++(*this); - return tmp; - } - uint32_t pos() const { - NANOVDB_ASSERT(mParent); - return uint32_t(std::distance(mParent->mTable.begin(), mIter)); - } - }; // Member class ValueOnIterator - - ValueOnIterator beginValueOn() {return ValueOnIterator(this);} - ValueOnIterator cbeginValueOn() const {return ValueOnIterator(this);} - - class TileIterator - { - const RootNode *mParent; - typename MapT::const_iterator mIter; - public: - TileIterator() : mParent(nullptr), mIter() {} - TileIterator(const RootNode *parent) : mParent(parent), mIter(parent->mTable.begin()) { - NANOVDB_ASSERT(mParent); - } - TileIterator& operator=(const TileIterator&) = default; - const Tile& operator*() const {NANOVDB_ASSERT(*this); return mIter->second;} - const Tile* operator->() const {NANOVDB_ASSERT(*this); return &(mIter->second);} - Coord getOrigin() const { NANOVDB_ASSERT(*this); return mIter->first;} - Coord getCoord() const { NANOVDB_ASSERT(*this); return mIter->first;} - operator bool() const {return mParent && mIter!=mParent->mTable.end();} - const ChildT* probeChild(ValueType &value) { - NANOVDB_ASSERT(*this); - const ChildT *child = mIter->second.child; - if (child==nullptr) value = mIter->second.value; - return child; - } - bool isValueOn() const {return mIter->second.child==nullptr && mIter->second.state;} - TileIterator& operator++() { - NANOVDB_ASSERT(mParent); - ++mIter; - return *this; - } - TileIterator operator++(int) { - auto tmp = *this; - ++(*this); - return tmp; - } - uint32_t pos() const { - NANOVDB_ASSERT(mParent); - return uint32_t(std::distance(mParent->mTable.begin(), mIter)); - } - }; // Member class TileIterator - - TileIterator beginTile() {return TileIterator(this);} - TileIterator cbeginChildAll() const {return TileIterator(this);} - - //class 
DenseIterator : public TileIterator - - RootNode(const ValueType& background) : mBackground(background) {} - RootNode(const RootNode&) = delete; // disallow copy-construction - RootNode(RootNode&&) = default; // allow move construction - RootNode& operator=(const RootNode&) = delete; // disallow copy assignment - RootNode& operator=(RootNode&&) = default; // allow move assignment - - ~RootNode() { this->clear(); } - - uint32_t tileCount() const { return uint32_t(mTable.size()); } - uint32_t getTableSize() const { return uint32_t(mTable.size()); }// match openvdb - const ValueType& background() const {return mBackground;} - - void nodeCount(std::array &count) const - { - for (auto it = this->cbeginChild(); it; ++it) { - count[ChildT::LEVEL] += 1; - it->nodeCount(count); - } - } - - bool empty() const { return mTable.empty(); } - - void clear() - { - for (auto iter = mTable.begin(); iter != mTable.end(); ++iter) delete iter->second.child; - mTable.clear(); - } - - static Coord CoordToKey(const Coord& ijk) { return ijk & ~ChildT::MASK; } - -#ifdef NANOVDB_NEW_ACCESSOR_METHODS - template - auto get(const Coord& ijk, ArgsT&&... args) const - { - if (const Tile *tile = this->probeTile(ijk)) { - if (auto *child = tile->child) return child->template get(ijk, args...); - return OpT::get(*tile, args...); - } - return OpT::get(*this, args...); - } - template - auto set(const Coord& ijk, ArgsT&&... args) - { - ChildT* child = nullptr; - const Coord key = CoordToKey(ijk); - auto iter = mTable.find(key); - if (iter == mTable.end()) { - child = new ChildT(ijk, mBackground, false); - mTable[key] = Tile(child); - } else if (iter->second.child != nullptr) { - child = iter->second.child; - } else { - child = new ChildT(ijk, iter->second.value, iter->second.state); - iter->second.child = child; - } - NANOVDB_ASSERT(child); - return child->template set(ijk, args...); - } - template - auto getAndCache(const Coord& ijk, const AccT& acc, ArgsT&&... args) const - { - if (const Tile *tile = this->probeTile(ijk)) { - if (auto *child = tile->child) { - acc.insert(ijk, child); - return child->template get(ijk, args...); - } - return OpT::get(*tile, args...); - } - return OpT::get(*this, args...); - } - - template - auto setAndCache(const Coord& ijk, const AccT& acc, ArgsT&&... args) - { - ChildT* child = nullptr; - const Coord key = CoordToKey(ijk); - auto iter = mTable.find(key); - if (iter == mTable.end()) { - child = new ChildT(ijk, mBackground, false); - mTable[key] = Tile(child); - } else if (iter->second.child != nullptr) { - child = iter->second.child; - } else { - child = new ChildT(ijk, iter->second.value, iter->second.state); - iter->second.child = child; - } - NANOVDB_ASSERT(child); - acc.insert(ijk, child); - return child->template setAndCache(ijk, acc, args...); - } - ValueType getValue(const Coord& ijk) const {return this->template get>(ijk);} - ValueType getValue(int i, int j, int k) const {return this->template get>(Coord(i,j,k));} - ValueType operator()(const Coord& ijk) const {return this->template get>(ijk);} - ValueType operator()(int i, int j, int k) const {return this->template get>(Coord(i,j,k));} - void setValue(const Coord& ijk, const ValueType& value) {this->template set>(ijk, value);} - bool probeValue(const Coord& ijk, ValueType& value) const {return this->template get>(ijk, value);} - bool isActive(const Coord& ijk) const {return this->template get>(ijk);} -#else - ValueType getValue(const Coord& ijk) const - { -#if 1 - if (auto *tile = this->probeTile(ijk)) return tile->child ? 
tile->child->getValue(ijk) : tile->value; - return mBackground; -#else - auto iter = mTable.find(CoordToKey(ijk)); - if (iter == mTable.end()) { - return mBackground; - } else if (iter->second.child) { - return iter->second.child->getValue(ijk); - } else { - return iter->second.value; - } -#endif - } - ValueType getValue(int i, int j, int k) const {return this->getValue(Coord(i,j,k));} - - void setValue(const Coord& ijk, const ValueType& value) - { - ChildT* child = nullptr; - const Coord key = CoordToKey(ijk); - auto iter = mTable.find(key); - if (iter == mTable.end()) { - child = new ChildT(ijk, mBackground, false); - mTable[key] = Tile(child); - } else if (iter->second.child != nullptr) { - child = iter->second.child; - } else { - child = new ChildT(ijk, iter->second.value, iter->second.state); - iter->second.child = child; - } - NANOVDB_ASSERT(child); - child->setValue(ijk, value); - } - - template - bool isActiveAndCache(const Coord& ijk, AccT& acc) const - { - auto iter = mTable.find(CoordToKey(ijk)); - if (iter == mTable.end()) - return false; - if (iter->second.child) { - acc.insert(ijk, iter->second.child); - return iter->second.child->isActiveAndCache(ijk, acc); - } - return iter->second.state; - } - - template - ValueType getValueAndCache(const Coord& ijk, AccT& acc) const - { - auto iter = mTable.find(CoordToKey(ijk)); - if (iter == mTable.end()) - return mBackground; - if (iter->second.child) { - acc.insert(ijk, iter->second.child); - return iter->second.child->getValueAndCache(ijk, acc); - } - return iter->second.value; - } - - template - void setValueAndCache(const Coord& ijk, const ValueType& value, AccT& acc) - { - ChildT* child = nullptr; - const Coord key = CoordToKey(ijk); - auto iter = mTable.find(key); - if (iter == mTable.end()) { - child = new ChildT(ijk, mBackground, false); - mTable[key] = Tile(child); - } else if (iter->second.child != nullptr) { - child = iter->second.child; - } else { - child = new ChildT(ijk, iter->second.value, iter->second.state); - iter->second.child = child; - } - NANOVDB_ASSERT(child); - acc.insert(ijk, child); - child->setValueAndCache(ijk, value, acc); - } - template - void setValueOnAndCache(const Coord& ijk, AccT& acc) - { - ChildT* child = nullptr; - const Coord key = CoordToKey(ijk); - auto iter = mTable.find(key); - if (iter == mTable.end()) { - child = new ChildT(ijk, mBackground, false); - mTable[key] = Tile(child); - } else if (iter->second.child != nullptr) { - child = iter->second.child; - } else { - child = new ChildT(ijk, iter->second.value, iter->second.state); - iter->second.child = child; - } - NANOVDB_ASSERT(child); - acc.insert(ijk, child); - child->setValueOnAndCache(ijk, acc); - } - template - void touchLeafAndCache(const Coord &ijk, AccT& acc) - { - ChildT* child = nullptr; - const Coord key = CoordToKey(ijk); - auto iter = mTable.find(key); - if (iter == mTable.end()) { - child = new ChildT(ijk, mBackground, false); - mTable[key] = Tile(child); - } else if (iter->second.child != nullptr) { - child = iter->second.child; - } else { - child = new ChildT(ijk, iter->second.value, iter->second.state); - iter->second.child = child; - } - acc.insert(ijk, child); - child->touchLeafAndCache(ijk, acc); - } -#endif// NANOVDB_NEW_ACCESSOR_METHODS - - template - uint32_t nodeCount() const - { - static_assert(is_same::value, "Root::getNodes: Invalid type"); - static_assert(NodeT::LEVEL < LEVEL, "Root::getNodes: LEVEL error"); - uint32_t sum = 0; - for (auto iter = mTable.begin(); iter != mTable.end(); ++iter) { - if 
(iter->second.child == nullptr) continue; // skip tiles - if constexpr(is_same::value) { //resolved at compile-time - ++sum; - } else { - sum += iter->second.child->template nodeCount(); - } - } - return sum; - } - - template - void getNodes(std::vector& array) - { - static_assert(is_same::value, "Root::getNodes: Invalid type"); - static_assert(NodeT::LEVEL < LEVEL, "Root::getNodes: LEVEL error"); - for (auto iter = mTable.begin(); iter != mTable.end(); ++iter) { - if (iter->second.child == nullptr) - continue; - if constexpr(is_same::value) { //resolved at compile-time - array.push_back(reinterpret_cast(iter->second.child)); - } else { - iter->second.child->getNodes(array); - } - } - } - - void addChild(ChildT*& child) - { - NANOVDB_ASSERT(child); - const Coord key = CoordToKey(child->mOrigin); - auto iter = mTable.find(key); - if (iter != mTable.end() && iter->second.child != nullptr) { // existing child node - delete iter->second.child; - iter->second.child = child; - } else { - mTable[key] = Tile(child); - } - child = nullptr; - } - - /// @brief Add a tile containing voxel (i, j, k) at the specified tree level, - /// creating a new branch if necessary. Delete any existing lower-level nodes - /// that contain (x, y, z). - /// @tparam level tree level at which the tile is inserted. Must be 1, 2 or 3. - /// @param ijk Index coordinate that map to the tile being inserted - /// @param value Value of the tile - /// @param state Binary state of the tile - template - void addTile(const Coord& ijk, const ValueType& value, bool state) - { - static_assert(level > 0 && level <= LEVEL, "invalid template value of level"); - const Coord key = CoordToKey(ijk); - auto iter = mTable.find(key); - if constexpr(level == LEVEL) { - if (iter == mTable.end()) { - mTable[key] = Tile(value, state); - } else if (iter->second.child == nullptr) { - iter->second.value = value; - iter->second.state = state; - } else { - delete iter->second.child; - iter->second.child = nullptr; - iter->second.value = value; - iter->second.state = state; - } - } else if constexpr(level < LEVEL) { - ChildT* child = nullptr; - if (iter == mTable.end()) { - child = new ChildT(ijk, mBackground, false); - mTable[key] = Tile(child); - } else if (iter->second.child != nullptr) { - child = iter->second.child; - } else { - child = new ChildT(ijk, iter->second.value, iter->second.state); - iter->second.child = child; - } - child->template addTile(ijk, value, state); - } - } - - template - void addNode(NodeT*& node) - { - if constexpr(is_same::value) { //resolved at compile-time - this->addChild(reinterpret_cast(node)); - } else { - ChildT* child = nullptr; - const Coord key = CoordToKey(node->mOrigin); - auto iter = mTable.find(key); - if (iter == mTable.end()) { - child = new ChildT(node->mOrigin, mBackground, false); - mTable[key] = Tile(child); - } else if (iter->second.child != nullptr) { - child = iter->second.child; - } else { - child = new ChildT(node->mOrigin, iter->second.value, iter->second.state); - iter->second.child = child; - } - child->addNode(node); - } - } - - void merge(RootNode &other) - { - for (auto iter1 = other.mTable.begin(); iter1 != other.mTable.end(); ++iter1) { - if (iter1->second.child == nullptr) continue;// ignore input tiles - auto iter2 = mTable.find(iter1->first); - if (iter2 == mTable.end() || iter2->second.child == nullptr) { - mTable[iter1->first] = Tile(iter1->second.child); - iter1->second.child = nullptr; - } else { - iter2->second.child->merge(*iter1->second.child); - } - } - other.clear(); - } - - 
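
The pointer-stealing pattern in merge() generalizes to any map-of-tiles; a minimal model, with int keys standing in for root keys and the recursive child-on-child merge elided:

    #include <map>

    struct Node;                        // stands in for build::InternalNode
    struct Tile { Node* child = nullptr; };

    // Move children from 'src' into 'dst', preferring children already in
    // 'dst'; value tiles in 'src' are ignored. Nulling the source pointer
    // ensures a later src.clear() cannot double-delete a stolen child.
    inline void mergeTables(std::map<int, Tile>& dst, std::map<int, Tile>& src)
    {
        for (auto& [key, tile] : src) {
            if (tile.child == nullptr) continue;       // ignore input tiles
            auto it = dst.find(key);
            if (it == dst.end() || it->second.child == nullptr) {
                dst[key] = tile;                       // steal the pointer
                tile.child = nullptr;                  // prevent double delete
            } // else: recurse into both children (elided)
        }
    }
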
template - typename std::enable_if::value>::type - signedFloodFill(T outside); - -}; // build::RootNode - -//================================================================================================ - -template -template -inline typename std::enable_if::value>::type -RootNode::signedFloodFill(T outside) -{ - std::map nodeKeys; - for (auto iter = mTable.begin(); iter != mTable.end(); ++iter) { - if (iter->second.child == nullptr) - continue; - nodeKeys.insert(std::pair(iter->first, iter->second.child)); - } - - // We employ a simple z-scanline algorithm that inserts inactive tiles with - // the inside value if they are sandwiched between inside child nodes only! - auto b = nodeKeys.begin(), e = nodeKeys.end(); - if (b == e) - return; - for (auto a = b++; b != e; ++a, ++b) { - Coord d = b->first - a->first; // delta of neighboring coordinates - if (d[0] != 0 || d[1] != 0 || d[2] == int(ChildT::DIM)) - continue; // not same z-scanline or neighbors - const ValueType fill[] = {a->second->getLastValue(), b->second->getFirstValue()}; - if (!(fill[0] < 0) || !(fill[1] < 0)) - continue; // scanline isn't inside - Coord c = a->first + Coord(0u, 0u, ChildT::DIM); - for (; c[2] != b->first[2]; c[2] += ChildT::DIM) { - const Coord key = RootNode::CoordToKey(c); - mTable[key] = typename RootNode::Tile(-outside, false); // inactive tile - } - } -} // build::RootNode::signedFloodFill - -// ----------------------------> InternalNode <-------------------------------------- - -template -struct InternalNode -{ - using ValueType = typename ChildT::ValueType; - using BuildType = typename ChildT::BuildType; - using ChildNodeType = ChildT; - using LeafNodeType = typename ChildT::LeafNodeType; - static constexpr uint32_t LOG2DIM = ChildT::LOG2DIM + 1; - static constexpr uint32_t TOTAL = LOG2DIM + ChildT::TOTAL; //dimension in index space - static constexpr uint32_t DIM = 1u << TOTAL; - static constexpr uint32_t SIZE = 1u << (3 * LOG2DIM); //number of tile values (or child pointers) - static constexpr uint32_t MASK = DIM - 1; - static constexpr uint32_t LEVEL = 1 + ChildT::LEVEL; // level 0 = leaf - static constexpr uint64_t NUM_VALUES = uint64_t(1) << (3 * TOTAL); // total voxel count represented by this node - using MaskT = Mask; - template - using MaskIterT = typename MaskT::template Iterator; - using NanoNodeT = typename NanoNode::Type; - - struct Tile { - Tile(ChildT* c = nullptr) : child(c) {} - Tile(const ValueType& v) : value(v) {} - union{ - ChildT* child; - ValueType value; - }; - }; - Coord mOrigin; - MaskT mValueMask; - MaskT mChildMask; - Tile mTable[SIZE]; - - union { - NanoNodeT *mDstNode; - uint64_t mDstOffset; - }; - - /// @brief Visits child nodes of this node only - class ChildIterator : public MaskIterT - { - using BaseT = MaskIterT; - const InternalNode *mParent; - public: - ChildIterator() : BaseT(), mParent(nullptr) {} - ChildIterator(const InternalNode* parent) : BaseT(parent->mChildMask.beginOn()), mParent(parent) {} - ChildIterator& operator=(const ChildIterator&) = default; - const ChildT& operator*() const {NANOVDB_ASSERT(*this); return *mParent->mTable[BaseT::pos()].child;} - const ChildT* operator->() const {NANOVDB_ASSERT(*this); return mParent->mTable[BaseT::pos()].child;} - Coord getCoord() const { NANOVDB_ASSERT(*this); return (*this)->origin();} - }; // Member class ChildIterator - - ChildIterator beginChild() {return ChildIterator(this);} - ChildIterator cbeginChildOn() const {return ChildIterator(this);}// match openvdb - - /// @brief Visits all tile values in this node, 
-
-// ----------------------------> InternalNode <--------------------------------------
-
-template<typename ChildT>
-struct InternalNode
-{
-    using ValueType = typename ChildT::ValueType;
-    using BuildType = typename ChildT::BuildType;
-    using ChildNodeType = ChildT;
-    using LeafNodeType = typename ChildT::LeafNodeType;
-    static constexpr uint32_t LOG2DIM = ChildT::LOG2DIM + 1;
-    static constexpr uint32_t TOTAL = LOG2DIM + ChildT::TOTAL; //dimension in index space
-    static constexpr uint32_t DIM = 1u << TOTAL;
-    static constexpr uint32_t SIZE = 1u << (3 * LOG2DIM); //number of tile values (or child pointers)
-    static constexpr uint32_t MASK = DIM - 1;
-    static constexpr uint32_t LEVEL = 1 + ChildT::LEVEL; // level 0 = leaf
-    static constexpr uint64_t NUM_VALUES = uint64_t(1) << (3 * TOTAL); // total voxel count represented by this node
-    using MaskT = Mask<LOG2DIM>;
-    template<bool On>
-    using MaskIterT = typename MaskT::template Iterator<On>;
-    using NanoNodeT = typename NanoNode<BuildType, LEVEL>::Type;
-
-    struct Tile {
-        Tile(ChildT* c = nullptr) : child(c) {}
-        Tile(const ValueType& v) : value(v) {}
-        union{
-            ChildT*   child;
-            ValueType value;
-        };
-    };
-    Coord mOrigin;
-    MaskT mValueMask;
-    MaskT mChildMask;
-    Tile  mTable[SIZE];
-
-    union {
-        NanoNodeT *mDstNode;
-        uint64_t   mDstOffset;
-    };
-
-    /// @brief Visits child nodes of this node only
-    class ChildIterator : public MaskIterT<true>
-    {
-        using BaseT = MaskIterT<true>;
-        const InternalNode *mParent;
-    public:
-        ChildIterator() : BaseT(), mParent(nullptr) {}
-        ChildIterator(const InternalNode* parent) : BaseT(parent->mChildMask.beginOn()), mParent(parent) {}
-        ChildIterator& operator=(const ChildIterator&) = default;
-        const ChildT& operator*() const {NANOVDB_ASSERT(*this); return *mParent->mTable[BaseT::pos()].child;}
-        const ChildT* operator->() const {NANOVDB_ASSERT(*this); return mParent->mTable[BaseT::pos()].child;}
-        Coord getCoord() const { NANOVDB_ASSERT(*this); return (*this)->origin();}
-    }; // Member class ChildIterator
-
-    ChildIterator  beginChild()         {return ChildIterator(this);}
-    ChildIterator cbeginChildOn() const {return ChildIterator(this);}// match openvdb
-
-    /// @brief Visits all tile values in this node, i.e. both inactive and active tiles
-    class ValueIterator : public MaskIterT<false>
-    {
-        using BaseT = MaskIterT<false>;
-        const InternalNode *mParent;
-    public:
-        ValueIterator() : BaseT(), mParent(nullptr) {}
-        ValueIterator(const InternalNode* parent) : BaseT(parent->mChildMask.beginOff()), mParent(parent) {}
-        ValueIterator& operator=(const ValueIterator&) = default;
-        ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->mTable[BaseT::pos()].value;}
-        Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());}
-        bool isActive() const { NANOVDB_ASSERT(*this); return mParent->mValueMask.isOn(BaseT::pos());}
-    }; // Member class ValueIterator
-
-    ValueIterator  beginValue()          {return ValueIterator(this);}
-    ValueIterator cbeginValueAll() const {return ValueIterator(this);}
-
-    /// @brief Visits active tile values of this node only
-    class ValueOnIterator : public MaskIterT<true>
-    {
-        using BaseT = MaskIterT<true>;
-        const InternalNode *mParent;
-    public:
-        ValueOnIterator() : BaseT(), mParent(nullptr) {}
-        ValueOnIterator(const InternalNode* parent) : BaseT(parent->mValueMask.beginOn()), mParent(parent) {}
-        ValueOnIterator& operator=(const ValueOnIterator&) = default;
-        ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->mTable[BaseT::pos()].value;}
-        Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());}
-    }; // Member class ValueOnIterator
-
-    ValueOnIterator  beginValueOn()       {return ValueOnIterator(this);}
-    ValueOnIterator cbeginValueOn() const {return ValueOnIterator(this);}
-
-    /// @brief Visits all tile values and child nodes of this node
-    class DenseIterator : public MaskT::DenseIterator
-    {
-        using BaseT = typename MaskT::DenseIterator;
-        const InternalNode *mParent;
-    public:
-        DenseIterator() : BaseT(), mParent(nullptr) {}
-        DenseIterator(const InternalNode* parent) : BaseT(0), mParent(parent) {}
-        DenseIterator& operator=(const DenseIterator&) = default;
-        ChildT* probeChild(ValueType& value) const
-        {
-            NANOVDB_ASSERT(mParent && bool(*this));
-            ChildT *child = nullptr;
-            if (mParent->mChildMask.isOn(BaseT::pos())) {
-                child = mParent->mTable[BaseT::pos()].child;
-            } else {
-                value = mParent->mTable[BaseT::pos()].value;
-            }
-            return child;
-        }
-        Coord getCoord() const { NANOVDB_ASSERT(mParent && bool(*this)); return mParent->offsetToGlobalCoord(BaseT::pos());}
-    }; // Member class DenseIterator
-
-    DenseIterator  beginDense()          {return DenseIterator(this);}
-    DenseIterator cbeginChildAll() const {return DenseIterator(this);}// matches openvdb
-
-    InternalNode(const Coord& origin, const ValueType& value, bool state)
-        : mOrigin(origin & ~MASK)
-        , mValueMask(state)
-        , mChildMask()
-        , mDstOffset(0)
-    {
-        for (uint32_t i = 0; i < SIZE; ++i) mTable[i].value = value;
-    }
-    InternalNode(const InternalNode&) = delete; // disallow copy-construction
-    InternalNode(InternalNode&&) = delete; // disallow move construction
-    InternalNode& operator=(const InternalNode&) = delete; // disallow copy assignment
-    InternalNode& operator=(InternalNode&&) = delete; // disallow move assignment
-    ~InternalNode()
-    {
-        for (auto iter = mChildMask.beginOn(); iter; ++iter) {
-            delete mTable[*iter].child;
-        }
-    }
-    const MaskT& getValueMask() const {return mValueMask;}
-    const MaskT& valueMask() const {return mValueMask;}
-    const MaskT& getChildMask() const {return mChildMask;}
-    const MaskT& childMask() const {return mChildMask;}
-    const Coord& origin() const {return mOrigin;}
-
-    void nodeCount(std::array<size_t, 3> &count) const
-    {
-        count[ChildT::LEVEL] += mChildMask.countOn();
-        if constexpr(ChildT::LEVEL>0) {
-            for (auto it = const_cast<InternalNode*>(this)->beginChild(); it; ++it) it->nodeCount(count);
-        }
-    }
-
-    static uint32_t CoordToOffset(const Coord& ijk)
-    {
-        return (((ijk[0] & int32_t(MASK)) >> ChildT::TOTAL) << (2 * LOG2DIM)) +
-               (((ijk[1] & int32_t(MASK)) >> ChildT::TOTAL) << (LOG2DIM)) +
-                ((ijk[2] & int32_t(MASK)) >> ChildT::TOTAL);
-    }
-
-    static Coord OffsetToLocalCoord(uint32_t n)
-    {
-        NANOVDB_ASSERT(n < SIZE);
-        const uint32_t m = n & ((1 << 2 * LOG2DIM) - 1);
-        return Coord(n >> 2 * LOG2DIM, m >> LOG2DIM, m & ((1 << LOG2DIM) - 1));
-    }
-
-    void localToGlobalCoord(Coord& ijk) const
-    {
-        ijk <<= ChildT::TOTAL;
-        ijk += mOrigin;
-    }
-
-    Coord offsetToGlobalCoord(uint32_t n) const
-    {
-        Coord ijk = InternalNode::OffsetToLocalCoord(n);
-        this->localToGlobalCoord(ijk);
-        return ijk;
-    }
-
-    ValueType getFirstValue() const { return mChildMask.isOn(0) ? mTable[0].child->getFirstValue() : mTable[0].value; }
-    ValueType getLastValue() const  { return mChildMask.isOn(SIZE - 1) ? mTable[SIZE - 1].child->getLastValue() : mTable[SIZE - 1].value; }
-
-    template<typename OpT, typename... ArgsT>
-    auto get(const Coord& ijk, ArgsT&&... args) const
-    {
-        const uint32_t n = CoordToOffset(ijk);
-        if (mChildMask.isOn(n)) return mTable[n].child->template get<OpT>(ijk, args...);
-        return OpT::get(*this, n, args...);
-    }
-
-    template<typename OpT, typename... ArgsT>
-    auto set(const Coord& ijk, ArgsT&&... args)
-    {
-        const uint32_t n = CoordToOffset(ijk);
-        ChildT* child = nullptr;
-        if (mChildMask.isOn(n)) {
-            child = mTable[n].child;
-        } else {
-            child = new ChildT(ijk, mTable[n].value, mValueMask.isOn(n));
-            mTable[n].child = child;
-            mChildMask.setOn(n);
-        }
-        NANOVDB_ASSERT(child);
-        return child->template set<OpT>(ijk, args...);
-    }
-
-    template<typename OpT, typename AccT, typename... ArgsT>
-    auto getAndCache(const Coord& ijk, const AccT& acc, ArgsT&&... args) const
-    {
-        const uint32_t n = CoordToOffset(ijk);
-        if (mChildMask.isOff(n)) return OpT::get(*this, n, args...);
-        ChildT* child = mTable[n].child;
-        acc.insert(ijk, child);
-        if constexpr(ChildT::LEVEL == 0) {
-            return child->template get<OpT>(ijk, args...);
-        } else {
-            return child->template getAndCache<OpT>(ijk, acc, args...);
-        }
-    }
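// The bit packing behind CoordToOffset/OffsetToLocalCoord above, instantiated
// as a self-contained check for a lower internal node (LOG2DIM = 4 over a
// child with TOTAL = 3, so the node spans 2^7 = 128 voxels per axis). The
// local (x, y, z) child indices are packed as x*16^2 + y*16 + z. All names
// and constants here simply mirror the template math, nothing more.
#include <cassert>
#include <cstdint>

constexpr uint32_t LOG2DIM = 4, CHILD_TOTAL = 3;
constexpr uint32_t TOTAL = LOG2DIM + CHILD_TOTAL, DIM = 1u << TOTAL, MASK = DIM - 1;

uint32_t coordToOffset(int i, int j, int k)
{
    return (((i & int32_t(MASK)) >> CHILD_TOTAL) << (2 * LOG2DIM)) +
           (((j & int32_t(MASK)) >> CHILD_TOTAL) << LOG2DIM) +
            ((k & int32_t(MASK)) >> CHILD_TOTAL);
}

int main()
{
    // Global coordinate (130, 8, 17) falls in the node rooted at (128, 0, 0);
    // locally that is child (0, 1, 2) -> offset 0*256 + 1*16 + 2 = 18.
    const uint32_t n = coordToOffset(130, 8, 17);
    assert(n == 18);
    // OffsetToLocalCoord inverts the packing:
    const uint32_t m = n & ((1u << 2 * LOG2DIM) - 1);
    assert((n >> 2 * LOG2DIM) == 0 && (m >> LOG2DIM) == 1 && (m & ((1u << LOG2DIM) - 1)) == 2);
    return 0;
}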
-
-    template<typename OpT, typename AccT, typename... ArgsT>
-    auto setAndCache(const Coord& ijk, const AccT& acc, ArgsT&&... args)
-    {
-        const uint32_t n = CoordToOffset(ijk);
-        ChildT* child = nullptr;
-        if (mChildMask.isOn(n)) {
-            child = mTable[n].child;
-        } else {
-            child = new ChildT(ijk, mTable[n].value, mValueMask.isOn(n));
-            mTable[n].child = child;
-            mChildMask.setOn(n);
-        }
-        NANOVDB_ASSERT(child);
-        acc.insert(ijk, child);
-        if constexpr(ChildT::LEVEL == 0) {
-            return child->template set<OpT>(ijk, args...);
-        } else {
-            return child->template setAndCache<OpT>(ijk, acc, args...);
-        }
-    }
-
-#ifdef NANOVDB_NEW_ACCESSOR_METHODS
-    ValueType getValue(const Coord& ijk) const {return this->template get<GetValue<BuildType>>(ijk);}
-    LeafNodeType& setValue(const Coord& ijk, const ValueType& value){return this->template set<SetValue<BuildType>>(ijk, value);}
-#else
-    ValueType getValue(const Coord& ijk) const
-    {
-        const uint32_t n = CoordToOffset(ijk);
-        if (mChildMask.isOn(n)) {
-            return mTable[n].child->getValue(ijk);
-        }
-        return mTable[n].value;
-    }
-    void setValue(const Coord& ijk, const ValueType& value)
-    {
-        const uint32_t n = CoordToOffset(ijk);
-        ChildT* child = nullptr;
-        if (mChildMask.isOn(n)) {
-            child = mTable[n].child;
-        } else {
-            child = new ChildT(ijk, mTable[n].value, mValueMask.isOn(n));
-            mTable[n].child = child;
-            mChildMask.setOn(n);
-        }
-        child->setValue(ijk, value);
-    }
-
-    template<typename AccT>
-    ValueType getValueAndCache(const Coord& ijk, AccT& acc) const
-    {
-        const uint32_t n = CoordToOffset(ijk);
-        if (mChildMask.isOn(n)) {
-            acc.insert(ijk, const_cast<ChildT*>(mTable[n].child));
-            return mTable[n].child->getValueAndCache(ijk, acc);
-        }
-        return mTable[n].value;
-    }
-
-    template<typename AccT>
-    void setValueAndCache(const Coord& ijk, const ValueType& value, AccT& acc)
-    {
-        const uint32_t n = CoordToOffset(ijk);
-        ChildT* child = nullptr;
-        if (mChildMask.isOn(n)) {
-            child = mTable[n].child;
-        } else {
-            child = new ChildT(ijk, mTable[n].value, mValueMask.isOn(n));
-            mTable[n].child = child;
-            mChildMask.setOn(n);
-        }
-        acc.insert(ijk, child);
-        child->setValueAndCache(ijk, value, acc);
-    }
-
-    template<typename AccT>
-    void setValueOnAndCache(const Coord& ijk, AccT& acc)
-    {
-        const uint32_t n = CoordToOffset(ijk);
-        ChildT* child = nullptr;
-        if (mChildMask.isOn(n)) {
-            child = mTable[n].child;
-        } else {
-            child = new ChildT(ijk, mTable[n].value, mValueMask.isOn(n));
-            mTable[n].child = child;
-            mChildMask.setOn(n);
-        }
-        acc.insert(ijk, child);
-        child->setValueOnAndCache(ijk, acc);
-    }
-
-    template<typename AccT>
-    void touchLeafAndCache(const Coord &ijk, AccT& acc)
-    {
-        const uint32_t n = CoordToOffset(ijk);
-        ChildT* child = nullptr;
-        if (mChildMask.isOn(n)) {
-            child = mTable[n].child;
-        } else {
-            child = new ChildT(ijk, mTable[n].value, mValueMask.isOn(n));
-            mTable[n].child = child;
-            mChildMask.setOn(n);
-        }
-        acc.insert(ijk, child);
-        if constexpr(LEVEL>1) child->touchLeafAndCache(ijk, acc);
-    }
-    template<typename AccT>
-    bool isActiveAndCache(const Coord& ijk, AccT& acc) const
-    {
-        const uint32_t n = CoordToOffset(ijk);
-        if (mChildMask.isOn(n)) {
-            acc.insert(ijk, const_cast<ChildT*>(mTable[n].child));
-            return mTable[n].child->isActiveAndCache(ijk, acc);
-        }
-        return mValueMask.isOn(n);
-    }
-#endif
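// The get<OpT>/set<OpT> members above implement a "static visitor" pattern:
// the tree walk is written once and the operation applied at the terminating
// node is a template parameter exposing static get()/set() overloads per node
// type (see GetValue/SetValue/ProbeValue near the end of this header). A toy,
// self-contained version of the same dispatch over a two-level structure
// (all names here are illustrative, not part of the header):
#include <cassert>

struct Leaf  { float values[8]; };
struct Inner {
    bool  hasChild[8];
    Leaf* child[8];
    float tile[8];
    template<typename OpT>
    auto get(int n) const { return hasChild[n] ? OpT::get(*child[n], n) : OpT::get(*this, n); }
};

struct GetValueOp { // one overload per node type, resolved at compile time
    static float get(const Inner& node, int n) { return node.tile[n]; }
    static float get(const Leaf& leaf, int n)  { return leaf.values[n]; }
};

int main()
{
    Leaf  leaf{{7, 7, 7, 7, 7, 7, 7, 7}};
    Inner node{};
    node.tile[3] = 2.0f;
    node.hasChild[5] = true; node.child[5] = &leaf;
    assert(node.get<GetValueOp>(3) == 2.0f); // resolves to the tile overload
    assert(node.get<GetValueOp>(5) == 7.0f); // descends into the leaf overload
    return 0;
}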
-
-    template<typename NodeT>
-    uint32_t nodeCount() const
-    {
-        static_assert(is_same<ValueType, typename NodeT::ValueType>::value, "Node::getNodes: Invalid type");
-        NANOVDB_ASSERT(NodeT::LEVEL < LEVEL);
-        uint32_t sum = 0;
-        if constexpr(is_same<NodeT, ChildT>::value) { // resolved at compile-time
-            sum += mChildMask.countOn();
-        } else if constexpr(LEVEL>1) {
-            for (auto iter = mChildMask.beginOn(); iter; ++iter) {
-                sum += mTable[*iter].child->template nodeCount<NodeT>();
-            }
-        }
-        return sum;
-    }
-
-    template<typename NodeT>
-    void getNodes(std::vector<NodeT*>& array)
-    {
-        static_assert(is_same<ValueType, typename NodeT::ValueType>::value, "Node::getNodes: Invalid type");
-        NANOVDB_ASSERT(NodeT::LEVEL < LEVEL);
-        for (auto iter = mChildMask.beginOn(); iter; ++iter) {
-            if constexpr(is_same<NodeT, ChildT>::value) { // resolved at compile-time
-                array.push_back(reinterpret_cast<NodeT*>(mTable[*iter].child));
-            } else if constexpr(LEVEL>1) {
-                mTable[*iter].child->getNodes(array);
-            }
-        }
-    }
-
-    void addChild(ChildT*& child)
-    {
-        NANOVDB_ASSERT(child && (child->mOrigin & ~MASK) == this->mOrigin);
-        const uint32_t n = CoordToOffset(child->mOrigin);
-        if (mChildMask.isOn(n)) {
-            delete mTable[n].child;
-        } else {
-            mChildMask.setOn(n);
-        }
-        mTable[n].child = child;
-        child = nullptr;
-    }
-
-    /// @brief Add a tile containing voxel (i, j, k) at the specified tree level,
-    ///        creating a new branch if necessary. Delete any existing lower-level nodes
-    ///        that contain (x, y, z).
-    /// @tparam level tree level at which the tile is inserted. Must be 1 or 2.
-    /// @param ijk Index coordinate that map to the tile being inserted
-    /// @param value Value of the tile
-    /// @param state Binary state of the tile
-    template<uint32_t level>
-    void addTile(const Coord& ijk, const ValueType& value, bool state)
-    {
-        static_assert(level > 0 && level <= LEVEL, "invalid template value of level");
-        const uint32_t n = CoordToOffset(ijk);
-        if constexpr(level == LEVEL) {
-            if (mChildMask.isOn(n)) {
-                delete mTable[n].child;
-                mTable[n] = Tile(value);
-            } else {
-                mValueMask.set(n, state);
-                mTable[n].value = value;
-            }
-        } else if constexpr(level < LEVEL) {
-            ChildT* child = nullptr;
-            if (mChildMask.isOn(n)) {
-                child = mTable[n].child;
-            } else {
-                child = new ChildT(ijk, value, state);
-                mTable[n].child = child;
-                mChildMask.setOn(n);
-            }
-            child->template addTile<level>(ijk, value, state);
-        }
-    }
-
-    template<typename NodeT>
-    void addNode(NodeT*& node)
-    {
-        if constexpr(is_same<NodeT, ChildT>::value) { //resolved at compile-time
-            this->addChild(reinterpret_cast<ChildT*&>(node));
-        } else if constexpr(LEVEL>1) {
-            const uint32_t n = CoordToOffset(node->mOrigin);
-            ChildT* child = nullptr;
-            if (mChildMask.isOn(n)) {
-                child = mTable[n].child;
-            } else {
-                child = new ChildT(node->mOrigin, mTable[n].value, mValueMask.isOn(n));
-                mTable[n].child = child;
-                mChildMask.setOn(n);
-            }
-            child->addNode(node);
-        }
-    }
-
-    void merge(InternalNode &other)
-    {
-        for (auto iter = other.mChildMask.beginOn(); iter; ++iter) {
-            const uint32_t n = *iter;
-            if (mChildMask.isOn(n)) {
-                mTable[n].child->merge(*other.mTable[n].child);
-            } else {
-                mTable[n].child = other.mTable[n].child;
-                other.mChildMask.setOff(n);
-                mChildMask.setOn(n);
-            }
-        }
-    }
-
-    template<typename T>
-    typename std::enable_if<std::is_floating_point<T>::value>::type
-    signedFloodFill(T outside);
-
-}; // build::InternalNode
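// The Tile union above stores either a child pointer or a tile value in the
// same slot; it is the external mChildMask bit, not the union itself, that
// says which member is live. A compact illustration of that discriminated
// union pattern (illustrative types only):
#include <bitset>
#include <cassert>

struct Child { float first = -1.0f; };
union TileSlot {
    Child* child;
    float  value;
};

int main()
{
    std::bitset<8> childMask;   // external discriminator, one bit per slot
    TileSlot table[8] = {};

    table[2].value = 4.0f;      // slot 2 holds a tile value (bit stays off)
    Child c;
    table[6].child = &c;        // slot 6 holds a child (bit goes on)
    childMask.set(6);

    for (int n = 0; n < 8; ++n) {
        if (childMask.test(n)) assert(table[n].child->first < 0);
        else if (n == 2)       assert(table[n].value == 4.0f);
    }
    return 0;
}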
-
-//================================================================================================
-
-template<typename ChildT>
-template<typename T>
-inline typename std::enable_if<std::is_floating_point<T>::value>::type
-InternalNode<ChildT>::signedFloodFill(T outside)
-{
-    const uint32_t first = *mChildMask.beginOn();
-    if (first < NUM_VALUES) {
-        bool xInside = mTable[first].child->getFirstValue() < 0;
-        bool yInside = xInside, zInside = xInside;
-        for (uint32_t x = 0; x != (1 << LOG2DIM); ++x) {
-            const uint32_t x00 = x << (2 * LOG2DIM); // offset for block(x, 0, 0)
-            if (mChildMask.isOn(x00)) {
-                xInside = mTable[x00].child->getLastValue() < 0;
-            }
-            yInside = xInside;
-            for (uint32_t y = 0; y != (1u << LOG2DIM); ++y) {
-                const uint32_t xy0 = x00 + (y << LOG2DIM); // offset for block(x, y, 0)
-                if (mChildMask.isOn(xy0))
-                    yInside = mTable[xy0].child->getLastValue() < 0;
-                zInside = yInside;
-                for (uint32_t z = 0; z != (1 << LOG2DIM); ++z) {
-                    const uint32_t xyz = xy0 + z; // offset for block(x, y, z)
-                    if (mChildMask.isOn(xyz)) {
-                        zInside = mTable[xyz].child->getLastValue() < 0;
-                    } else {
-                        mTable[xyz].value = zInside ? -outside : outside;
-                    }
-                }
-            }
-        }
-    }
-} // build::InternalNode::signedFloodFill
-
-// ----------------------------> LeafNode <--------------------------------------
-
-template<typename BuildT>
-struct LeafNode
-{
-    using BuildType = BuildT;
-    using ValueType = typename BuildToValueMap<BuildT>::type;
-    using LeafNodeType = LeafNode<BuildT>;
-    static constexpr uint32_t LOG2DIM = 3;
-    static constexpr uint32_t TOTAL = LOG2DIM; // needed by parent nodes
-    static constexpr uint32_t DIM = 1u << TOTAL;
-    static constexpr uint32_t SIZE = 1u << 3 * LOG2DIM; // total number of voxels represented by this node
-    static constexpr uint32_t MASK = DIM - 1; // mask for bit operations
-    static constexpr uint32_t LEVEL = 0; // level 0 = leaf
-    static constexpr uint64_t NUM_VALUES = uint64_t(1) << (3 * TOTAL); // total voxel count represented by this node
-    using NodeMaskType = Mask<LOG2DIM>;
-    template<bool ON>
-    using MaskIterT = typename Mask<LOG2DIM>::template Iterator<ON>;
-    using NanoLeafT = typename NanoNode<BuildT, 0>::Type;
-
-    Coord         mOrigin;
-    Mask<LOG2DIM> mValueMask;
-    ValueType     mValues[SIZE];
-    union {
-        NanoLeafT *mDstNode;
-        uint64_t   mDstOffset;
-    };
-
-    /// @brief Visits all active values in a leaf node
-    class ValueOnIterator : public MaskIterT<true>
-    {
-        using BaseT = MaskIterT<true>;
-        const LeafNode *mParent;
-    public:
-        ValueOnIterator() : BaseT(), mParent(nullptr) {}
-        ValueOnIterator(const LeafNode* parent) : BaseT(parent->mValueMask.beginOn()), mParent(parent) {}
-        ValueOnIterator& operator=(const ValueOnIterator&) = default;
-        ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->mValues[BaseT::pos()];}
-        Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());}
-    }; // Member class ValueOnIterator
-
-    ValueOnIterator  beginValueOn()       {return ValueOnIterator(this);}
-    ValueOnIterator cbeginValueOn() const {return ValueOnIterator(this);}
-
-    /// @brief Visits all inactive values in a leaf node
-    class ValueOffIterator : public MaskIterT<false>
-    {
-        using BaseT = MaskIterT<false>;
-        const LeafNode *mParent;
-    public:
-        ValueOffIterator() : BaseT(), mParent(nullptr) {}
-        ValueOffIterator(const LeafNode* parent) : BaseT(parent->mValueMask.beginOff()), mParent(parent) {}
-        ValueOffIterator& operator=(const ValueOffIterator&) = default;
-        ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->mValues[BaseT::pos()];}
-        Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());}
-    }; // Member class ValueOffIterator
-
-    ValueOffIterator  beginValueOff()       {return ValueOffIterator(this);}
-    ValueOffIterator cbeginValueOff() const {return ValueOffIterator(this);}
-
-    /// @brief Visits all values in a leaf node, i.e. both active and inactive values
-    class ValueIterator
-    {
-        const LeafNode *mParent;
-        uint32_t mPos;
-    public:
-        ValueIterator() : mParent(nullptr), mPos(1u << 3 * LOG2DIM) {}
-        ValueIterator(const LeafNode* parent) : mParent(parent), mPos(0) {NANOVDB_ASSERT(parent);}
-        ValueIterator& operator=(const ValueIterator&) = default;
-        ValueType operator*() const { NANOVDB_ASSERT(*this); return mParent->mValues[mPos];}
-        Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(mPos);}
-        bool isActive() const { NANOVDB_ASSERT(*this); return mParent->isActive(mPos);}
-        operator bool() const {return mPos < SIZE;}
-        ValueIterator& operator++() {++mPos; return *this;}
-        ValueIterator operator++(int) {
-            auto tmp = *this;
-            ++(*this);
-            return tmp;
-        }
-    }; // Member class ValueIterator
-
-    ValueIterator  beginValue()          {return ValueIterator(this);}
-    ValueIterator cbeginValueAll() const {return ValueIterator(this);}
-
-    LeafNode(const Coord& ijk, const ValueType& value, bool state)
-        : mOrigin(ijk & ~MASK)
-        , mValueMask(state) //invalid
-        , mDstOffset(0)
-    {
-        ValueType* target = mValues;
-        uint32_t n = SIZE;
-        while (n--) {
-            *target++ = value;
-        }
-    }
-    LeafNode(const LeafNode&) = delete; // disallow copy-construction
-    LeafNode(LeafNode&&) = delete; // disallow move construction
-    LeafNode& operator=(const LeafNode&) = delete; // disallow copy assignment
-    LeafNode& operator=(LeafNode&&) = delete; // disallow move assignment
-    ~LeafNode() = default;
-
-    const Mask<LOG2DIM>& getValueMask() const {return mValueMask;}
-    const Mask<LOG2DIM>& valueMask() const {return mValueMask;}
-    const Coord& origin() const {return mOrigin;}
-
-    /// @brief Return the linear offset corresponding to the given coordinate
-    static uint32_t CoordToOffset(const Coord& ijk)
-    {
-        return ((ijk[0] & int32_t(MASK)) << (2 * LOG2DIM)) +
-               ((ijk[1] & int32_t(MASK)) << LOG2DIM) +
-                (ijk[2] & int32_t(MASK));
-    }
-
-    static Coord OffsetToLocalCoord(uint32_t n)
-    {
-        NANOVDB_ASSERT(n < SIZE);
-        const int32_t m = n & ((1 << 2 * LOG2DIM) - 1);
-        return Coord(n >> 2 * LOG2DIM, m >> LOG2DIM, m & int32_t(MASK));
-    }
-
-    void localToGlobalCoord(Coord& ijk) const
-    {
-        ijk += mOrigin;
-    }
-
-    Coord offsetToGlobalCoord(uint32_t n) const
-    {
-        Coord ijk = LeafNode::OffsetToLocalCoord(n);
-        this->localToGlobalCoord(ijk);
-        return ijk;
-    }
-
-    ValueType getFirstValue() const { return mValues[0]; }
-    ValueType getLastValue() const  { return mValues[SIZE - 1]; }
-    const ValueType& getValue(uint32_t i) const {return mValues[i];}
-    const ValueType& getValue(const Coord& ijk) const {return mValues[CoordToOffset(ijk)];}
-
-    template<typename OpT, typename... ArgsT>
-    auto get(const Coord& ijk, ArgsT&&... args) const {return OpT::get(*this, CoordToOffset(ijk), args...);}
-
-    template<typename OpT, typename... ArgsT>
-    auto set(const Coord& ijk, ArgsT&&... args) {return OpT::set(*this, CoordToOffset(ijk), args...);}
-
-#ifndef NANOVDB_NEW_ACCESSOR_METHODS
-    template<typename AccT>
-    const ValueType& getValueAndCache(const Coord& ijk, const AccT&) const
-    {
-        return mValues[CoordToOffset(ijk)];
-    }
-
-    template<typename AccT>
-    void setValueAndCache(const Coord& ijk, const ValueType& value, const AccT&)
-    {
-        const uint32_t n = CoordToOffset(ijk);
-        mValueMask.setOn(n);
-        mValues[n] = value;
-    }
-
-    template<typename AccT>
-    void setValueOnAndCache(const Coord& ijk, const AccT&)
-    {
-        const uint32_t n = CoordToOffset(ijk);
-        mValueMask.setOn(n);
-    }
-
-    template<typename AccT>
-    bool isActiveAndCache(const Coord& ijk, const AccT&) const
-    {
-        return mValueMask.isOn(CoordToOffset(ijk));
-    }
-#endif
-
-    void setValue(uint32_t n, const ValueType& value)
-    {
-        mValueMask.setOn(n);
-        mValues[n] = value;
-    }
-    void setValue(const Coord& ijk, const ValueType& value){this->setValue(CoordToOffset(ijk), value);}
-
-    void merge(LeafNode &other)
-    {
-        other.mValueMask -= mValueMask;
-        for (auto iter = other.mValueMask.beginOn(); iter; ++iter) {
-            const uint32_t n = *iter;
-            mValues[n] = other.mValues[n];
-        }
-        mValueMask |= other.mValueMask;
-    }
-
-    template<typename T>
-    typename std::enable_if<std::is_floating_point<T>::value>::type
-    signedFloodFill(T outside);
-
-}; // build::LeafNode<BuildT>
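// LeafNode::merge() above gives precedence to values already active in *this*:
// it first clears this node's active bits out of the other node's mask, then
// copies only the surviving (exclusively-other) values. The same three steps
// with a plain bitset (purely illustrative):
#include <bitset>
#include <cassert>

int main()
{
    std::bitset<8> mine, theirs;
    float myVals[8] = {}, theirVals[8] = {};
    mine.set(1);   myVals[1] = 10.f;
    theirs.set(1); theirVals[1] = 99.f;  // collides with ours -> must lose
    theirs.set(4); theirVals[4] = 42.f;  // only theirs -> must be copied

    theirs &= ~mine;                      // step 1: other.mValueMask -= mValueMask
    for (int n = 0; n < 8; ++n)
        if (theirs.test(n)) myVals[n] = theirVals[n]; // step 2: copy survivors
    mine |= theirs;                       // step 3: mValueMask |= other.mValueMask

    assert(myVals[1] == 10.f && myVals[4] == 42.f && mine.test(4));
    return 0;
}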
-
-//================================================================================================
-
-template <>
-struct LeafNode<ValueMask>
-{
-    using ValueType = bool;
-    using BuildType = ValueMask;
-    using LeafNodeType = LeafNode<ValueMask>;
-    static constexpr uint32_t LOG2DIM = 3;
-    static constexpr uint32_t TOTAL = LOG2DIM; // needed by parent nodes
-    static constexpr uint32_t DIM = 1u << TOTAL;
-    static constexpr uint32_t SIZE = 1u << 3 * LOG2DIM; // total number of voxels represented by this node
-    static constexpr uint32_t MASK = DIM - 1; // mask for bit operations
-    static constexpr uint32_t LEVEL = 0; // level 0 = leaf
-    static constexpr uint64_t NUM_VALUES = uint64_t(1) << (3 * TOTAL); // total voxel count represented by this node
-    using NodeMaskType = Mask<LOG2DIM>;
-    template<bool ON>
-    using MaskIterT = typename Mask<LOG2DIM>::template Iterator<ON>;
-    using NanoLeafT = typename NanoNode<ValueMask, 0>::Type;
-
-    Coord         mOrigin;
-    Mask<LOG2DIM> mValueMask;
-    union {
-        NanoLeafT *mDstNode;
-        uint64_t   mDstOffset;
-    };
-
-    /// @brief Visits all active values in a leaf node
-    class ValueOnIterator : public MaskIterT<true>
-    {
-        using BaseT = MaskIterT<true>;
-        const LeafNode *mParent;
-    public:
-        ValueOnIterator() : BaseT(), mParent(nullptr) {}
-        ValueOnIterator(const LeafNode* parent) : BaseT(parent->mValueMask.beginOn()), mParent(parent) {}
-        ValueOnIterator& operator=(const ValueOnIterator&) = default;
-        bool operator*() const {NANOVDB_ASSERT(*this); return true;}
-        Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());}
-    }; // Member class ValueOnIterator
-
-    ValueOnIterator  beginValueOn()       {return ValueOnIterator(this);}
-    ValueOnIterator cbeginValueOn() const {return ValueOnIterator(this);}
-
-    /// @brief Visits all inactive values in a leaf node
-    class ValueOffIterator : public MaskIterT<false>
-    {
-        using BaseT = MaskIterT<false>;
-        const LeafNode *mParent;
-    public:
-        ValueOffIterator() : BaseT(), mParent(nullptr) {}
-        ValueOffIterator(const LeafNode* parent) : BaseT(parent->mValueMask.beginOff()), mParent(parent) {}
-        ValueOffIterator& operator=(const ValueOffIterator&) = default;
-        bool operator*() const {NANOVDB_ASSERT(*this); return false;}
-        Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());}
-    }; // Member class ValueOffIterator
-
-    ValueOffIterator  beginValueOff()       {return ValueOffIterator(this);}
-    ValueOffIterator cbeginValueOff() const {return ValueOffIterator(this);}
-
-    /// @brief Visits all values in a leaf node, i.e. both active and inactive values
-    class ValueIterator
-    {
-        const LeafNode *mParent;
-        uint32_t mPos;
-    public:
-        ValueIterator() : mParent(nullptr), mPos(1u << 3 * LOG2DIM) {}
-        ValueIterator(const LeafNode* parent) : mParent(parent), mPos(0) {NANOVDB_ASSERT(parent);}
-        ValueIterator& operator=(const ValueIterator&) = default;
-        bool operator*() const { NANOVDB_ASSERT(*this); return mParent->mValueMask.isOn(mPos);}
-        Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(mPos);}
-        bool isActive() const { NANOVDB_ASSERT(*this); return mParent->mValueMask.isOn(mPos);}
-        operator bool() const {return mPos < SIZE;}
-        ValueIterator& operator++() {++mPos; return *this;}
-        ValueIterator operator++(int) {
-            auto tmp = *this;
-            ++(*this);
-            return tmp;
-        }
-    }; // Member class ValueIterator
-
-    ValueIterator  beginValue()          {return ValueIterator(this);}
-    ValueIterator cbeginValueAll() const {return ValueIterator(this);}
-
-    LeafNode(const Coord& ijk, const ValueType&, bool state)
-        : mOrigin(ijk & ~MASK)
-        , mValueMask(state) //invalid
-        , mDstOffset(0)
-    {
-    }
-    LeafNode(const LeafNode&) = delete; // disallow copy-construction
-    LeafNode(LeafNode&&) = delete; // disallow move construction
-    LeafNode& operator=(const LeafNode&) = delete; // disallow copy assignment
-    LeafNode& operator=(LeafNode&&) = delete; // disallow move assignment
-    ~LeafNode() = default;
-
-    const Mask<LOG2DIM>& valueMask() const {return mValueMask;}
-    const Mask<LOG2DIM>& getValueMask() const {return mValueMask;}
-    const Coord& origin() const {return mOrigin;}
-
-    /// @brief Return the linear offset corresponding to the given coordinate
-    static uint32_t CoordToOffset(const Coord& ijk)
-    {
-        return ((ijk[0] & int32_t(MASK)) << (2 * LOG2DIM)) +
-               ((ijk[1] & int32_t(MASK)) << LOG2DIM) +
-                (ijk[2] & int32_t(MASK));
-    }
-
-    static Coord OffsetToLocalCoord(uint32_t n)
-    {
-        NANOVDB_ASSERT(n < SIZE);
-        const int32_t m = n & ((1 << 2 * LOG2DIM) - 1);
-        return Coord(n >> 2 * LOG2DIM, m >> LOG2DIM, m & int32_t(MASK));
-    }
-
-    void localToGlobalCoord(Coord& ijk) const {ijk += mOrigin;}
-
-    Coord offsetToGlobalCoord(uint32_t n) const
-    {
-        Coord ijk = LeafNode::OffsetToLocalCoord(n);
-        this->localToGlobalCoord(ijk);
-        return ijk;
-    }
-
-    bool getFirstValue() const { return mValueMask.isOn(0); }
-    bool getLastValue() const  { return mValueMask.isOn(SIZE - 1); }
-    bool getValue(uint32_t i) const {return mValueMask.isOn(i);}
-    bool getValue(const Coord& ijk) const {return mValueMask.isOn(CoordToOffset(ijk));}
-
-    template<typename OpT, typename... ArgsT>
-    auto get(const Coord& ijk, ArgsT&&... args) const {return OpT::get(*this, CoordToOffset(ijk), args...);}
-
-    template<typename OpT, typename... ArgsT>
-    auto set(const Coord& ijk, ArgsT&&... args) {return OpT::set(*this, CoordToOffset(ijk), args...);}
-
-#ifndef NANOVDB_NEW_ACCESSOR_METHODS
-    template<typename AccT>
-    bool getValueAndCache(const Coord& ijk, const AccT&) const
-    {
-        return mValueMask.isOn(CoordToOffset(ijk));
-    }
-
-    template<typename AccT>
-    void setValueAndCache(const Coord& ijk, bool, const AccT&)
-    {
-        const uint32_t n = CoordToOffset(ijk);
-        mValueMask.setOn(n);
-    }
-
-    template<typename AccT>
-    void setValueOnAndCache(const Coord& ijk, const AccT&)
-    {
-        const uint32_t n = CoordToOffset(ijk);
-        mValueMask.setOn(n);
-    }
-
-    template<typename AccT>
-    bool isActiveAndCache(const Coord& ijk, const AccT&) const
-    {
-        return mValueMask.isOn(CoordToOffset(ijk));
-    }
-#endif
-
-    void setValue(uint32_t n, bool) {mValueMask.setOn(n);}
-    void setValue(const Coord& ijk) {mValueMask.setOn(CoordToOffset(ijk));}
-
-    void merge(LeafNode &other)
-    {
-        mValueMask |= other.mValueMask;
-    }
-
-}; // build::LeafNode<ValueMask>
-
-//================================================================================================
-
-template <>
-struct LeafNode<bool>
-{
-    using ValueType = bool;
-    using BuildType = bool;
-    using LeafNodeType = LeafNode<bool>;
-    static constexpr uint32_t LOG2DIM = 3;
-    static constexpr uint32_t TOTAL = LOG2DIM; // needed by parent nodes
-    static constexpr uint32_t DIM = 1u << TOTAL;
-    static constexpr uint32_t SIZE = 1u << 3 * LOG2DIM; // total number of voxels represented by this node
-    static constexpr uint32_t MASK = DIM - 1; // mask for bit operations
-    static constexpr uint32_t LEVEL = 0; // level 0 = leaf
-    static constexpr uint64_t NUM_VALUES = uint64_t(1) << (3 * TOTAL); // total voxel count represented by this node
-    using NodeMaskType = Mask<LOG2DIM>;
-    template<bool ON>
-    using MaskIterT = typename Mask<LOG2DIM>::template Iterator<ON>;
-    using NanoLeafT = typename NanoNode<bool, 0>::Type;
-
-    Coord         mOrigin;
-    Mask<LOG2DIM> mValueMask, mValues;
-    union {
-        NanoLeafT *mDstNode;
-        uint64_t   mDstOffset;
-    };
-
-    /// @brief Visits all active values in a leaf node
-    class ValueOnIterator : public MaskIterT<true>
-    {
-        using BaseT = MaskIterT<true>;
-        const LeafNode *mParent;
-    public:
-        ValueOnIterator() : BaseT(), mParent(nullptr) {}
-        ValueOnIterator(const LeafNode* parent) : BaseT(parent->mValueMask.beginOn()), mParent(parent) {}
-        ValueOnIterator& operator=(const ValueOnIterator&) = default;
-        bool operator*() const {NANOVDB_ASSERT(*this); return mParent->mValues.isOn(BaseT::pos());}
-        Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());}
-    }; // Member class ValueOnIterator
-
-    ValueOnIterator  beginValueOn()       {return ValueOnIterator(this);}
-    ValueOnIterator cbeginValueOn() const {return ValueOnIterator(this);}
-
-    /// @brief Visits all inactive values in a leaf node
-    class ValueOffIterator : public MaskIterT<false>
-    {
-        using BaseT = MaskIterT<false>;
-        const LeafNode *mParent;
-    public:
-        ValueOffIterator() : BaseT(), mParent(nullptr) {}
-        ValueOffIterator(const LeafNode* parent) : BaseT(parent->mValueMask.beginOff()), mParent(parent) {}
-        ValueOffIterator& operator=(const ValueOffIterator&) = default;
-        bool operator*() const {NANOVDB_ASSERT(*this); return mParent->mValues.isOn(BaseT::pos());}
-        Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());}
-    }; // Member class ValueOffIterator
-
-    ValueOffIterator  beginValueOff()       {return ValueOffIterator(this);}
-    ValueOffIterator cbeginValueOff() const {return ValueOffIterator(this);}
-
-    /// @brief Visits all values in a leaf node, i.e. both active and inactive values
-    class ValueIterator
-    {
-        const LeafNode *mParent;
-        uint32_t mPos;
-    public:
-        ValueIterator() : mParent(nullptr), mPos(1u << 3 * LOG2DIM) {}
-        ValueIterator(const LeafNode* parent) : mParent(parent), mPos(0) {NANOVDB_ASSERT(parent);}
-        ValueIterator& operator=(const ValueIterator&) = default;
-        bool operator*() const { NANOVDB_ASSERT(*this); return mParent->mValues.isOn(mPos);}
-        Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(mPos);}
-        bool isActive() const { NANOVDB_ASSERT(*this); return mParent->mValueMask.isOn(mPos);}
-        operator bool() const {return mPos < SIZE;}
-        ValueIterator& operator++() {++mPos; return *this;}
-        ValueIterator operator++(int) {
-            auto tmp = *this;
-            ++(*this);
-            return tmp;
-        }
-    }; // Member class ValueIterator
-
-    ValueIterator  beginValue()          {return ValueIterator(this);}
-    ValueIterator cbeginValueAll() const {return ValueIterator(this);}
-
-    LeafNode(const Coord& ijk, bool value, bool state)
-        : mOrigin(ijk & ~MASK)
-        , mValueMask(state)
-        , mValues(value)
-        , mDstOffset(0)
-    {
-    }
-    LeafNode(const LeafNode&) = delete; // disallow copy-construction
-    LeafNode(LeafNode&&) = delete; // disallow move construction
-    LeafNode& operator=(const LeafNode&) = delete; // disallow copy assignment
-    LeafNode& operator=(LeafNode&&) = delete; // disallow move assignment
-    ~LeafNode() = default;
-
-    const Mask<LOG2DIM>& valueMask() const {return mValueMask;}
-    const Mask<LOG2DIM>& getValueMask() const {return mValueMask;}
-    const Coord& origin() const {return mOrigin;}
-
-    /// @brief Return the linear offset corresponding to the given coordinate
-    static uint32_t CoordToOffset(const Coord& ijk)
-    {
-        return ((ijk[0] & int32_t(MASK)) << (2 * LOG2DIM)) +
-               ((ijk[1] & int32_t(MASK)) << LOG2DIM) +
-                (ijk[2] & int32_t(MASK));
-    }
-
-    static Coord OffsetToLocalCoord(uint32_t n)
-    {
-        NANOVDB_ASSERT(n < SIZE);
-        const int32_t m = n & ((1 << 2 * LOG2DIM) - 1);
-        return Coord(n >> 2 * LOG2DIM, m >> LOG2DIM, m & int32_t(MASK));
-    }
-
-    void localToGlobalCoord(Coord& ijk) const
-    {
-        ijk += mOrigin;
-    }
-
-    Coord offsetToGlobalCoord(uint32_t n) const
-    {
-        Coord ijk = LeafNode::OffsetToLocalCoord(n);
-        this->localToGlobalCoord(ijk);
-        return ijk;
-    }
-    bool getFirstValue() const { return mValues.isOn(0); }
-    bool getLastValue() const  { return mValues.isOn(SIZE - 1); }
-
-    bool getValue(uint32_t i) const {return mValues.isOn(i);}
-    bool getValue(const Coord& ijk) const
-    {
-        return mValues.isOn(CoordToOffset(ijk));
-    }
-#ifndef NANOVDB_NEW_ACCESSOR_METHODS
-    template<typename AccT>
-    bool isActiveAndCache(const Coord& ijk, const AccT&) const
-    {
-        return mValueMask.isOn(CoordToOffset(ijk));
-    }
-
-    template<typename AccT>
-    bool getValueAndCache(const Coord& ijk, const AccT&) const
-    {
-        return mValues.isOn(CoordToOffset(ijk));
-    }
-
-    template<typename AccT>
-    void setValueAndCache(const Coord& ijk, bool value, const AccT&)
-    {
-        const uint32_t n = CoordToOffset(ijk);
-        mValueMask.setOn(n);
-        mValues.setOn(n);
-    }
-
-    template<typename AccT>
-    void setValueOnAndCache(const Coord& ijk, const AccT&)
-    {
-        const uint32_t n = CoordToOffset(ijk);
-        mValueMask.setOn(n);
-    }
-#endif
-
-    void setValue(uint32_t n, bool value)
-    {
-        mValueMask.setOn(n);
-        mValues.set(n, value);
-    }
-    void setValue(const Coord& ijk, bool value) {return this->setValue(CoordToOffset(ijk), value);}
-
-    void merge(LeafNode &other)
-    {
-        mValues |= other.mValues;
-        mValueMask |= other.mValueMask;
-    }
-
-}; // build::LeafNode<bool>
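// The two specializations above differ in storage: LeafNode<ValueMask> keeps a
// single bit mask (a voxel's "value" IS its active state), while LeafNode<bool>
// keeps two masks so a voxel can be, e.g., inactive yet still hold the value
// true. A small sketch of why the second mask matters (illustrative types):
#include <bitset>
#include <cassert>

struct BoolLeaf {
    std::bitset<512> valueMask, values;  // active bits and value bits, separate
    void set(int n, bool v) { valueMask.set(n); values[n] = v; }
};

int main()
{
    BoolLeaf leaf;
    leaf.values.set(7);                 // value true, but never activated
    assert(leaf.values.test(7) && !leaf.valueMask.test(7));
    leaf.set(7, true);                  // now active as well
    assert(leaf.valueMask.test(7));
    return 0;
}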
-
-//================================================================================================
-
-template<typename BuildT>
-template<typename T>
-inline typename std::enable_if<std::is_floating_point<T>::value>::type
-LeafNode<BuildT>::signedFloodFill(T outside)
-{
-    const uint32_t first = *mValueMask.beginOn();
-    if (first < SIZE) {
-        bool xInside = mValues[first] < 0, yInside = xInside, zInside = xInside;
-        for (uint32_t x = 0; x != DIM; ++x) {
-            const uint32_t x00 = x << (2 * LOG2DIM);
-            if (mValueMask.isOn(x00))
-                xInside = mValues[x00] < 0; // element(x, 0, 0)
-            yInside = xInside;
-            for (uint32_t y = 0; y != DIM; ++y) {
-                const uint32_t xy0 = x00 + (y << LOG2DIM);
-                if (mValueMask.isOn(xy0))
-                    yInside = mValues[xy0] < 0; // element(x, y, 0)
-                zInside = yInside;
-                for (uint32_t z = 0; z != (1 << LOG2DIM); ++z) {
-                    const uint32_t xyz = xy0 + z; // element(x, y, z)
-                    if (mValueMask.isOn(xyz)) {
-                        zInside = mValues[xyz] < 0;
-                    } else {
-                        mValues[xyz] = zInside ? -outside : outside;
-                    }
-                }
-            }
-        }
-    }
-} // build::LeafNode::signedFloodFill
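// The leaf-level fill walks each z-row carrying a boolean "inside" state that
// flips only at active voxels (where the signed distance is authoritative) and
// stamps +/-outside into the inactive gaps. One row of that propagation, as a
// standalone check (the row values are made up; the real code seeds the state
// from the first active voxel, here it simply starts as "outside"):
#include <cassert>

int main()
{
    const float outside = 3.0f;
    float row[8]    = {0, 0, -1.0f, 0, 0, 2.0f, 0, 0}; // signed distances at active voxels
    bool  active[8] = {false, false, true, false, false, true, false, false};

    bool inside = false; // state carried along the row
    for (int z = 0; z < 8; ++z) {
        if (active[z]) inside = row[z] < 0;                   // active voxel: trust its sign
        else           row[z] = inside ? -outside : outside;  // inactive: stamp the fill
    }
    assert(row[0] ==  3.0f);  // before the surface: outside
    assert(row[3] == -3.0f);  // between -1 and +2: inside
    assert(row[7] ==  3.0f);  // after crossing back: outside
    return 0;
}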
-
-// ----------------------------> ValueAccessor <--------------------------------------
-
-template<typename BuildT>
-struct ValueAccessor
-{
-    using ValueType = typename BuildToValueMap<BuildT>::type;
-    using LeafT = build::LeafNode<BuildT>;
-    using Node1 = build::InternalNode<LeafT>;
-    using Node2 = build::InternalNode<Node1>;
-    using RootNodeType = build::RootNode<Node2>;
-    using LeafNodeType = typename RootNodeType::LeafNodeType;
-
-    ValueAccessor(RootNodeType& root)
-        : mRoot(root)
-        , mKeys{Coord(Maximum<int>::value()), Coord(Maximum<int>::value()), Coord(Maximum<int>::value())}
-        , mNode{nullptr, nullptr, nullptr}
-    {
-    }
-    ValueAccessor(ValueAccessor&&) = default; // allow move construction
-    ValueAccessor(const ValueAccessor&) = delete; // disallow copy construction
-    ValueType getValue(int i, int j, int k) const {return this->getValue(Coord(i,j,k));}
-    template<typename NodeT>
-    bool isCached(const Coord& ijk) const
-    {
-        return (ijk[0] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][0] &&
-               (ijk[1] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][1] &&
-               (ijk[2] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][2];
-    }
-
-    template<typename OpT, typename... ArgsT>
-    auto get(const Coord& ijk, ArgsT&&... args) const
-    {
-        if (this->template isCached<LeafT>(ijk)) {
-            return ((const LeafT*)mNode[0])->template get<OpT>(ijk, args...);
-        } else if (this->template isCached<Node1>(ijk)) {
-            return ((const Node1*)mNode[1])->template getAndCache<OpT>(ijk, *this, args...);
-        } else if (this->template isCached<Node2>(ijk)) {
-            return ((const Node2*)mNode[2])->template getAndCache<OpT>(ijk, *this, args...);
-        }
-        return mRoot.template getAndCache<OpT>(ijk, *this, args...);
-    }
-
-    template<typename OpT, typename... ArgsT>
-    auto set(const Coord& ijk, ArgsT&&... args) const
-    {
-        if (this->template isCached<LeafT>(ijk)) {
-            return ((LeafT*)mNode[0])->template set<OpT>(ijk, args...);
-        } else if (this->template isCached<Node1>(ijk)) {
-            return ((Node1*)mNode[1])->template setAndCache<OpT>(ijk, *this, args...);
-        } else if (this->template isCached<Node2>(ijk)) {
-            return ((Node2*)mNode[2])->template setAndCache<OpT>(ijk, *this, args...);
-        }
-        return mRoot.template setAndCache<OpT>(ijk, *this, args...);
-    }
-
-#ifdef NANOVDB_NEW_ACCESSOR_METHODS
-    ValueType getValue(const Coord& ijk) const {return this->template get<GetValue<BuildT>>(ijk);}
-    LeafT* setValue(const Coord& ijk, const ValueType& value) {return this->template set<SetValue<BuildT>>(ijk, value);}
-    LeafT* setValueOn(const Coord& ijk) {return this->template set<SetValue<BuildT>>(ijk);}
-    LeafT& touchLeaf(const Coord& ijk) {return this->template set<TouchLeaf<BuildT>>(ijk);}
-    bool isActive(const Coord& ijk) const {return this->template get<GetState<BuildT>>(ijk);}
-#else
-    ValueType getValue(const Coord& ijk) const
-    {
-        if (this->template isCached<LeafT>(ijk)) {
-            return ((LeafT*)mNode[0])->getValueAndCache(ijk, *this);
-        } else if (this->template isCached<Node1>(ijk)) {
-            return ((Node1*)mNode[1])->getValueAndCache(ijk, *this);
-        } else if (this->template isCached<Node2>(ijk)) {
-            return ((Node2*)mNode[2])->getValueAndCache(ijk, *this);
-        }
-        return mRoot.getValueAndCache(ijk, *this);
-    }
-
-    /// @brief Sets value in a leaf node and returns it.
-    LeafT* setValue(const Coord& ijk, const ValueType& value)
-    {
-        if (this->template isCached<LeafT>(ijk)) {
-            ((LeafT*)mNode[0])->setValueAndCache(ijk, value, *this);
-        } else if (this->template isCached<Node1>(ijk)) {
-            ((Node1*)mNode[1])->setValueAndCache(ijk, value, *this);
-        } else if (this->template isCached<Node2>(ijk)) {
-            ((Node2*)mNode[2])->setValueAndCache(ijk, value, *this);
-        } else {
-            mRoot.setValueAndCache(ijk, value, *this);
-        }
-        NANOVDB_ASSERT(this->isCached<LeafT>(ijk));
-        return (LeafT*)mNode[0];
-    }
-    void setValueOn(const Coord& ijk)
-    {
-        if (this->template isCached<LeafT>(ijk)) {
-            ((LeafT*)mNode[0])->setValueOnAndCache(ijk, *this);
-        } else if (this->template isCached<Node1>(ijk)) {
-            ((Node1*)mNode[1])->setValueOnAndCache(ijk, *this);
-        } else if (this->template isCached<Node2>(ijk)) {
-            ((Node2*)mNode[2])->setValueOnAndCache(ijk, *this);
-        } else {
-            mRoot.setValueOnAndCache(ijk, *this);
-        }
-    }
-    void touchLeaf(const Coord& ijk) const
-    {
-        if (this->template isCached<LeafT>(ijk)) {
-            return;
-        } else if (this->template isCached<Node1>(ijk)) {
-            ((Node1*)mNode[1])->touchLeafAndCache(ijk, *this);
-        } else if (this->template isCached<Node2>(ijk)) {
-            ((Node2*)mNode[2])->touchLeafAndCache(ijk, *this);
-        } else {
-            mRoot.touchLeafAndCache(ijk, *this);
-        }
-    }
-    bool isActive(const Coord& ijk) const
-    {
-        if (this->template isCached<LeafT>(ijk)) {
-            return ((LeafT*)mNode[0])->isActiveAndCache(ijk, *this);
-        } else if (this->template isCached<Node1>(ijk)) {
-            return ((Node1*)mNode[1])->isActiveAndCache(ijk, *this);
-        } else if (this->template isCached<Node2>(ijk)) {
-            return ((Node2*)mNode[2])->isActiveAndCache(ijk, *this);
-        }
-        return mRoot.isActiveAndCache(ijk, *this);
-    }
-#endif
-
-    bool isValueOn(const Coord& ijk) const { return this->isActive(ijk); }
-    template<typename NodeT>
-    void insert(const Coord& ijk, NodeT* node) const
-    {
-        mKeys[NodeT::LEVEL] = ijk & ~NodeT::MASK;
-        mNode[NodeT::LEVEL] = node;
-    }
-    RootNodeType& mRoot;
-    mutable Coord mKeys[3];
-    mutable void* mNode[3];
-}; // build::ValueAccessor
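// The accessor keeps, per tree level, the origin key of the last node visited
// plus a raw pointer to it; a lookup is a hit when the query coordinate,
// masked down to that level's resolution, matches the cached key. A sketch of
// the key test for one level (leaf nodes, 8^3 voxels, MASK = 7; names here
// are illustrative only):
#include <cassert>
#include <cstdint>

struct CachedLevel {
    int32_t key[3] = {INT32_MAX, INT32_MAX, INT32_MAX};
    const void* node = nullptr;
    static constexpr int32_t MASK = 7; // leaf spans 8 voxels per axis
    bool hit(int i, int j, int k) const
    {
        return (i & ~MASK) == key[0] && (j & ~MASK) == key[1] && (k & ~MASK) == key[2];
    }
    void insert(int i, int j, int k, const void* n)
    {
        key[0] = i & ~MASK; key[1] = j & ~MASK; key[2] = k & ~MASK; node = n;
    }
};

int main()
{
    CachedLevel cache;
    int dummy = 0;
    cache.insert(9, 9, 9, &dummy);    // caches the leaf with origin (8, 8, 8)
    assert(cache.hit(10, 15, 8));     // same leaf -> no tree traversal needed
    assert(!cache.hit(16, 9, 9));     // neighboring leaf -> miss, walk from root
    return 0;
}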
-
-// ----------------------------> Tree <--------------------------------------
-
-template<typename BuildT>
-struct Tree
-{
-    using ValueType = typename BuildToValueMap<BuildT>::type;
-    using Node0 = build::LeafNode<BuildT>;
-    using Node1 = build::InternalNode<Node0>;
-    using Node2 = build::InternalNode<Node1>;
-    using RootNodeType = build::RootNode<Node2>;
-    using LeafNodeType = typename RootNodeType::LeafNodeType;
-    struct WriteAccessor;
-
-    RootNodeType mRoot;
-    std::mutex   mMutex;
-
-    Tree(const ValueType &background) : mRoot(background) {}
-    Tree(const Tree&) = delete; // disallow copy construction
-    Tree(Tree&&) = delete; // disallow move construction
-    Tree& tree() {return *this;}
-    RootNodeType& root() {return mRoot;}
-    ValueType getValue(const Coord& ijk) const {return mRoot.getValue(ijk);}
-    ValueType getValue(int i, int j, int k) const {return this->getValue(Coord(i,j,k));}
-    void setValue(const Coord& ijk, const ValueType &value) {mRoot.setValue(ijk, value);}
-    std::array<size_t, 3> nodeCount() const
-    {
-        std::array<size_t, 3> count{0,0,0};
-        mRoot.nodeCount(count);
-        return count;
-    }
-    /// @brief regular accessor for thread-safe reading and non-thread-safe writing
-    ValueAccessor<BuildT> getAccessor() { return ValueAccessor<BuildT>(mRoot); }
-    /// @brief special accessor for thread-safe writing only
-    WriteAccessor getWriteAccessor() { return WriteAccessor(mRoot, mMutex); }
-};// build::Tree
-
-// ----------------------------> Tree::WriteAccessor <--------------------------------------
-
-template<typename BuildT>
-struct Tree<BuildT>::WriteAccessor
-{
-    using AccT = ValueAccessor<BuildT>;
-    using ValueType = typename AccT::ValueType;
-    using LeafT = typename AccT::LeafT;
-    using Node1 = typename AccT::Node1;
-    using Node2 = typename AccT::Node2;
-    using RootNodeType = typename AccT::RootNodeType;
-
-    WriteAccessor(RootNodeType& parent, std::mutex &mx)
-        : mParent(parent)
-        , mRoot(parent.mBackground)
-        , mAcc(mRoot)
-        , mMutex(mx)
-    {
-    }
-    WriteAccessor(const WriteAccessor&) = delete; // disallow copy construction
-    WriteAccessor(WriteAccessor&&) = default; // allow move construction
-    ~WriteAccessor() { this->merge(); }
-    void merge()
-    {
-        mMutex.lock();
-        mParent.merge(mRoot);
-        mMutex.unlock();
-    }
-    inline void setValueOn(const Coord& ijk) {mAcc.setValueOn(ijk);}
-    inline void setValue(const Coord& ijk, const ValueType &value) {mAcc.setValue(ijk, value);}
-
-    RootNodeType &mParent, mRoot;
-    AccT mAcc;
-    std::mutex &mMutex;
-}; // build::Tree::WriteAccessor
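// WriteAccessor gives each writer thread its own private root and only takes
// the shared mutex once, when the accessor merges (at the latest, on
// destruction). The same pattern with plain standard containers standing in
// for the tree types (everything below is an illustrative stand-in):
#include <cassert>
#include <map>
#include <mutex>
#include <thread>
#include <vector>

int main()
{
    std::map<int, float> parent;   // stands in for the shared RootNode
    std::mutex mutex;

    auto worker = [&](int base) {
        std::map<int, float> local;                   // private "root": lock-free writes
        for (int i = 0; i < 100; ++i) local[base + i] = 1.0f;
        std::lock_guard<std::mutex> guard(mutex);     // merge under the lock, once
        parent.merge(local);
    };
    std::vector<std::thread> pool;
    for (int t = 0; t < 4; ++t) pool.emplace_back(worker, 1000 * t);
    for (auto &t : pool) t.join();
    assert(parent.size() == 400);
    return 0;
}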
-
-// ----------------------------> Grid <--------------------------------------
-
-template<typename BuildT>
-struct Grid : public Tree<BuildT>
-{
-    using BuildType = BuildT;
-    using ValueType = typename BuildToValueMap<BuildT>::type;
-    using TreeType = Tree<BuildT>;
-    using Node0 = build::LeafNode<BuildT>;
-    using Node1 = build::InternalNode<Node0>;
-    using Node2 = build::InternalNode<Node1>;
-    using RootNodeType = build::RootNode<Node2>;
-
-    GridClass   mGridClass;
-    GridType    mGridType;
-    Map         mMap;
-    std::string mName;
-
-    Grid(const ValueType &background, const std::string &name = "", GridClass gClass = GridClass::Unknown)
-        : TreeType(background)
-        , mGridClass(gClass)
-        , mGridType(mapToGridType<BuildT>())
-        , mName(name)
-    {
-        mMap.set(1.0, Vec3d(0.0), 1.0);
-    }
-    TreeType& tree() {return *this;}
-    const GridType& gridType() const { return mGridType; }
-    const GridClass& gridClass() const { return mGridClass; }
-    const Map& map() const { return mMap; }
-    void setTransform(double scale=1.0, const Vec3d &translation = Vec3d(0.0)) {mMap.set(scale, translation, 1.0);}
-    const std::string& gridName() const { return mName; }
-    const std::string& getName() const { return mName; }
-    void setName(const std::string &name) { mName = name; }
-    /// @brief Sets grid values in the domain of the @a bbox to those returned by the specified @a func with the
-    ///        expected signature [](const Coord&)->ValueType.
-    ///
-    /// @note If @a func returns a value equal to the background value of the input grid at a
-    ///       specific voxel coordinate, then the active state of that coordinate is off! Else the value
-    ///       is set and the active state is on. This is done to allow for sparse grids to be generated.
-    ///
-    /// @param func  Functor used to evaluate the grid values in the @a bbox
-    /// @param bbox  Coordinate bounding-box over which the grid values will be set.
-    /// @param delta Specifies a lower threshold value for rendering (optional). Typically equals the voxel size
-    ///              for level sets and otherwise it's zero.
-    template<typename Func>
-    void operator()(const Func& func, const CoordBBox& bbox, ValueType delta = ValueType(0));
-};// build::Grid
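// A hypothetical call site for the functor-fill operator declared above: the
// lambda is evaluated once per coordinate in the bbox, and values equal to the
// background are skipped so the grid stays sparse. The sphere radius, band
// width, and include path (this pre-32.7 header) are assumptions for
// illustration only:
#include <cmath>
#include <nanovdb/util/GridBuilder.h>

void buildSphere()
{
    nanovdb::build::FloatGrid grid(/*background=*/3.0f, "sphere", nanovdb::GridClass::LevelSet);
    const nanovdb::CoordBBox bbox(nanovdb::Coord(-100), nanovdb::Coord(100));
    grid([](const nanovdb::Coord& ijk) -> float {
        const float d = std::sqrt(float(ijk[0]*ijk[0] + ijk[1]*ijk[1] + ijk[2]*ijk[2])) - 50.0f;
        // clamp to the background band so distant voxels collapse into tiles
        return d <= -3.0f ? -3.0f : d >= 3.0f ? 3.0f : d;
    }, bbox);
}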
-
-template<typename BuildT>
-template<typename Func>
-void Grid<BuildT>::operator()(const Func& func, const CoordBBox& bbox, ValueType delta)
-{
-    auto &root = this->tree().root();
-#if __cplusplus >= 201703L
-    static_assert(is_same<ValueType, typename std::invoke_result<Func, const Coord&>::type>::value, "GridBuilder: mismatched ValueType");
-#else// invoke_result was introduced in C++17 and result_of was removed in C++20
-    static_assert(is_same<ValueType, typename std::result_of<Func(const Coord&)>::type>::value, "GridBuilder: mismatched ValueType");
-#endif
-    const CoordBBox leafBBox(bbox[0] >> Node0::TOTAL, bbox[1] >> Node0::TOTAL);
-    std::mutex mutex;
-    forEach(leafBBox, [&](const CoordBBox& b) {
-        Node0* leaf = nullptr;
-        for (auto it = b.begin(); it; ++it) {
-            Coord min(*it << Node0::TOTAL), max(min + Coord(Node0::DIM - 1));
-            const CoordBBox b(min.maxComponent(bbox.min()),
-                              max.minComponent(bbox.max()));// crop
-            if (leaf == nullptr) {
-                leaf = new Node0(b[0], root.mBackground, false);
-            } else {
-                leaf->mOrigin = b[0] & ~Node0::MASK;
-                NANOVDB_ASSERT(leaf->mValueMask.isOff());
-            }
-            leaf->mDstOffset = 0;// no prune
-            for (auto ijk = b.begin(); ijk; ++ijk) {
-                const auto v = func(*ijk);// call functor
-                if (v != root.mBackground) leaf->setValue(*ijk, v);// don't insert background values
-            }
-            if (!leaf->mValueMask.isOff()) {// has active values
-                if (leaf->mValueMask.isOn()) {// only active values
-                    const auto first = leaf->getFirstValue();
-                    int n=1;
-                    while (n<512) {// 8^3 = 512
-                        if (leaf->mValues[n++] != first) break;
-                    }
-                    if (n == 512) leaf->mDstOffset = 1;// prune below
-                }
-                std::lock_guard<std::mutex> guard(mutex);
-                NANOVDB_ASSERT(leaf != nullptr);
-                root.addNode(leaf);
-                NANOVDB_ASSERT(leaf == nullptr);
-            }
-        }// loop over sub-part of leafBBox
-        if (leaf) delete leaf;
-    });
-
-    // Prune leaf and tile nodes
-    for (auto it2 = root.mTable.begin(); it2 != root.mTable.end(); ++it2) {
-        if (auto *upper = it2->second.child) {//upper level internal node
-            for (auto it1 = upper->mChildMask.beginOn(); it1; ++it1) {
-                auto *lower = upper->mTable[*it1].child;// lower level internal node
-                for (auto it0 = lower->mChildMask.beginOn(); it0; ++it0) {
-                    auto *leaf = lower->mTable[*it0].child;// leaf nodes
-                    if (leaf->mDstOffset) {
-                        lower->mTable[*it0].value = leaf->getFirstValue();
-                        lower->mChildMask.setOff(*it0);
-                        lower->mValueMask.setOn(*it0);
-                        delete leaf;
-                    }
-                }// loop over leaf nodes
-                if (lower->mChildMask.isOff()) {//only tiles
-                    const auto first = lower->getFirstValue();
-                    int n=1;
-                    while (n < 4096) {// 16^3 = 4096
-                        if (lower->mTable[n++].value != first) break;
-                    }
-                    if (n == 4096) {// identical tile values so prune
-                        upper->mTable[*it1].value = first;
-                        upper->mChildMask.setOff(*it1);
-                        upper->mValueMask.setOn(*it1);
-                        delete lower;
-                    }
-                }
-            }// loop over lower internal nodes
-            if (upper->mChildMask.isOff()) {//only tiles
-                const auto first = upper->getFirstValue();
-                int n=1;
-                while (n < 32768) {// 32^3 = 32768
-                    if (upper->mTable[n++].value != first) break;
-                }
-                if (n == 32768) {// identical tile values so prune
-                    it2->second.value = first;
-                    it2->second.state = upper->mValueMask.isOn();
-                    it2->second.child = nullptr;
-                    delete upper;
-                }
-            }
-        }// is child node of the root
-    }// loop over root table
-}// build::Grid::operator()
-
-//================================================================================================
-
-template<typename T>
-using BuildLeaf = LeafNode<T>;
-template<typename T>
-using BuildLower = InternalNode<BuildLeaf<T>>;
-template<typename T>
-using BuildUpper = InternalNode<BuildLower<T>>;
-template<typename T>
-using BuildRoot = RootNode<BuildUpper<T>>;
-template<typename T>
-using BuildTile = typename BuildRoot<T>::Tile;
-
-using FloatGrid = Grid<float>;
-using Fp4Grid = Grid<Fp4>;
-using Fp8Grid = Grid<Fp8>;
-using Fp16Grid = Grid<Fp16>;
-using FpNGrid = Grid<FpN>;
-using DoubleGrid = Grid<double>;
-using Int32Grid = Grid<int32_t>;
-using UInt32Grid = Grid<uint32_t>;
-using Int64Grid = Grid<int64_t>;
-using Vec3fGrid = Grid<Vec3f>;
-using Vec3dGrid = Grid<Vec3d>;
-using Vec4fGrid = Grid<Vec4f>;
-using Vec4dGrid = Grid<Vec4d>;
-using MaskGrid = Grid<ValueMask>;
-using IndexGrid = Grid<ValueIndex>;
-using OnIndexGrid = Grid<ValueOnIndex>;
-using BoolGrid = Grid<bool>;
-
-// ----------------------------> NodeManager <--------------------------------------
-
-// GridT can be openvdb::Grid and nanovdb::build::Grid
-template<typename GridT>
-class NodeManager
-{
-public:
-
-    using ValueType = typename GridT::ValueType;
-    using BuildType = typename GridT::BuildType;
-    using GridType = GridT;
-    using TreeType = typename GridT::TreeType;
-    using RootNodeType = typename TreeType::RootNodeType;
-    static_assert(RootNodeType::LEVEL == 3, "NodeManager expected LEVEL=3");
-    using Node2 = typename RootNodeType::ChildNodeType;
-    using Node1 = typename Node2::ChildNodeType;
-    using Node0 = typename Node1::ChildNodeType;
-
-    NodeManager(GridT &grid) : mGrid(grid) {this->init();}
-    void init()
-    {
-        mArray0.clear();
-        mArray1.clear();
-        mArray2.clear();
-        auto counts = mGrid.tree().nodeCount();
-        mArray0.reserve(counts[0]);
-        mArray1.reserve(counts[1]);
-        mArray2.reserve(counts[2]);
-
-        for (auto it2 = mGrid.tree().root().cbeginChildOn(); it2; ++it2) {
-            Node2 &upper = const_cast<Node2&>(*it2);
-            mArray2.emplace_back(&upper);
-            for (auto it1 = upper.cbeginChildOn(); it1; ++it1) {
-                Node1 &lower = const_cast<Node1&>(*it1);
-                mArray1.emplace_back(&lower);
-                for (auto it0 = lower.cbeginChildOn(); it0; ++it0) {
-                    Node0 &leaf = const_cast<Node0&>(*it0);
-                    mArray0.emplace_back(&leaf);
-                }// loop over leaf nodes
-            }// loop over lower internal nodes
-        }// loop over root node
-    }
-
-    /// @brief Return the number of tree nodes at the specified level
-    /// @details 0 is leaf, 1 is lower internal, and 2 is upper internal level
-    uint64_t nodeCount(int level) const
-    {
-        NANOVDB_ASSERT(level==0 || level==1 || level==2);
-        return level==0 ? mArray0.size() : level==1 ? mArray1.size() : mArray2.size();
-    }
-
-    template<typename NodeT>
-    typename enable_if<is_same<NodeT, Node0>::value, Node0&>::type node(int i) {return *mArray0[i];}
-    template<typename NodeT>
-    typename enable_if<is_same<NodeT, Node0>::value, const Node0&>::type node(int i) const {return *mArray0[i];}
-    template<typename NodeT>
-    typename enable_if<is_same<NodeT, Node1>::value, Node1&>::type node(int i) {return *mArray1[i];}
-    template<typename NodeT>
-    typename enable_if<is_same<NodeT, Node1>::value, const Node1&>::type node(int i) const {return *mArray1[i];}
-    template<typename NodeT>
-    typename enable_if<is_same<NodeT, Node2>::value, Node2&>::type node(int i) {return *mArray2[i];}
-    template<typename NodeT>
-    typename enable_if<is_same<NodeT, Node2>::value, const Node2&>::type node(int i) const {return *mArray2[i];}
-
-    /// @brief Return the i'th leaf node with respect to breadth-first ordering
-    const Node0& leaf(uint32_t i) const { return *mArray0[i]; }
-    Node0& leaf(uint32_t i) { return *mArray0[i]; }
-    uint64_t leafCount() const {return mArray0.size();}
-
-    /// @brief Return the i'th lower internal node with respect to breadth-first ordering
-    const Node1& lower(uint32_t i) const { return *mArray1[i]; }
-    Node1& lower(uint32_t i) { return *mArray1[i]; }
-    uint64_t lowerCount() const {return mArray1.size();}
-
-    /// @brief Return the i'th upper internal node with respect to breadth-first ordering
-    const Node2& upper(uint32_t i) const { return *mArray2[i]; }
-    Node2& upper(uint32_t i) { return *mArray2[i]; }
-    uint64_t upperCount() const {return mArray2.size();}
-
-    RootNodeType& root() {return mGrid.tree().root();}
-    const RootNodeType& root() const {return mGrid.tree().root();}
-
-    TreeType& tree() {return mGrid.tree();}
-    const TreeType& tree() const {return mGrid.tree();}
-
-    GridType& grid() {return mGrid;}
-    const GridType& grid() const {return mGrid;}
-
-protected:
-
-    GridT &mGrid;
-    std::vector<Node0*> mArray0; // leaf nodes
-    std::vector<Node1*> mArray1; // lower internal nodes
-    std::vector<Node2*> mArray2; // upper internal nodes
-
-};// NodeManager
-
-template<typename NodeManagerT>
-typename enable_if<is_floating_point<typename NodeManagerT::ValueType>::value>::type
-sdfToLevelSet(NodeManagerT &mgr)
-{
-    mgr.grid().mGridClass = GridClass::LevelSet;
-    // Note that the bottom-up flood filling is essential
-    const auto outside = mgr.root().mBackground;
-    forEach(0, mgr.leafCount(), 8, [&](const Range1D& r) {
-        for (auto i = r.begin(); i != r.end(); ++i) mgr.leaf(i).signedFloodFill(outside);
-    });
-    forEach(0, mgr.lowerCount(), 1, [&](const Range1D& r) {
-        for (auto i = r.begin(); i != r.end(); ++i) mgr.lower(i).signedFloodFill(outside);
-    });
-    forEach(0, mgr.upperCount(), 1, [&](const Range1D& r) {
-        for (auto i = r.begin(); i != r.end(); ++i) mgr.upper(i).signedFloodFill(outside);
-    });
-    mgr.root().signedFloodFill(outside);
-}// sdfToLevelSet
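// sdfToLevelSet() must run bottom-up (leaf, then lower, then upper, then root)
// because each level's fill reads getFirstValue()/getLastValue() from the
// level below, which is only meaningful once that level has itself been
// filled. A hypothetical end-to-end use of the helpers above (the grid and
// the include path of this pre-32.7 header are assumptions):
#include <nanovdb/util/GridBuilder.h>

void finalizeNarrowBandSdf(nanovdb::build::FloatGrid &grid)
{
    // Breadth-first node arrays over the freshly built tree...
    nanovdb::build::NodeManager<nanovdb::build::FloatGrid> mgr(grid);
    // ...then propagate the inside/outside sign into all inactive regions.
    nanovdb::build::sdfToLevelSet(mgr);
    // The grid is now tagged GridClass::LevelSet and ready for conversion.
}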
-
-template<typename NodeManagerT>
-void levelSetToFog(NodeManagerT &mgr, bool rebuild = true)
-{
-    using ValueType = typename NodeManagerT::ValueType;
-    mgr.grid().mGridClass = GridClass::FogVolume;
-    const ValueType d = -mgr.root().mBackground, w = 1.0f / d;
-    std::atomic_bool prune{false};
-    auto op = [&](ValueType& v) -> bool {
-        if (v > ValueType(0)) {
-            v = ValueType(0);
-            return false;
-        }
-        v = v > d ? v * w : ValueType(1);
-        return true;
-    };
-    forEach(0, mgr.leafCount(), 8, [&](const Range1D& r) {
-        for (auto i = r.begin(); i != r.end(); ++i) {
-            auto& leaf = mgr.leaf(i);
-            for (uint32_t i = 0; i < 512u; ++i) leaf.mValueMask.set(i, op(leaf.mValues[i]));
-        }
-    });
-    forEach(0, mgr.lowerCount(), 1, [&](const Range1D& r) {
-        for (auto i = r.begin(); i != r.end(); ++i) {
-            auto& node = mgr.lower(i);
-            for (uint32_t i = 0; i < 4096u; ++i) {
-                if (node.mChildMask.isOn(i)) {
-                    auto* leaf = node.mTable[i].child;
-                    if (leaf->mValueMask.isOff()) {// prune leaf node
-                        node.mTable[i].value = leaf->getFirstValue();
-                        node.mChildMask.setOff(i);
-                        delete leaf;
-                        prune = true;
-                    }
-                } else {
-                    node.mValueMask.set(i, op(node.mTable[i].value));
-                }
-            }
-        }
-    });
-    forEach(0, mgr.upperCount(), 1, [&](const Range1D& r) {
-        for (auto i = r.begin(); i != r.end(); ++i) {
-            auto& node = mgr.upper(i);
-            for (uint32_t i = 0; i < 32768u; ++i) {
-                if (node.mChildMask.isOn(i)) {// prune lower internal node
-                    auto* child = node.mTable[i].child;
-                    if (child->mChildMask.isOff() && child->mValueMask.isOff()) {
-                        node.mTable[i].value = child->getFirstValue();
-                        node.mChildMask.setOff(i);
-                        delete child;
-                        prune = true;
-                    }
-                } else {
-                    node.mValueMask.set(i, op(node.mTable[i].value));
-                }
-            }
-        }
-    });
-
-    for (auto it = mgr.root().mTable.begin(); it != mgr.root().mTable.end(); ++it) {
-        auto* child = it->second.child;
-        if (child == nullptr) {
-            it->second.state = op(it->second.value);
-        } else if (child->mChildMask.isOff() && child->mValueMask.isOff()) {
-            it->second.value = child->getFirstValue();
-            it->second.state = false;
-            it->second.child = nullptr;
-            delete child;
-            prune = true;
-        }
-    }
-    if (rebuild && prune) mgr.init();
-}// levelSetToFog
-
-// ----------------------------> Implementations of random access methods <--------------------------------------
-
-template<typename T>
-struct TouchLeaf {
-    static BuildLeaf<T>& set(BuildLeaf<T> &leaf, uint32_t) {return leaf;}
-};// TouchLeaf
-
-/// @brief Implements Tree::getValue(Coord), i.e. return the value associated with a specific coordinate @c ijk.
-/// @tparam BuildT Build type of the grid being called
-/// @details The value at a coordinate maps to the background, a tile value or a leaf value.
-template<typename BuildT>
-struct GetValue {
-    static auto get(const BuildRoot<BuildT> &root) {return root.mBackground;}
-    static auto get(const BuildTile<BuildT> &tile) {return tile.value;}
-    static auto get(const BuildUpper<BuildT> &node, uint32_t n) {return node.mTable[n].value;}
-    static auto get(const BuildLower<BuildT> &node, uint32_t n) {return node.mTable[n].value;}
-    static auto get(const BuildLeaf<BuildT> &leaf, uint32_t n) {return leaf.getValue(n);}
-};// GetValue
-
-/// @brief Implements Tree::isActive(Coord)
-/// @tparam T Build type of the grid being called
-template<typename T>
-struct GetState {
-    static bool get(const BuildRoot<T>&) {return false;}
-    static bool get(const BuildTile<T> &tile) {return tile.state;}
-    static bool get(const BuildUpper<T> &node, uint32_t n) {return node.mValueMask.isOn(n);}
-    static bool get(const BuildLower<T> &node, uint32_t n) {return node.mValueMask.isOn(n);}
-    static bool get(const BuildLeaf<T> &leaf, uint32_t n) {return leaf.mValueMask.isOn(n);}
-};// GetState
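// The op lambda in levelSetToFog() remaps signed distances to fog densities:
// positive (outside) values become 0 and are deactivated, values in the
// narrow band (d < v <= 0, with d = -background) ramp linearly to v/d in
// (0, 1], and anything deeper inside saturates at 1. The same mapping as a
// standalone function with a numeric check (values are made up):
#include <cassert>
#include <cmath>

float sdfToDensity(float v, float background)
{
    const float d = -background, w = 1.0f / d;   // note: d < 0, so v * w >= 0
    if (v > 0.0f) return 0.0f;                   // outside -> empty space
    return v > d ? v * w : 1.0f;                 // band ramp, else full density
}

int main()
{
    const float bg = 3.0f;
    assert(sdfToDensity(2.0f, bg) == 0.0f);                     // outside
    assert(std::fabs(sdfToDensity(-1.5f, bg) - 0.5f) < 1e-6f);  // halfway into the band
    assert(sdfToDensity(-5.0f, bg) == 1.0f);                    // deep inside
    return 0;
}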
-
-/// @brief Set the value and its state at the leaf level mapped to by ijk, and create the leaf node and branch if needed.
-/// @tparam T BuildType of the corresponding tree
-template<typename T>
-struct SetValue {
-    static BuildLeaf<T>* set(BuildLeaf<T> &leaf, uint32_t n) {
-        leaf.mValueMask.setOn(n);// always set the active bit
-        return &leaf;
-    }
-    static BuildLeaf<T>* set(BuildLeaf<T> &leaf, uint32_t n, const typename BuildLeaf<T>::ValueType &v) {
-        leaf.setValue(n, v);
-        return &leaf;
-    }
-};// SetValue
-
-/// @brief Implements Tree::probeLeaf(Coord)
-/// @tparam T Build type of the grid being called
-template<typename T>
-struct ProbeValue {
-    using ValueT = typename BuildLeaf<T>::ValueType;
-    static bool get(const BuildRoot<T> &root, ValueT &v) {
-        v = root.mBackground;
-        return false;
-    }
-    static bool get(const BuildTile<T> &tile, ValueT &v) {
-        v = tile.value;
-        return tile.state;
-    }
-    static bool get(const BuildUpper<T> &node, uint32_t n, ValueT &v) {
-        v = node.mTable[n].value;
-        return node.mValueMask.isOn(n);
-    }
-    static bool get(const BuildLower<T> &node, uint32_t n, ValueT &v) {
-        v = node.mTable[n].value;
-        return node.mValueMask.isOn(n);
-    }
-    static bool get(const BuildLeaf<T> &leaf, uint32_t n, ValueT &v) {
-        v = leaf.getValue(n);
-        return leaf.isActive(n);
-    }
-};// ProbeValue
-
-} // namespace build
-
-} // namespace nanovdb
-
-#endif // NANOVDB_GRID_BUILDER_H_HAS_BEEN_INCLUDED
+#include <nanovdb/util/Util.h> // for NANOVDB_DEPRECATED_HEADER
+#include <nanovdb/tools/GridBuilder.h>
+NANOVDB_DEPRECATED_HEADER("Include nanovdb/tools/GridBuilder.h instead.")
diff --git a/nanovdb/nanovdb/util/GridChecksum.h b/nanovdb/nanovdb/util/GridChecksum.h
index 531a6f674b..1b0075f9c0 100644
--- a/nanovdb/nanovdb/util/GridChecksum.h
+++ b/nanovdb/nanovdb/util/GridChecksum.h
@@ -1,462 +1,6 @@
 // Copyright Contributors to the OpenVDB Project
 // SPDX-License-Identifier: MPL-2.0
 
-/*!
-    \file GridChecksum.h
-
-    \author Ken Museth
-
-    \brief Computes a pair of 32bit checksums, of a Grid, by means of Cyclic Redundancy Check (CRC)
-
-    \details A CRC32 is the 32 bit remainder, or residue, of binary division of a message, by a polynomial.
-*/
-
-#ifndef NANOVDB_GRIDCHECKSUM_H_HAS_BEEN_INCLUDED
-#define NANOVDB_GRIDCHECKSUM_H_HAS_BEEN_INCLUDED
-
-#include <algorithm> // for std::generate
-#include <array>
-#include <vector>
-#include <cstdint>
-#include <cstddef> // offsetof macro
-#include <numeric>
-#include <type_traits>
-#include <memory> // for std::unique_ptr
-
-#include <nanovdb/NanoVDB.h>
-#include "ForEach.h"
-#include "NodeManager.h"
-
-// Define log of block size for FULL CRC32 computation.
-// A value of 12 corresponds to a block size of 4KB (2^12 = 4096).
-// Undefine to use old checksum computation
-#define NANOVDB_CRC32_LOG2_BLOCK_SIZE 12
-
-namespace nanovdb {
-
-/// @brief List of different modes for computing for a checksum
-enum class ChecksumMode : uint32_t { Disable = 0,// no computation
-                                     Partial = 1,// fast but approximate
-                                     Full = 2,// slow but accurate
-                                     Default = 1,// defaults to Partial
-                                     End = 3 };// marks the end of the enum list
-
-/// @brief Return the (2 x CRC32) checksum of the specified @a grid
-/// @tparam BuildT Template parameter used to build NanoVDB grid.
-/// @param grid Grid from which the checksum is computed.
-/// @param mode Defines the mode of computation for the checksum.
-/// @return Return the (2 x CRC32) checksum of the specified @a grid
-template<typename BuildT>
-uint64_t checksum(const NanoGrid<BuildT> &grid, ChecksumMode mode = ChecksumMode::Default);
-
-/// @brief Return true if the checksum of the @a grid matches the expected
-///        value already encoded into the grid's meta data.
-/// @tparam BuildT Template parameter used to build NanoVDB grid.
-/// @param grid Grid whose checksum is validated.
-/// @param mode Defines the mode of computation for the checksum.
-template -bool validateChecksum(const NanoGrid &grid, ChecksumMode mode = ChecksumMode::Default); - -/// @brief Updates the checksum of a grid -/// -/// @param grid Grid whose checksum will be updated. -/// @param mode Defines the mode of computation for the checksum. -template -void updateChecksum(NanoGrid &grid, ChecksumMode mode = ChecksumMode::Default); - -namespace crc32 { - -/// @brief Initiate single entry in look-up-table for CRC32 computations -/// @param lut pointer of size 256 for look-up-table -/// @param n entry in table (assumed n < 256) -inline __hostdev__ void initLut(uint32_t lut[256], uint32_t n) -{ - uint32_t &cs = lut[n] = n; - for (int i = 0; i < 8; ++i) cs = (cs >> 1) ^ ((cs & 1) ? 0xEDB88320 : 0); -} - -/// @brief Initiate entire look-up-table for CRC32 computations -/// @param lut pointer of size 256 for look-up-table -inline __hostdev__ void initLut(uint32_t lut[256]){for (uint32_t n = 0u; n < 256u; ++n) initLut(lut, n);} - -/// @brief Create and initiate entire look-up-table for CRC32 computations -/// @return returns a unique pointer to the lookup table of size 256. -inline std::unique_ptr createLut() -{ - std::unique_ptr lut(new uint32_t[256]); - initLut(lut.get()); - return lut; -} - -/// @brief Compute crc32 checksum of @c data of @c size bytes (without a lookup table)) -/// @param data pointer to beginning of data -/// @param size byte size of data -/// @param crc initial value of crc32 checksum -/// @return return crc32 checksum of @c data -inline __hostdev__ uint32_t checksum(const void* data, size_t size, uint32_t crc = 0) -{ - crc = ~crc; - for (auto *p = (const uint8_t*)data, *q = p + size; p != q; ++p) { - crc ^= *p; - for (int j = 0; j < 8; ++j) crc = (crc >> 1) ^ (0xEDB88320 & (-(crc & 1))); - } - return ~crc; -} - -/// @brief Compute crc32 checksum of data between @c begin and @c end -/// @param begin points to beginning of data -/// @param end points to end of @data, (exclusive) -/// @param crc initial value of crc32 checksum -/// @return return crc32 checksum -inline __hostdev__ uint32_t checksum(const void *begin, const void *end, uint32_t crc = 0) -{ - NANOVDB_ASSERT(begin && end); - NANOVDB_ASSERT(end >= begin); - return checksum(begin, (const char*)end - (const char*)begin, crc); -} - -/// @brief Compute crc32 checksum of @c data with @c size bytes using a lookup table -/// @param data pointer to begenning of data -/// @param size byte size -/// @param lut pointer to loopup table for accelerated crc32 computation -/// @param crc initial value of the checksum -/// @return crc32 checksum of @c data with @c size bytes -inline __hostdev__ uint32_t checksum(const void *data, size_t size, const uint32_t lut[256], uint32_t crc = 0) -{ - crc = ~crc; - for (auto *p = (const uint8_t*)data, *q = p + size; p != q; ++p) crc = lut[(crc ^ *p) & 0xFF] ^ (crc >> 8); - return ~crc; -} - -/// @brief Compute crc32 checksum of data between @c begin and @c end using a lookup table -/// @param begin points to beginning of data -/// @param end points to end of @data, (exclusive) -/// @param lut pointer to loopup table for accelerated crc32 computation -/// @param crc initial value of crc32 checksum -/// @return return crc32 checksum -inline __hostdev__ uint32_t checksum(const void *begin, const void *end, const uint32_t lut[256], uint32_t crc = 0) -{ - NANOVDB_ASSERT(begin && end); - NANOVDB_ASSERT(end >= begin); - return checksum(begin, (const char*)end - (const char*)begin, lut, crc); -} - -}// namespace crc32 - -/// @brief Class that encapsulates two CRC32 
checksums, one for the Grid, Tree and Root node meta data -/// and one for the remaining grid nodes. -class GridChecksum -{ - /// Three types of checksums: - /// 1) Empty: all 64 bits are on (used to signify no checksum) - /// 2) Partial: Upper 32 bits are on and not all of lower 32 bits are on (lower 32 bits checksum head of grid) - /// 3) Full: Not all of the 64 bits are on (lower 32 bits checksum head of grid and upper 32 bits checksum tail of grid) - union {uint32_t mCRC[2]; uint64_t mChecksum; };// mCRC[0] is checksum of Grid, Tree and Root, and mCRC[1] is checksum of nodes - static constexpr uint32_t EMPTY32 = ~uint32_t{0}; - -public: - - static constexpr uint64_t EMPTY = ~uint64_t(0); - - /// @brief default constructor initiates checksum to EMPTY - GridChecksum() : mCRC{EMPTY32, EMPTY32} {} - - /// @brief Constructor that allows the two 32bit checksums to be initiated explicitly - /// @param head Initial 32bit CRC checksum of grid, tree and root data - /// @param tail Initial 32bit CRC checksum of all the nodes and blind data - GridChecksum(uint32_t head, uint32_t tail) : mCRC{head, tail} {} - - /// @brief Constructor that initiates the checksum from a packed 64 bit value, optionally reduced according to @a mode - /// @param checksum 64 bit checksum, i.e. two packed 32bit CRC checksums - /// @param mode mode of the checksum, e.g. Partial discards the second (node) checksum - GridChecksum(uint64_t checksum, ChecksumMode mode = ChecksumMode::Full) : mChecksum{mode == ChecksumMode::Disable ? EMPTY : checksum} - { - if (mode == ChecksumMode::Partial) mCRC[1] = EMPTY32; - } - - /// @brief return the 64 bit checksum of this instance - uint64_t checksum() const { return mChecksum; } - - /// @brief return 32 bit (crc32) checksum of this instance - /// @param i index of value 0 or 1 indicating the 32 bit checksum of the head or nodes - /// @return non-const reference of the i'th 32bit checksum - uint32_t& checksum(int i) {NANOVDB_ASSERT(i==0 || i==1); return mCRC[i]; } - - /// @brief return 32 bit (crc32) checksum of this instance - /// @param i index of value 0 or 1 indicating the 32 bit checksum of the head or nodes - /// @return copy of the i'th 32bit checksum - uint32_t checksum(int i) const {NANOVDB_ASSERT(i==0 || i==1); return mCRC[i]; } - - /// @brief return true if the 64 bit checksum is partial, i.e. of head only - bool isPartial() const { return mCRC[0] != EMPTY32 && mCRC[1] == EMPTY32; } - - /// @brief return true if the 64 bit checksum is full, i.e. of both head and nodes - bool isFull() const { return mCRC[0] != EMPTY32 && mCRC[1] != EMPTY32; } - - /// @brief return true if the 64 bit checksum is disabled (unset) - bool isEmpty() const { return mChecksum == EMPTY; } - - /// @brief return the mode of the 64 bit checksum - ChecksumMode mode() const - { - return mChecksum == EMPTY ? ChecksumMode::Disable : - mCRC[1] == EMPTY32 ?
ChecksumMode::Partial : ChecksumMode::Full; - } -#ifdef NANOVDB_CRC32_LOG2_BLOCK_SIZE - /// @brief compute checksum of @c gridData using a 4KB blocked approach - /// @param gridData Reference to GridData - /// @param mode Mode of the checksum computation - ChecksumMode operator()(const GridData &gridData, ChecksumMode mode = ChecksumMode::Full); -#else - /// @brief Compute checksum using old (node-based) approach - /// @tparam ValueT Build type of the grid - /// @param grid Reference to Grid - /// @param mode Mode of the checksum computation - template - void operator()(const NanoGrid &grid, ChecksumMode mode = ChecksumMode::Full); -#endif - /// @brief return true if the checksums are identical - /// @param rhs other GridChecksum - bool operator==(const GridChecksum &rhs) const {return mChecksum == rhs.mChecksum;} - - /// @brief return true if the checksums are not identical - /// @param rhs other GridChecksum - bool operator!=(const GridChecksum &rhs) const {return mChecksum != rhs.mChecksum;} -};// GridChecksum - -// [GridData][TreeData]---[RootData][ROOT TILES...]---[NodeData<5>]---[NodeData<4>]---[LeafData<3>]---[BLINDMETA...]---[BLIND0]---[BLIND1]---etc. - -#ifdef NANOVDB_CRC32_LOG2_BLOCK_SIZE - -inline ChecksumMode GridChecksum::operator()(const GridData &gridData, ChecksumMode mode) -{ - mChecksum = EMPTY; - - if (mode == ChecksumMode::Disable) return ChecksumMode::Disable; - - auto lut = crc32::createLut(); - const uint8_t *begin = (const uint8_t*)(&gridData), *mid = gridData.template nodePtr<2>(), *end = begin + gridData.mGridSize;// what about empty grids? - if (mid == nullptr) {// no (upper) nodes - if (gridData.mBlindMetadataCount) { - mid = begin + gridData.mBlindMetadataOffset;// exclude blind data from Partial checksum - } else { - mid = end;// no nodes or blind data, so Partial checksum is computed on the entire grid buffer - } - } - mCRC[0] = crc32::checksum(begin + 16, mid, lut.get());// GridData, TreeData. 
RootData but exclude GridData::mMagic and GridData::mChecksum - - if (mode != ChecksumMode::Full || mid == end) return ChecksumMode::Partial; - - uint64_t size = end - mid;// includes blind data - const uint64_t blockCount = size >> NANOVDB_CRC32_LOG2_BLOCK_SIZE;// number of 4 KB (4096 byte) blocks - std::unique_ptr checksums(new uint32_t[blockCount]); - forEach(0, blockCount, 64, [&](const Range1D &r) { - uint32_t blockSize = 1 << NANOVDB_CRC32_LOG2_BLOCK_SIZE; - uint32_t *p = checksums.get() + r.begin(); - for (auto i = r.begin(); i != r.end(); ++i) { - if (i+1 == blockCount) blockSize += size - (blockCount< -void GridChecksum::operator()(const NanoGrid &grid, ChecksumMode mode) -{ - // Validate the assumed memory layout - static_assert(offsetof(GridData, mMagic) == 0, "Unexpected offset to magic number"); - static_assert(offsetof(GridData, mChecksum) == 8, "Unexpected offset to checksum"); - static_assert(offsetof(GridData, mVersion) == 16, "Unexpected offset to version number"); - - mChecksum = EMPTY; - - if (mode == ChecksumMode::Disable) return; - - auto lut = crc32::createLut(); - const uint8_t *begin = reinterpret_cast(&grid), *mid = grid.template nodePtr<2>(); - - mCRC[0] = crc32::checksum(begin + 16, mid, lut.get());// process Grid + Tree + Root but exclude mMagic and mChecksum - - if (mode != ChecksumMode::Full || grid.isEmpty()) return; - - const auto &tree = grid.tree(); - const auto &root = tree.root(); - auto nodeMgrHandle = createNodeManager(grid); - auto *nodeMgr = nodeMgrHandle.template mgr(); - assert(isValid(nodeMgr)); - const auto nodeCount = tree.nodeCount(0) + tree.nodeCount(1) + tree.nodeCount(2); - std::vector checksums(nodeCount, 0); - // process upper internal nodes - auto kernel2 = [&](const Range1D &r) { - uint32_t *p = checksums.data() + r.begin(); - for (auto i = r.begin(); i != r.end(); ++i) { - const auto &node = nodeMgr->upper(static_cast(i)); - *p++ = crc32::checksum(&node, node.memUsage(), lut.get()); - } - }; - // process lower internal nodes - auto kernel1 = [&](const Range1D &r) { - uint32_t *p = checksums.data() + r.begin() + tree.nodeCount(2); - for (auto i = r.begin(); i != r.end(); ++i) { - const auto &node = nodeMgr->lower(static_cast(i)); - *p++ = crc32::checksum(&node, node.memUsage(), lut.get()); - } - }; - // process leaf nodes - auto kernel0 = [&](const Range1D &r) { - uint32_t *p = checksums.data() + r.begin() + tree.nodeCount(1) + tree.nodeCount(2); - for (auto i = r.begin(); i != r.end(); ++i) { - const auto &leaf = nodeMgr->leaf(static_cast(i)); - *p++ = crc32::checksum(&leaf, leaf.memUsage(), lut.get()); - } - }; - forEach(0, tree.nodeCount(2), 1, kernel2); - forEach(0, tree.nodeCount(1), 1, kernel1); - forEach(0, tree.nodeCount(0), 8, kernel0); - mCRC[1] = crc32::checksum(checksums.data(), sizeof(uint32_t)*checksums.size(), lut.get()); -}// GridChecksum::operator() - -#endif// NANOVDB_CRC32_LOG2_BLOCK_SIZE - -template -uint64_t checksum(const NanoGrid &grid, ChecksumMode mode) -{ - GridChecksum cs; - cs(grid, mode); - return cs.checksum(); -} - -template -bool validateChecksum(const NanoGrid &grid, ChecksumMode mode) -{ - GridChecksum cs1(grid.checksum(), mode), cs2; - cs2(grid, cs1.mode() ); - return cs1 == cs2; -} - -template -void updateChecksum(NanoGrid &grid, ChecksumMode mode) -{ - GridChecksum cs; - cs(grid, mode); - grid.data()->mChecksum = cs.checksum(); -} - -inline bool updateChecksum(GridData &gridData, ChecksumMode mode) -{ -#ifdef NANOVDB_CRC32_LOG2_BLOCK_SIZE - GridChecksum cs; - cs(gridData, mode); - gridData.mChecksum 
= cs.checksum(); -#else - if (mode == ChecksumMode::Disable) return false; - switch (data->mGridType){ - case GridType::Float: - updateChecksum(*reinterpret_cast<NanoGrid<float>*>(data), mode); - break; - case GridType::Double: - updateChecksum(*reinterpret_cast<NanoGrid<double>*>(data), mode); - break; - case GridType::Int16: - updateChecksum(*reinterpret_cast<NanoGrid<int16_t>*>(data), mode); - break; - case GridType::Int32: - updateChecksum(*reinterpret_cast<NanoGrid<int32_t>*>(data), mode); - break; - case GridType::Int64: - updateChecksum(*reinterpret_cast<NanoGrid<int64_t>*>(data), mode); - break; - case GridType::Vec3f: - updateChecksum(*reinterpret_cast<NanoGrid<Vec3f>*>(data), mode); - break; - case GridType::Vec3d: - updateChecksum(*reinterpret_cast<NanoGrid<Vec3d>*>(data), mode); - break; - case GridType::UInt32: - updateChecksum(*reinterpret_cast<NanoGrid<uint32_t>*>(data), mode); - break; - case GridType::Mask: - updateChecksum(*reinterpret_cast<NanoGrid<ValueMask>*>(data), mode); - break; - case GridType::Index: - updateChecksum(*reinterpret_cast<NanoGrid<ValueIndex>*>(data), mode); - break; - case GridType::OnIndex: - updateChecksum(*reinterpret_cast<NanoGrid<ValueOnIndex>*>(data), mode); - break; - case GridType::IndexMask: - updateChecksum(*reinterpret_cast<NanoGrid<ValueIndexMask>*>(data), mode); - break; - case GridType::OnIndexMask: - updateChecksum(*reinterpret_cast<NanoGrid<ValueOnIndexMask>*>(data), mode); - break; - case GridType::Boolean: - updateChecksum(*reinterpret_cast<NanoGrid<bool>*>(data), mode); - break; - case GridType::RGBA8: - updateChecksum(*reinterpret_cast<NanoGrid<Rgba8>*>(data), mode); - break; - case GridType::Fp4: - updateChecksum(*reinterpret_cast<NanoGrid<Fp4>*>(data), mode); - break; - case GridType::Fp8: - updateChecksum(*reinterpret_cast<NanoGrid<Fp8>*>(data), mode); - break; - case GridType::Fp16: - updateChecksum(*reinterpret_cast<NanoGrid<Fp16>*>(data), mode); - break; - case GridType::FpN: - updateChecksum(*reinterpret_cast<NanoGrid<FpN>*>(data), mode); - break; - case GridType::Vec4f: - updateChecksum(*reinterpret_cast<NanoGrid<Vec4f>*>(data), mode); - break; - case GridType::Vec4d: - updateChecksum(*reinterpret_cast<NanoGrid<Vec4d>*>(data), mode); - break; - default: { - std::stringstream ss; - ss << "Cannot update checksum for grid of unknown type \"" << toStr(data->mGridType); - throw std::runtime_error(ss.str() + "\""); - } - }// switch -#endif - return true; -}// updateChecksum(GridData *data, ChecksumMode mode) - -/// @brief Preserve the existing mode of the checksum and update it if it's not disabled -/// @param data Pointer to the grid data whose checksum is updated -/// @return returns true if the checksum was updated -inline bool updateChecksum(GridData *data) -{ - GridChecksum cs(data->mChecksum); - const auto mode = cs.mode(); - return updateChecksum(*data, mode); -}// updateChecksum(GridData *data) - -/// @brief Updates the grid index and count, as well as the partial checksum if needed -/// @param data Pointer to grid data -/// @param gridIndex New value of the index -/// @param gridCount New value of the grid count -/// @return returns true if the checksum was updated -inline bool updateGridCount(GridData *data, uint32_t gridIndex, uint32_t gridCount) -{ - NANOVDB_ASSERT(gridIndex < gridCount); - if (data->mGridIndex == gridIndex && data->mGridCount == gridCount) return false;// nothing to update - data->mGridIndex = gridIndex; - data->mGridCount = gridCount; - GridChecksum cs(data->mChecksum); - if (cs.isEmpty()) return false;// no checksum to update - updateChecksum(*data, ChecksumMode::Partial);// only update the checksum of the grid since we only modified the GridData - reinterpret_cast<GridChecksum*>(&(data->mChecksum))->checksum(1) = cs.checksum(1);// copy the old checksum of the tree nodes since it was set to EMPTY during the update - return true; -} - -} // namespace nanovdb - -#endif // NANOVDB_GRIDCHECKSUM_H_HAS_BEEN_INCLUDED +#include <nanovdb/util/Util.h> // for NANOVDB_DEPRECATED_HEADER +#include <nanovdb/tools/GridChecksum.h>
+NANOVDB_DEPRECATED_HEADER("Include nanovdb/tools/GridChecksum.h instead.") diff --git a/nanovdb/nanovdb/util/GridStats.h b/nanovdb/nanovdb/util/GridStats.h index 267e7462e3..e84b14229b 100644 --- a/nanovdb/nanovdb/util/GridStats.h +++ b/nanovdb/nanovdb/util/GridStats.h @@ -1,855 +1,6 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -/*! - \file GridStats.h - - \author Ken Museth - - \date August 29, 2020 - - \brief Re-computes min/max/avg/var/bbox information for each node in a - pre-existing NanoVDB grid. -*/ - -#ifndef NANOVDB_GRIDSTATS_H_HAS_BEEN_INCLUDED -#define NANOVDB_GRIDSTATS_H_HAS_BEEN_INCLUDED - -#include -#include "Range.h" -#include "ForEach.h" - -#ifdef NANOVDB_USE_TBB -#include -#endif - -#if defined(__CUDACC__) -#include // for cuda::std::numeric_limits -#else -#include // for std::numeric_limits -#endif - -#include -#include - -namespace nanovdb { - -/// @brief Grid flags which indicate what extra information is present in the grid buffer -enum class StatsMode : uint32_t { - Disable = 0,// disable the computation of any type of statistics (obviously the FASTEST!) - BBox = 1,// only compute the bbox of active values per node and total activeVoxelCount - MinMax = 2,// additionally compute extrema values - All = 3,// compute all of the statics, i.e. bbox, min/max, average and standard deviation - Default = 3,// default computational mode for statistics - End = 4, -}; - -/// @brief Re-computes the min/max, stats and bbox information for an existing NanoVDB Grid -/// -/// @param grid Grid whose stats to update -/// @param mode Mode of computation for the statistics. -template -void gridStats(NanoGrid& grid, StatsMode mode = StatsMode::Default); - -//================================================================================================ - -template::Rank> -class Extrema; - -/// @brief Template specialization of Extrema on scalar value types, i.e. 
rank = 0 -template -class Extrema -{ -protected: - ValueT mMin, mMax; - -public: - using ValueType = ValueT; - __hostdev__ Extrema() -#if defined(__CUDACC__) - : mMin(cuda::std::numeric_limits::max()) - , mMax(cuda::std::numeric_limits::lowest()) -#else - : mMin(std::numeric_limits::max()) - , mMax(std::numeric_limits::lowest()) -#endif - { - } - __hostdev__ Extrema(const ValueT& v) - : mMin(v) - , mMax(v) - { - } - __hostdev__ Extrema(const ValueT& a, const ValueT& b) - : mMin(a) - , mMax(b) - { - } - __hostdev__ Extrema& min(const ValueT& v) - { - if (v < mMin) mMin = v; - return *this; - } - __hostdev__ Extrema& max(const ValueT& v) - { - if (v > mMax) mMax = v; - return *this; - } - __hostdev__ Extrema& add(const ValueT& v) - { - this->min(v); - this->max(v); - return *this; - } - __hostdev__ Extrema& add(const ValueT& v, uint64_t) { return this->add(v); } - __hostdev__ Extrema& add(const Extrema& other) - { - this->min(other.mMin); - this->max(other.mMax); - return *this; - } - __hostdev__ const ValueT& min() const { return mMin; } - __hostdev__ const ValueT& max() const { return mMax; } - __hostdev__ operator bool() const { return mMin <= mMax; } - __hostdev__ static constexpr bool hasMinMax() { return !std::is_same::value; } - __hostdev__ static constexpr bool hasAverage() { return false; } - __hostdev__ static constexpr bool hasStdDeviation() { return false; } - __hostdev__ static constexpr bool hasStats() { return !std::is_same::value; } - __hostdev__ static constexpr size_t size() { return 0; } - - template - __hostdev__ void setStats(NodeT &node) const - { - node.setMin(this->min()); - node.setMax(this->max()); - } -}; // Extrema - -/// @brief Template specialization of Extrema on vector value types, i.e. rank = 1 -template -class Extrema -{ -protected: - using Real = typename VecT::ValueType; // this works with both nanovdb and openvdb vectors - struct Pair - { - Real scalar; - VecT vector; - - __hostdev__ Pair(Real s)// is only used by Extrema() default c-tor - : scalar(s) - , vector(s) - { - } - __hostdev__ Pair(const VecT& v) - : scalar(v.lengthSqr()) - , vector(v) - { - } - __hostdev__ bool operator<(const Pair& rhs) const { return scalar < rhs.scalar; } - } mMin, mMax; - __hostdev__ Extrema& add(const Pair& p) - { - if (p < mMin) mMin = p; - if (mMax < p) mMax = p; - return *this; - } - -public: - using ValueType = VecT; - __hostdev__ Extrema() -#if defined(__CUDACC__) - : mMin(cuda::std::numeric_limits::max()) - , mMax(cuda::std::numeric_limits::lowest()) -#else - : mMin(std::numeric_limits::max()) - , mMax(std::numeric_limits::lowest()) -#endif - { - } - __hostdev__ Extrema(const VecT& v) - : mMin(v) - , mMax(v) - { - } - __hostdev__ Extrema(const VecT& a, const VecT& b) - : mMin(a) - , mMax(b) - { - } - __hostdev__ Extrema& min(const VecT& v) - { - Pair tmp(v); - if (tmp < mMin) mMin = tmp; - return *this; - } - __hostdev__ Extrema& max(const VecT& v) - { - Pair tmp(v); - if (mMax < tmp) mMax = tmp; - return *this; - } - __hostdev__ Extrema& add(const VecT& v) { return this->add(Pair(v)); } - __hostdev__ Extrema& add(const VecT& v, uint64_t) { return this->add(Pair(v)); } - __hostdev__ Extrema& add(const Extrema& other) - { - if (other.mMin < mMin) mMin = other.mMin; - if (mMax < other.mMax) mMax = other.mMax; - return *this; - } - __hostdev__ const VecT& min() const { return mMin.vector; } - __hostdev__ const VecT& max() const { return mMax.vector; } - __hostdev__ operator bool() const { return !(mMax < mMin); } - __hostdev__ static constexpr bool hasMinMax() { return 
!std::is_same::value; } - __hostdev__ static constexpr bool hasAverage() { return false; } - __hostdev__ static constexpr bool hasStdDeviation() { return false; } - __hostdev__ static constexpr bool hasStats() { return !std::is_same::value; } - __hostdev__ static constexpr size_t size() { return 0; } - - template - __hostdev__ void setStats(NodeT &node) const - { - node.setMin(this->min()); - node.setMax(this->max()); - } -}; // Extrema - -//================================================================================================ - -template::Rank> -class Stats; - -/// @brief This class computes statistics (minimum value, maximum -/// value, mean, variance and standard deviation) of a population -/// of floating-point values. -/// -/// @details variance = Mean[ (X-Mean[X])^2 ] = Mean[X^2] - Mean[X]^2, -/// standard deviation = sqrt(variance) -/// -/// @note This class employs incremental computation and double precision. -template -class Stats : public Extrema -{ -protected: - using BaseT = Extrema; - using RealT = double; // for accuracy the internal precission must be 64 bit floats - size_t mSize; - double mAvg, mAux; - -public: - using ValueType = ValueT; - __hostdev__ Stats() - : BaseT() - , mSize(0) - , mAvg(0.0) - , mAux(0.0) - { - } - __hostdev__ Stats(const ValueT& val) - : BaseT(val) - , mSize(1) - , mAvg(RealT(val)) - , mAux(0.0) - { - } - /// @brief Add a single sample - __hostdev__ Stats& add(const ValueT& val) - { - BaseT::add(val); - mSize += 1; - const double delta = double(val) - mAvg; - mAvg += delta / double(mSize); - mAux += delta * (double(val) - mAvg); - return *this; - } - /// @brief Add @a n samples with constant value @a val. - __hostdev__ Stats& add(const ValueT& val, uint64_t n) - { - const double denom = 1.0 / double(mSize + n); - const double delta = double(val) - mAvg; - mAvg += denom * delta * double(n); - mAux += denom * delta * delta * double(mSize) * double(n); - BaseT::add(val); - mSize += n; - return *this; - } - - /// Add the samples from the other Stats instance. - __hostdev__ Stats& add(const Stats& other) - { - if (other.mSize > 0) { - const double denom = 1.0 / double(mSize + other.mSize); - const double delta = other.mAvg - mAvg; - mAvg += denom * delta * double(other.mSize); - mAux += other.mAux + denom * delta * delta * double(mSize) * double(other.mSize); - BaseT::add(other); - mSize += other.mSize; - } - return *this; - } - - __hostdev__ static constexpr bool hasMinMax() { return !std::is_same::value; } - __hostdev__ static constexpr bool hasAverage() { return !std::is_same::value; } - __hostdev__ static constexpr bool hasStdDeviation() { return !std::is_same::value; } - __hostdev__ static constexpr bool hasStats() { return !std::is_same::value; } - - __hostdev__ size_t size() const { return mSize; } - - //@{ - /// Return the arithmetic mean, i.e. average, value. - __hostdev__ double avg() const { return mAvg; } - __hostdev__ double mean() const { return mAvg; } - //@} - - //@{ - /// @brief Return the population variance. - /// - /// @note The unbiased sample variance = population variance * num/(num-1) - __hostdev__ double var() const { return mSize < 2 ? 0.0 : mAux / double(mSize); } - __hostdev__ double variance() const { return this->var(); } - //@} - - //@{ - /// @brief Return the standard deviation (=Sqrt(variance)) as - /// defined from the (biased) population variance. 
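// A minimal worked example of the incremental (Welford-style) update in add()
// above: for the samples {2, 4, 9}, add(2) gives mAvg=2, mAux=0; add(4) gives
// mAvg=3, mAux=2; add(9) gives mAvg=5, mAux=26; hence var() = 26/3 and
// std() = sqrt(26/3), the population variance and standard deviation.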
- __hostdev__ double std() const { return sqrt(this->var()); } - __hostdev__ double stdDev() const { return this->std(); } - //@} - - template - __hostdev__ void setStats(NodeT &node) const - { - node.setMin(this->min()); - node.setMax(this->max()); - node.setAvg(this->avg()); - node.setDev(this->std()); - } -}; // end Stats - -/// @brief This class computes statistics (minimum value, maximum -/// value, mean, variance and standard deviation) of a population -/// of floating-point values. -/// -/// @details variance = Mean[ (X-Mean[X])^2 ] = Mean[X^2] - Mean[X]^2, -/// standard deviation = sqrt(variance) -/// -/// @note This class employs incremental computation and double precision. -template -class Stats : public Extrema -{ -protected: - using BaseT = Extrema; - using RealT = double; // for accuracy the internal precision must be 64 bit floats - size_t mSize; - double mAvg, mAux; - -public: - using ValueType = ValueT; - __hostdev__ Stats() - : BaseT() - , mSize(0) - , mAvg(0.0) - , mAux(0.0) - { - } - /// @brief Add a single sample - __hostdev__ Stats& add(const ValueT& val) - { - typename BaseT::Pair tmp(val); - BaseT::add(tmp); - mSize += 1; - const double delta = tmp.scalar - mAvg; - mAvg += delta / double(mSize); - mAux += delta * (tmp.scalar - mAvg); - return *this; - } - /// @brief Add @a n samples with constant value @a val. - __hostdev__ Stats& add(const ValueT& val, uint64_t n) - { - typename BaseT::Pair tmp(val); - const double denom = 1.0 / double(mSize + n); - const double delta = tmp.scalar - mAvg; - mAvg += denom * delta * double(n); - mAux += denom * delta * delta * double(mSize) * double(n); - BaseT::add(tmp); - mSize += n; - return *this; - } - - /// Add the samples from the other Stats instance. - __hostdev__ Stats& add(const Stats& other) - { - if (other.mSize > 0) { - const double denom = 1.0 / double(mSize + other.mSize); - const double delta = other.mAvg - mAvg; - mAvg += denom * delta * double(other.mSize); - mAux += other.mAux + denom * delta * delta * double(mSize) * double(other.mSize); - BaseT::add(other); - mSize += other.mSize; - } - return *this; - } - - __hostdev__ static constexpr bool hasMinMax() { return !std::is_same::value; } - __hostdev__ static constexpr bool hasAverage() { return !std::is_same::value; } - __hostdev__ static constexpr bool hasStdDeviation() { return !std::is_same::value; } - __hostdev__ static constexpr bool hasStats() { return !std::is_same::value; } - - __hostdev__ size_t size() const { return mSize; } - - //@{ - /// Return the arithmetic mean, i.e. average, value. - __hostdev__ double avg() const { return mAvg; } - __hostdev__ double mean() const { return mAvg; } - //@} - - //@{ - /// @brief Return the population variance. - /// - /// @note The unbiased sample variance = population variance * num/(num-1) - __hostdev__ double var() const { return mSize < 2 ? 0.0 : mAux / double(mSize); } - __hostdev__ double variance() const { return this->var(); } - //@} - - //@{ - /// @brief Return the standard deviation (=Sqrt(variance)) as - /// defined from the (biased) population variance. 
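// Note that for vector value types the underlying Extrema specialization
// orders samples by Pair::scalar = v.lengthSqr(), so min()/max() here return
// the shortest and longest sample vectors, not component-wise extrema.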
- __hostdev__ double std() const { return sqrt(this->var()); } - __hostdev__ double stdDev() const { return this->std(); } - //@} - - template - __hostdev__ void setStats(NodeT &node) const - { - node.setMin(this->min()); - node.setMax(this->max()); - node.setAvg(this->avg()); - node.setDev(this->std()); - } -}; // end Stats - -/// @brief No-op Stats class -template -struct NoopStats -{ - using ValueType = ValueT; - __hostdev__ NoopStats() {} - __hostdev__ NoopStats(const ValueT&) {} - __hostdev__ NoopStats& add(const ValueT&) { return *this; } - __hostdev__ NoopStats& add(const ValueT&, uint64_t) { return *this; } - __hostdev__ NoopStats& add(const NoopStats&) { return *this; } - __hostdev__ static constexpr size_t size() { return 0; } - __hostdev__ static constexpr bool hasMinMax() { return false; } - __hostdev__ static constexpr bool hasAverage() { return false; } - __hostdev__ static constexpr bool hasStdDeviation() { return false; } - __hostdev__ static constexpr bool hasStats() { return false; } - template - __hostdev__ void setStats(NodeT&) const{} -}; // end NoopStats - -//================================================================================================ - -/// @brief Allows for the construction of NanoVDB grids without any dependency -template> -class GridStats -{ - struct NodeStats; - using TreeT = typename GridT::TreeType; - using ValueT = typename TreeT::ValueType; - using BuildT = typename TreeT::BuildType; - using Node0 = typename TreeT::Node0; // leaf - using Node1 = typename TreeT::Node1; // lower - using Node2 = typename TreeT::Node2; // upper - using RootT = typename TreeT::Node3; // root - static_assert(std::is_same::value, "Mismatching type"); - - ValueT mDelta; // skip rendering of node if: node.max < -mDelta || node.min > mDelta - - void process( GridT& );// process grid and all tree nodes - void process( TreeT& );// process Tree, root node and child nodes - void process( RootT& );// process root node and child nodes - NodeStats process( Node0& );// process leaf node - - template - NodeStats process( NodeT& );// process internal node and child nodes - - template - void setStats(DataT*, const Extrema&); - template - void setStats(DataT*, const Stats&); - template - void setStats(DataT*, const NoopStats&) {} - - template - typename std::enable_if::value>::type - setFlag(const T&, const T&, FlagT& flag) const { flag &= ~FlagT(1); } // unset 1st bit to enable rendering - - template - typename std::enable_if::value>::type - setFlag(const T& min, const T& max, FlagT& flag) const; - -public: - GridStats() = default; - - void operator()(GridT& grid, ValueT delta = ValueT(0)); - -}; // GridStats - -template -struct GridStats::NodeStats -{ - StatsT stats; - CoordBBox bbox; - - NodeStats(): stats(), bbox() {}//activeCount(0), bbox() {}; - - NodeStats& add(const NodeStats &other) - { - stats.add( other.stats );// no-op for NoopStats?! 
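// The two component-wise calls below compute the union of the two active
// bounding boxes, i.e. the smallest CoordBBox that encloses both operands.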
- bbox[0].minComponent(other.bbox[0]); - bbox[1].maxComponent(other.bbox[1]); - return *this; - } -};// GridStats::NodeStats - -//================================================================================================ - -template -void GridStats::operator()(GridT& grid, ValueT delta) -{ - mDelta = delta; // delta = voxel size for level sets, else 0 - this->process( grid ); -} - -//================================================================================================ - -template -template -inline void GridStats:: - setStats(DataT* data, const Extrema& e) -{ - data->setMin(e.min()); - data->setMax(e.max()); -} - -template -template -inline void GridStats:: - setStats(DataT* data, const Stats& s) -{ - data->setMin(s.min()); - data->setMax(s.max()); - data->setAvg(s.avg()); - data->setDev(s.std()); -} - -//================================================================================================ - -template -template -inline typename std::enable_if::value>::type -GridStats:: - setFlag(const T& min, const T& max, FlagT& flag) const -{ - if (mDelta > 0 && (min > mDelta || max < -mDelta)) {// LS: min > dx || max < -dx - flag |= FlagT(1u);// set 1st bit to disable rendering - } else { - flag &= ~FlagT(1u);// unset 1st bit to enable rendering - } -} - -//================================================================================================ - -template -void GridStats::process( GridT &grid ) -{ - this->process( grid.tree() );// this processes tree, root and all nodes - - // set world space AABB - auto& data = *grid.data(); - const auto& indexBBox = grid.tree().root().bbox(); - if (indexBBox.empty()) { - data.mWorldBBox = BBox(); - data.setBBoxOn(false); - } else { - // Note that below max is offset by one since CoordBBox.max is inclusive - // while bbox.max is exclusive. However, min is inclusive in both - // CoordBBox and BBox. This also guarantees that a grid with a single - // active voxel, does not have an empty world bbox! E.g. if a grid with a - // unit index-to-world transformation only contains the active voxel (0,0,0) - // then indeBBox = (0,0,0) -> (0,0,0) and then worldBBox = (0.0, 0.0, 0.0) - // -> (1.0, 1.0, 1.0). This is a consequence of the different definitions - // of index and world bounding boxes inherited from OpenVDB! 
- grid.mWorldBBox = CoordBBox(indexBBox[0], indexBBox[1].offsetBy(1)).transform(grid.map()); - grid.setBBoxOn(true); - } - - // set bit flags - data.setMinMaxOn(StatsT::hasMinMax()); - data.setAverageOn(StatsT::hasAverage()); - data.setStdDeviationOn(StatsT::hasStdDeviation()); -} // GridStats::process( Grid ) - -//================================================================================================ - -template -inline void GridStats::process( typename GridT::TreeType &tree ) -{ - this->process( tree.root() ); -} - -//================================================================================================ - -template -void GridStats::process(RootT &root) -{ - using ChildT = Node2; - auto &data = *root.data(); - if (data.mTableSize == 0) { // empty root node - data.mMinimum = data.mMaximum = data.mBackground; - data.mAverage = data.mStdDevi = 0; - data.mBBox = CoordBBox(); - } else { - NodeStats total; - for (uint32_t i = 0; i < data.mTableSize; ++i) { - auto* tile = data.tile(i); - if (tile->isChild()) { // process child node - total.add( this->process( *data.getChild(tile) ) ); - } else if (tile->state) { // active tile - const Coord ijk = tile->origin(); - total.bbox[0].minComponent(ijk); - total.bbox[1].maxComponent(ijk + Coord(ChildT::DIM - 1)); - if (StatsT::hasStats()) { // resolved at compile time - total.stats.add(tile->value, ChildT::NUM_VALUES); - } - } - } - this->setStats(&data, total.stats); - if (total.bbox.empty()) { - std::cerr << "\nWarning in GridStats: input tree only contained inactive root tiles!" - << "\nWhile not strictly an error it's rather suspicious!\n"; - } - data.mBBox = total.bbox; - } -} // GridStats::process( RootNode ) - -//================================================================================================ - -template -template -typename GridStats::NodeStats -GridStats::process(NodeT &node) -{ - static_assert(is_same::value || is_same::value, "Incorrect node type"); - using ChildT = typename NodeT::ChildNodeType; - - NodeStats total; - auto* data = node.data(); - - // Serial processing of active tiles - if (const auto tileCount = data->mValueMask.countOn()) { - //total.activeCount = tileCount * ChildT::NUM_VALUES; // active tiles - for (auto it = data->mValueMask.beginOn(); it; ++it) { - if (StatsT::hasStats()) { // resolved at compile time - total.stats.add( data->mTable[*it].value, ChildT::NUM_VALUES ); - } - const Coord ijk = node.offsetToGlobalCoord(*it); - total.bbox[0].minComponent(ijk); - total.bbox[1].maxComponent(ijk + Coord(int32_t(ChildT::DIM) - 1)); - } - } - - // Serial or parallel processing of child nodes - if (const size_t childCount = data->mChildMask.countOn()) { -#ifndef NANOVDB_USE_TBB - for (auto it = data->mChildMask.beginOn(); it; ++it) { - total.add( this->process( *data->getChild(*it) ) ); - } -#else - std::unique_ptr childNodes(new ChildT*[childCount]); - ChildT **ptr = childNodes.get(); - for (auto it = data->mChildMask.beginOn(); it; ++it) { - *ptr++ = data->getChild( *it ); - } - using RangeT = tbb::blocked_range; - total.add( tbb::parallel_reduce(RangeT(0, childCount), NodeStats(), - [&](const RangeT &r, NodeStats local)->NodeStats { - for(size_t i=r.begin(); i!=r.end(); ++i){ - local.add( this->process( *childNodes[i] ) ); - } - return local;}, - [](NodeStats a, const NodeStats &b)->NodeStats { return a.add( b ); } - )); -#endif - } - - data->mBBox = total.bbox; - if (total.bbox.empty()) { - data->mFlags |= uint32_t(1); // set 1st bit on to disable rendering of node - data->mFlags &= 
~uint32_t(2); // set 2nd bit off since node does not contain active values - } else { - data->mFlags |= uint32_t(2); // set 2nd bit on since node contains active values - if (StatsT::hasStats()) { // resolved at compile time - this->setStats(data, total.stats); - this->setFlag(data->mMinimum, data->mMaximum, data->mFlags); - } - } - return total; -} // GridStats::process( InternalNode ) - -//================================================================================================ - -template -typename GridStats::NodeStats -GridStats::process(Node0 &leaf) -{ - NodeStats local; - if (leaf.updateBBox()) {// optionally update active bounding box (updates data->mFlags) - local.bbox[0] = local.bbox[1] = leaf.mBBoxMin; - local.bbox[1] += Coord(leaf.mBBoxDif[0], leaf.mBBoxDif[1], leaf.mBBoxDif[2]); - if (StatsT::hasStats()) {// resolved at compile time - for (auto it = leaf.cbeginValueOn(); it; ++it) local.stats.add(*it); - this->setStats(&leaf, local.stats); - this->setFlag(leaf.getMin(), leaf.getMax(), leaf.mFlags); - } - } - return local; -} // GridStats::process( LeafNode ) - -//================================================================================================ - -template -void gridStats(NanoGrid& grid, StatsMode mode) -{ - using GridT = NanoGrid; - using ValueT = typename GridT::ValueType; - if (mode == StatsMode::Disable) { - return; - } else if (mode == StatsMode::BBox || std::is_same::value) { - GridStats > stats; - stats(grid); - } else if (mode == StatsMode::MinMax) { - GridStats > stats; - stats(grid); - } else if (mode == StatsMode::All) { - GridStats > stats; - stats(grid); - } else { - throw std::runtime_error("gridStats: Unsupported statistics mode."); - } -}// gridStats - -//================================================================================================ - -namespace { - -// returns a bitmask (of size 32^3 or 16^3) that marks all the entries -// in a node table that intersects with the specified bounding box. -template -Mask getBBoxMask(const CoordBBox &bbox, const NodeT* node) -{ - Mask mask;// typically 32^3 or 16^3 bit mask - auto b = CoordBBox::createCube(node->origin(), node->dim()); - assert( bbox.hasOverlap(b) ); - if ( bbox.isInside(b) ) { - mask.setOn();//node is completely inside the bbox so early out - } else { - b.intersect(bbox);// trim bounding box - // transform bounding box from global to local coordinates - b.min() &= NodeT::DIM-1u; - b.min() >>= NodeT::ChildNodeType::TOTAL; - b.max() &= NodeT::DIM-1u; - b.max() >>= NodeT::ChildNodeType::TOTAL; - assert( !b.empty() ); - auto it = b.begin();// iterates over all the child nodes or tiles that intersects bbox - for (const Coord& ijk = *it; it; ++it) { - mask.setOn(ijk[2] + (ijk[1] << NodeT::LOG2DIM) + (ijk[0] << 2*NodeT::LOG2DIM)); - } - } - return mask; -}// getBBoxMask - -}// end of unnamed namespace - -/// @brief return the extrema of all the values in a grid that -/// intersects the specified bounding box. 
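// A minimal usage sketch of gridStats() above and the getExtrema() function
// documented here, assuming an existing NanoGrid<float> named grid (the
// variable name and bbox values are illustrative only):
//
//   nanovdb::gridStats(grid, nanovdb::StatsMode::All); // refresh bbox, min/max, average and std deviation
//   const nanovdb::CoordBBox bbox(nanovdb::Coord(0), nanovdb::Coord(63));
//   auto ext = nanovdb::getExtrema(grid, bbox); // extrema of values intersecting bbox
//   if (ext) { const float lo = ext.min(), hi = ext.max(); }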
-template -Extrema::ValueType> -getExtrema(const NanoGrid& grid, const CoordBBox &bbox) -{ - using GridT = NanoGrid; - using ValueT = typename GridT::ValueType; - using TreeT = typename GridTree::type; - using RootT = typename NodeTrait::type;// root node - using Node2 = typename NodeTrait::type;// upper internal node - using Node1 = typename NodeTrait::type;// lower internal node - using Node0 = typename NodeTrait::type;// leaf node - - Extrema extrema; - const RootT &root = grid.tree().root(); - const auto &bbox3 = root.bbox(); - if (bbox.isInside(bbox3)) {// bbox3 is contained inside bbox - extrema.min(root.minimum()); - extrema.max(root.maximum()); - extrema.add(root.background()); - } else if (bbox.hasOverlap(bbox3)) { - const auto *data3 = root.data(); - for (uint32_t i=0; imTableSize; ++i) { - const auto *tile = data3->tile(i); - CoordBBox bbox2 = CoordBBox::createCube(tile->origin(), Node2::dim()); - if (!bbox.hasOverlap(bbox2)) continue; - if (tile->isChild()) { - const Node2 *node2 = data3->getChild(tile); - if (bbox.isInside(bbox2)) { - extrema.min(node2->minimum()); - extrema.max(node2->maximum()); - } else {// partial intersections at level 2 - auto *data2 = node2->data(); - const auto bboxMask2 = getBBoxMask(bbox, node2); - for (auto it2 = bboxMask2.beginOn(); it2; ++it2) { - if (data2->mChildMask.isOn(*it2)) { - const Node1* node1 = data2->getChild(*it2); - CoordBBox bbox1 = CoordBBox::createCube(node1->origin(), Node1::dim()); - if (bbox.isInside(bbox1)) { - extrema.min(node1->minimum()); - extrema.max(node1->maximum()); - } else {// partial intersection at level 1 - auto *data1 = node1->data(); - const auto bboxMask1 = getBBoxMask(bbox, node1); - for (auto it1 = bboxMask1.beginOn(); it1; ++it1) { - if (data1->mChildMask.isOn(*it1)) { - const Node0* node0 = data1->getChild(*it1); - CoordBBox bbox0 = CoordBBox::createCube(node0->origin(), Node0::dim()); - if (bbox.isInside(bbox0)) { - extrema.min(node0->minimum()); - extrema.max(node0->maximum()); - } else {// partial intersection at level 0 - auto *data0 = node0->data(); - const auto bboxMask0 = getBBoxMask(bbox, node0); - for (auto it0 = bboxMask0.beginOn(); it0; ++it0) { - extrema.add(data0->getValue(*it0)); - } - }// end partial intersection at level 0 - } else {// tile at level 1 - extrema.add(data1->mTable[*it1].value); - } - } - }// end of partial intersection at level 1 - } else {// tile at level 2 - extrema.add(data2->mTable[*it2].value); - } - }// loop over tiles and nodes at level 2 - }// end of partial intersection at level 1 - } else {// tile at root level - extrema.add(tile->value); - } - }// loop over root table - } else {// bbox does not overlap the grid - extrema.add(root.background()); - } - return extrema; -}// getExtrema - -} // namespace nanovdb - -#endif // NANOVDB_GRIDSTATS_H_HAS_BEEN_INCLUDED +#include // for NANOVDB_DEPRECATED_HEADER +#include +NANOVDB_DEPRECATED_HEADER("Include nanovdb/tools/GridStats.h instead.") diff --git a/nanovdb/nanovdb/util/GridValidator.h b/nanovdb/nanovdb/util/GridValidator.h index fe6815bfb4..476e760d4e 100644 --- a/nanovdb/nanovdb/util/GridValidator.h +++ b/nanovdb/nanovdb/util/GridValidator.h @@ -1,185 +1,6 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -/*! - \file GridValidator.h - - \author Ken Museth - - \date August 30, 2020 - - \brief Checks the validity of an existing NanoVDB grid. 
-*/ - -#ifndef NANOVDB_GRIDVALIDATOR_H_HAS_BEEN_INCLUDED -#define NANOVDB_GRIDVALIDATOR_H_HAS_BEEN_INCLUDED - -#include -#include "GridChecksum.h" - -namespace nanovdb { - -/// @brief Return true if the specified grid passes several validation tests. -/// -/// @param grid Grid to validate -/// @param detailed If true the validation test is detailed and relatively slow. -/// @param verbose If true information about the first failed test is printed to std::cerr -template -bool isValid(const NanoGrid &grid, bool detailed = true, bool verbose = false); - -/// @brief Allows for the construction of NanoVDB grids without any dependecy -template -class GridValidator -{ - using GridT = NanoGrid; - inline static void checkTree( const GridT&, std::string&, bool); - inline static void checkRoot( const GridT&, std::string&, bool); - inline static void checkNodes(const GridT&, std::string&); - -public: - /// @brief Returns an error message (an empty string means no error) - /// - /// @param grid NanoVDB grid to be tested - /// @param detailed If true the checksum is computed and validated as well as all the node pointers - /// - /// @note The validation is much slower if @c detailed == true! - static std::string check(const GridT &grid, bool detailed = true); - -};// GridValidator - -//================================================================================================ - -template -std::string GridValidator::check(const GridT &grid, bool detailed) -{ - std::string errorStr; - - // First check the Grid - auto *data = reinterpret_cast(&grid); - std::stringstream ss; - if (!isValid(data)) { - errorStr.assign("Grid is not 32B aligned"); - } else if (data->mMagic != NANOVDB_MAGIC_NUMBER && data->mMagic != NANOVDB_MAGIC_GRID) { - const uint64_t magic1 = NANOVDB_MAGIC_NUMBER, magic2 = NANOVDB_MAGIC_GRID; - const char *c0 = (const char*)&(data->mMagic), *c1=(const char*)&magic1, *c2=(const char*)&magic2; - ss << "Incorrect magic number: Expected \""; - for (int i=0; i<8; ++i) ss << c1[i]; - ss << "\" or \""; - for (int i=0; i<8; ++i) ss << c2[i]; - ss << "\", but found \""; - for (int i=0; i<8; ++i) ss << c0[i]; - ss << "\""; - errorStr = ss.str(); - } else if (!validateChecksum(grid, detailed ? 
ChecksumMode::Full : ChecksumMode::Partial)) { - errorStr.assign("Mis-matching checksum"); - } else if (data->mVersion >= Version(29,0,0) && data->mVersion.getMajor() != NANOVDB_MAJOR_VERSION_NUMBER) { - ss << "Invalid major version number: Expected " << NANOVDB_MAJOR_VERSION_NUMBER << ", but read " << data->mVersion.c_str(); - errorStr = ss.str(); - } else if (data->mVersion < Version(29,0,0) && data->mVersion.id() != 28u) { - ss << "Invalid old major version number: Expected 28 or newer, but read " << data->mVersion.id(); - errorStr = ss.str(); - } else if (data->mGridClass >= GridClass::End) { - errorStr.assign("Invalid GridClass"); - } else if (data->mGridType >= GridType::End) { - errorStr.assign("Invalid GridType"); - } else if (data->mGridType != mapToGridType()) { - errorStr.assign("Invalid combination of ValueType and GridType"); - } else if (!isValid(data->mGridType, data->mGridClass)) { - errorStr.assign("Invalid combination of GridType and GridClass"); - } else if ( (const uint8_t*)(&(grid.tree())) != (const uint8_t*)(&grid+1) ) { - errorStr.assign("Invalid Tree pointer"); - } else { - checkTree(grid, errorStr, detailed); - } - return errorStr; -} - -//================================================================================================ - -template -void GridValidator::checkTree(const GridT &grid, std::string &errorStr, bool detailed) -{ - if (!isValid(&grid.tree())) { - errorStr.assign("Tree is not 32B aligned"); - } else if ( (const uint8_t*)(&grid.tree().root()) < (const uint8_t*)(&grid.tree()+1)) { - errorStr.assign("Invalid root pointer (should be located after the Grid and Tree)"); - } else if ( (const uint8_t*)(&grid.tree().root()) > (const uint8_t*)(&grid) + grid.gridSize() - sizeof(grid.tree().root()) ) { - errorStr.assign("Invalid root pointer (appears to be located after the end of the buffer)"); - } else { - checkRoot(grid, errorStr, detailed); - } -}// GridValidator::checkTree - -//================================================================================================ - -template -void GridValidator::checkRoot(const GridT &grid, std::string &errorStr, bool detailed) -{ - auto &root = grid.tree().root(); - auto *data = root.data(); - if (!isValid(data)) { - errorStr.assign("Root is not 32B aligned"); - } - const uint8_t *minPtr = (const uint8_t*)(&root + 1); - const uint8_t *maxPtr = (const uint8_t*)(&root) + root.memUsage(); - for (uint32_t i = 0; errorStr.empty() && imTableSize; ++i) { - const auto *tile = data->tile(i); - if ( (const uint8_t *) tile < minPtr ) { - errorStr.assign("Invalid root tile pointer (below lower bound"); - } else if ( (const uint8_t *) tile > maxPtr - sizeof(*tile) ) { - errorStr.assign("Invalid root tile pointer (above higher bound"); - } - } - if (detailed && errorStr.empty()) { - checkNodes(grid, errorStr); - } -}// GridValidator::processRoot - -//================================================================================================ -template -void GridValidator::checkNodes(const GridT &grid, std::string &errorStr) -{ - auto &root = grid.tree().root();// note, the root node was already checked - const uint8_t *minPtr = (const uint8_t*)(&root) + root.memUsage(); - const uint8_t *maxPtr = (const uint8_t*)(&grid) + grid.gridSize(); - - auto check = [&](const void * ptr, size_t ptrSize) -> bool { - if (!isValid(ptr)) { - errorStr.assign("Invalid node pointer: not 32B aligned"); - } else if ( (const uint8_t *) ptr < minPtr ) { - errorStr.assign("Invalid node pointer: below lower bound"); - } else if ( (const 
uint8_t *) ptr > maxPtr - ptrSize ) { - errorStr.assign("Invalid node pointer: above higher bound"); - } - return errorStr.empty(); - }; - - for (auto it2 = grid.tree().root().cbeginChild(); it2; ++it2) { - auto &node2 = *it2; - if (!check(&node2, sizeof(node2))) return; - for (auto it1 = node2.cbeginChild(); it1; ++it1) { - auto &node1 = *it1; - if (!check(&node1, sizeof(node1))) return; - for (auto it0 = node1.cbeginChild(); it0; ++it0) { - auto &node0 = *it0; - if (!check(&node2, sizeof(node2))) return; - }// loop over child nodes of the lower internal node - }// loop over child nodes of the upper internal node - }// loop over child nodes of the root node - -} // GridValidator::processNodes - - -//================================================================================================ - -template -bool isValid(const NanoGrid &grid, bool detailed, bool verbose) -{ - const std::string str = GridValidator::check( grid, detailed ); - if (verbose && !str.empty()) std::cerr << "Validation failed: " << str << std::endl; - return str.empty(); -} - -} // namespace nanovdb - -#endif // NANOVDB_GRIDVALIDATOR_H_HAS_BEEN_INCLUDED +#include // for NANOVDB_DEPRECATED_HEADER +#include +NANOVDB_DEPRECATED_HEADER("Include nanovdb/tools/GridValidator.h instead.") diff --git a/nanovdb/nanovdb/util/HDDA.h b/nanovdb/nanovdb/util/HDDA.h index d3ef5733e0..4430c40701 100644 --- a/nanovdb/nanovdb/util/HDDA.h +++ b/nanovdb/nanovdb/util/HDDA.h @@ -1,510 +1,6 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -/// @file HDDA.h -/// -/// @author Ken Museth -/// -/// @brief Hierarchical Digital Differential Analyzers specialized for VDB. - -#ifndef NANOVDB_HDDA_H_HAS_BEEN_INCLUDED -#define NANOVDB_HDDA_H_HAS_BEEN_INCLUDED - -// Comment out to disable this explicit round-off check -#define ENFORCE_FORWARD_STEPPING - -#include // only dependency - -namespace nanovdb { - -/// @brief A Digital Differential Analyzer specialized for OpenVDB grids -/// @note Conceptually similar to Bresenham's line algorithm applied -/// to a 3D Ray intersecting OpenVDB nodes or voxels. Log2Dim = 0 -/// corresponds to a voxel and Log2Dim a tree node of size 2^Log2Dim. -/// -/// @note The Ray template class is expected to have the following -/// methods: test(time), t0(), t1(), invDir(), and operator()(time). -/// See the example Ray class above for their definition. -template -class HDDA -{ -public: - using RealType = typename RayT::RealType; - using RealT = RealType; - using Vec3Type = typename RayT::Vec3Type; - using Vec3T = Vec3Type; - using CoordType = CoordT; - - /// @brief Default ctor - HDDA() = default; - - /// @brief ctor from ray and dimension at which the DDA marches - __hostdev__ HDDA(const RayT& ray, int dim) { this->init(ray, dim); } - - /// @brief Re-initializes the HDDA - __hostdev__ void init(const RayT& ray, RealT startTime, RealT maxTime, int dim) - { - assert(startTime <= maxTime); - mDim = dim; - mT0 = startTime; - mT1 = maxTime; - const Vec3T &pos = ray(mT0), &dir = ray.dir(), &inv = ray.invDir(); - mVoxel = RoundDown(pos) & (~(dim - 1)); - for (int axis = 0; axis < 3; ++axis) { - if (dir[axis] == RealT(0)) { //handles dir = +/- 0 - mNext[axis] = Maximum::value(); //i.e. disabled! 
- mStep[axis] = 0; - } else if (inv[axis] > 0) { - mStep[axis] = 1; - mNext[axis] = mT0 + (mVoxel[axis] + dim - pos[axis]) * inv[axis]; - mDelta[axis] = inv[axis]; - } else { - mStep[axis] = -1; - mNext[axis] = mT0 + (mVoxel[axis] - pos[axis]) * inv[axis]; - mDelta[axis] = -inv[axis]; - } - } - } - - /// @brief Simular to init above except it uses the bounds of the input ray - __hostdev__ void init(const RayT& ray, int dim) { this->init(ray, ray.t0(), ray.t1(), dim); } - - /// @brief Updates the HDDA to march with the specified dimension - __hostdev__ bool update(const RayT& ray, int dim) - { - if (mDim == dim) - return false; - mDim = dim; - const Vec3T &pos = ray(mT0), &inv = ray.invDir(); - mVoxel = RoundDown(pos) & (~(dim - 1)); - for (int axis = 0; axis < 3; ++axis) { - if (mStep[axis] == 0) - continue; - mNext[axis] = mT0 + (mVoxel[axis] - pos[axis]) * inv[axis]; - if (mStep[axis] > 0) - mNext[axis] += dim * inv[axis]; - } - - return true; - } - - __hostdev__ int dim() const { return mDim; } - - /// @brief Increment the voxel index to next intersected voxel or node - /// and returns true if the step in time does not exceed maxTime. - __hostdev__ bool step() - { - const int axis = MinIndex(mNext); -#if 1 - switch (axis) { - case 0: - return step<0>(); - case 1: - return step<1>(); - default: - return step<2>(); - } -#else - mT0 = mNext[axis]; - mNext[axis] += mDim * mDelta[axis]; - mVoxel[axis] += mDim * mStep[axis]; - return mT0 <= mT1; -#endif - } - - /// @brief Return the index coordinates of the next node or voxel - /// intersected by the ray. If Log2Dim = 0 the return value is the - /// actual signed coordinate of the voxel, else it is the origin - /// of the corresponding VDB tree node or tile. - /// @note Incurs no computational overhead. - __hostdev__ const CoordT& voxel() const { return mVoxel; } - - /// @brief Return the time (parameterized along the Ray) of the - /// first hit of a tree node of size 2^Log2Dim. - /// @details This value is initialized to startTime or ray.t0() - /// depending on the constructor used. - /// @note Incurs no computational overhead. - __hostdev__ RealType time() const { return mT0; } - - /// @brief Return the maximum time (parameterized along the Ray). - __hostdev__ RealType maxTime() const { return mT1; } - - /// @brief Return the time (parameterized along the Ray) of the - /// second (i.e. next) hit of a tree node of size 2^Log2Dim. - /// @note Incurs a (small) computational overhead. 
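// Taken together, the init/step/update/voxel methods above support the
// following hierarchical marching pattern (a sketch only; the accessor acc,
// the ray, and the explicit template arguments are assumptions):
//
//   nanovdb::Coord ijk = nanovdb::RoundDown<nanovdb::Coord>(ray.start());
//   nanovdb::HDDA<RayT, nanovdb::Coord> hdda(ray, acc.getDim(ijk, ray));
//   while (hdda.step()) { // advance to the next node/voxel crossing
//       ijk = nanovdb::RoundDown<nanovdb::Coord>(ray(hdda.time() + Delta)); // nudge past the boundary
//       hdda.update(ray, acc.getDim(ijk, ray)); // re-size the steps to the local tree level
//   }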
- __hostdev__ RealType next() const - { -#if 1 //def __CUDA_ARCH__ - return fminf(mT1, fminf(mNext[0], fminf(mNext[1], mNext[2]))); -#else - return std::min(mT1, std::min(mNext[0], std::min(mNext[1], mNext[2]))); -#endif - } - -private: - // helper to implement the general form - template - __hostdev__ bool step() - { -#ifdef ENFORCE_FORWARD_STEPPING - //if (mNext[axis] <= mT0) mNext[axis] += mT0 - mNext[axis] + fmaxf(mNext[axis]*1.0e-6f, 1.0e-6f); - //if (mNext[axis] <= mT0) mNext[axis] += mT0 - mNext[axis] + (mNext[axis] + 1.0f)*1.0e-6f; - if (mNext[axis] <= mT0) { - mNext[axis] += mT0 - 0.999999f * mNext[axis] + 1.0e-6f; - } -#endif - mT0 = mNext[axis]; - mNext[ axis] += mDim * mDelta[axis]; - mVoxel[axis] += mDim * mStep[ axis]; - return mT0 <= mT1; - } - - int32_t mDim; - RealT mT0, mT1; // min and max allowed times - CoordT mVoxel, mStep; // current voxel location and step to next voxel location - Vec3T mDelta, mNext; // delta time and next time -}; // class HDDA - -/////////////////////////////////////////// ZeroCrossing //////////////////////////////////////////// - -/// @brief returns true if the ray intersects a zero-crossing at the voxel level of the grid in the accessor -/// The empty-space ray-marching is performed at all levels of the tree using an -/// HDDA. If an intersection is detected, then ijk is updated with the index coordinate of the closest -/// voxel after the intersection point, v contains the grid values at ijk, and t is set to the time of -/// the intersection along the ray. -template -inline __hostdev__ bool ZeroCrossing(RayT& ray, AccT& acc, Coord& ijk, typename AccT::ValueType& v, float& t) -{ - if (!ray.clip(acc.root().bbox()) || ray.t1() > 1e20) - return false; // clip ray to bbox - static const float Delta = 1.0001f; - ijk = RoundDown(ray.start()); // first hit of bbox - HDDA hdda(ray, acc.getDim(ijk, ray)); - const auto v0 = acc.getValue(ijk); - while (hdda.step()) { - ijk = RoundDown(ray(hdda.time() + Delta)); - hdda.update(ray, acc.getDim(ijk, ray)); - if (hdda.dim() > 1 || !acc.isActive(ijk)) - continue; // either a tile value or an inactive voxel - while (hdda.step() && acc.isActive(hdda.voxel())) { // in the narrow band - v = acc.getValue(hdda.voxel()); - if (v * v0 < 0) { // zero crossing - ijk = hdda.voxel(); - t = hdda.time(); - return true; - } - } - } - return false; -} - -/////////////////////////////////////////// DDA //////////////////////////////////////////// - -/// @brief A Digital Differential Analyzer. Unlike HDDA (defined above) this DDA -/// uses a fixed step-size defined by the template parameter Dim! -/// -/// @note The Ray template class is expected to have the following -/// methods: test(time), t0(), t1(), invDir(), and operator()(time). -/// See the example Ray class above for their definition. 
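// A hedged usage sketch for ZeroCrossing() above, assuming a float grid, an
// index-space ray iRay and a read accessor acc (all names illustrative):
//
//   nanovdb::Coord ijk;
//   float v, t;
//   if (nanovdb::ZeroCrossing(iRay, acc, ijk, v, t)) {
//       // ijk = first voxel past the zero crossing, v = its value, t = hit time
//   }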
-template -class DDA -{ - static_assert(Dim >= 1, "Dim must be >= 1"); - -public: - using RealType = typename RayT::RealType; - using RealT = RealType; - using Vec3Type = typename RayT::Vec3Type; - using Vec3T = Vec3Type; - using CoordType = CoordT; - - /// @brief Default ctor - DDA() = default; - - /// @brief ctor from ray and dimension at which the DDA marches - __hostdev__ DDA(const RayT& ray) { this->init(ray); } - - /// @brief Re-initializes the DDA - __hostdev__ void init(const RayT& ray, RealT startTime, RealT maxTime) - { - assert(startTime <= maxTime); - mT0 = startTime; - mT1 = maxTime; - const Vec3T &pos = ray(mT0), &dir = ray.dir(), &inv = ray.invDir(); - mVoxel = RoundDown(pos) & (~(Dim - 1)); - for (int axis = 0; axis < 3; ++axis) { - if (dir[axis] == RealT(0)) { //handles dir = +/- 0 - mNext[axis] = Maximum::value(); //i.e. disabled! - mStep[axis] = 0; - } else if (inv[axis] > 0) { - mStep[axis] = Dim; - mNext[axis] = (mT0 + (mVoxel[axis] + Dim - pos[axis]) * inv[axis]); - mDelta[axis] = inv[axis]; - } else { - mStep[axis] = -Dim; - mNext[axis] = mT0 + (mVoxel[axis] - pos[axis]) * inv[axis]; - mDelta[axis] = -inv[axis]; - } - } - } - - /// @brief Simular to init above except it uses the bounds of the input ray - __hostdev__ void init(const RayT& ray) { this->init(ray, ray.t0(), ray.t1()); } - - /// @brief Increment the voxel index to next intersected voxel or node - /// and returns true if the step in time does not exceed maxTime. - __hostdev__ bool step() - { - const int axis = MinIndex(mNext); -#if 1 - switch (axis) { - case 0: - return step<0>(); - case 1: - return step<1>(); - default: - return step<2>(); - } -#else -#ifdef ENFORCE_FORWARD_STEPPING - if (mNext[axis] <= mT0) { - mNext[axis] += mT0 - 0.999999f * mNext[axis] + 1.0e-6f; - } -#endif - mT0 = mNext[axis]; - mNext[axis] += mDelta[axis]; - mVoxel[axis] += mStep[axis]; - return mT0 <= mT1; -#endif - } - - /// @brief Return the index coordinates of the next node or voxel - /// intersected by the ray. If Log2Dim = 0 the return value is the - /// actual signed coordinate of the voxel, else it is the origin - /// of the corresponding VDB tree node or tile. - /// @note Incurs no computational overhead. - __hostdev__ const CoordT& voxel() const { return mVoxel; } - - /// @brief Return the time (parameterized along the Ray) of the - /// first hit of a tree node of size 2^Log2Dim. - /// @details This value is initialized to startTime or ray.t0() - /// depending on the constructor used. - /// @note Incurs no computational overhead. - __hostdev__ RealType time() const { return mT0; } - - /// @brief Return the maximum time (parameterized along the Ray). - __hostdev__ RealType maxTime() const { return mT1; } - - /// @brief Return the time (parameterized along the Ray) of the - /// second (i.e. next) hit of a tree node of size 2^Log2Dim. - /// @note Incurs a (small) computational overhead. 
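// A hedged sketch of voxel-by-voxel marching with the fixed-step DDA above
// (the template arguments are written out explicitly and are assumptions):
//
//   nanovdb::DDA<RayT, nanovdb::Coord, /*Dim=*/1> dda(ray);
//   while (dda.step()) {
//       const nanovdb::Coord& ijk = dda.voxel(); // sample the grid at ijk here
//   }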
-    __hostdev__ RealType next() const
-    {
-        return Min(mT1, Min(mNext[0], Min(mNext[1], mNext[2])));
-    }
-
-    __hostdev__ int nextAxis() const
-    {
-        return nanovdb::MinIndex(mNext);
-    }
-
-private:
-    // helper to implement the general form
-    template<int axis>
-    __hostdev__ bool step()
-    {
-#ifdef ENFORCE_FORWARD_STEPPING
-        if (mNext[axis] <= mT0) {
-            mNext[axis] += mT0 - 0.999999f * mNext[axis] + 1.0e-6f;
-        }
-#endif
-        mT0 = mNext[axis];
-        mNext[axis] += mDelta[axis];
-        mVoxel[axis] += mStep[axis];
-        return mT0 <= mT1;
-    }
-
-    RealT  mT0, mT1; // min and max allowed times
-    CoordT mVoxel, mStep; // current voxel location and step to next voxel location
-    Vec3T  mDelta, mNext; // delta time and next time
-}; // class DDA
-
-/////////////////////////////////////////// ZeroCrossingNode ////////////////////////////////////////////
-
-template<typename RayT, typename NodeT>
-inline __hostdev__ bool ZeroCrossingNode(RayT& ray, const NodeT& node, float v0, nanovdb::Coord& ijk, float& v, float& t)
-{
-    BBox<nanovdb::Coord> bbox(node.origin(), node.origin() + Coord(node.dim() - 1));
-
-    if (!ray.clip(node.bbox())) {
-        return false;
-    }
-
-    const float t0 = ray.t0();
-
-    static const float Delta = 1.0001f;
-    ijk = Coord::Floor(ray(ray.t0() + Delta));
-
-    t = t0;
-    v = 0;
-
-    DDA<RayT, nanovdb::Coord, 1 << NodeT::LOG2DIM> dda(ray);
-    while (dda.step()) {
-        ijk = dda.voxel();
-
-        if (bbox.isInside(ijk) == false)
-            return false;
-
-        v = node.getValue(ijk);
-        if (v * v0 < 0) {
-            t = dda.time();
-            return true;
-        }
-    }
-    return false;
-}
-
-/////////////////////////////////////////// firstActive ////////////////////////////////////////////
-
-/// @brief returns true if the ray intersects an active value at any level of the grid in the accessor.
-///        The empty-space ray-marching is performed at all levels of the tree using an
-///        HDDA. If an intersection is detected, then ijk is updated with the index coordinate of the first
-///        active voxel or tile, and t is set to the time of its intersection along the ray.
-template<typename RayT, typename AccT>
-inline __hostdev__ bool firstActive(RayT& ray, AccT& acc, Coord &ijk, float& t)
-{
-    if (!ray.clip(acc.root().bbox()) || ray.t1() > 1e20) {// clip ray to bbox
-        return false;// missed or undefined bbox
-    }
-    static const float Delta = 1.0001f;// forward step-size along the ray to avoid getting stuck
-    t = ray.t0();// initiate time
-    ijk = RoundDown<Coord>(ray.start()); // first voxel inside bbox
-    for (HDDA<RayT> hdda(ray, acc.getDim(ijk, ray)); !acc.isActive(ijk); hdda.update(ray, acc.getDim(ijk, ray))) {
-        if (!hdda.step()) return false;// leap-frog HDDA and exit if ray bound is exceeded
-        t = hdda.time() + Delta;// update time
-        ijk = RoundDown<Coord>( ray(t) );// update ijk
-    }
-    return true;
-}
-
-/////////////////////////////////////////// TreeMarcher ////////////////////////////////////////////
-
-/// @brief A Tree Marcher for Generic Grids
-
-template<typename NodeT, typename RayT, typename AccT, typename CoordT = Coord>
-class TreeMarcher
-{
-public:
-    using ChildT    = typename NodeT::ChildNodeType;
-    using RealType  = typename RayT::RealType;
-    using RealT     = RealType;
-    using CoordType = CoordT;
-
-    inline __hostdev__ TreeMarcher(AccT& acc)
-        : mAcc(acc)
-    {
-    }
-
-    /// @brief Initialize the TreeMarcher with an index-space ray.
-    inline __hostdev__ bool init(const RayT& indexRay)
-    {
-        mRay = indexRay;
-        if (!mRay.clip(mAcc.root().bbox()))
-            return false; // clip ray to bbox
-
-        // tweak the intersection span into the bbox.
-        // CAVEAT: this will potentially clip some tiny corner intersections.
-        static const float Eps = 0.000001f;
-        const float t0 = mRay.t0() + Eps;
-        const float t1 = mRay.t1() - Eps;
-        if (t0 > t1)
-            return false;
-
-        const CoordT ijk = RoundDown<CoordT>(mRay(t0));
-        const uint32_t dim = mAcc.getDim(ijk, mRay);
-        mHdda.init(mRay, t0, t1, nanovdb::Max(dim, NodeT::dim()));
-
-        mT0 = (dim <= ChildT::dim()) ? mHdda.time() : -1; // potentially begin a span.
-        mTmax = t1;
-        return true;
-    }
-
-    /// @brief step the ray through the tree. If the ray hits a node then
-    ///        populate t0 & t1, and the node.
-    /// @return true when a node of type NodeT is intersected, false otherwise.
-    inline __hostdev__ bool step(const NodeT** node, float& t0, float& t1)
-    {
-        // CAVEAT: if Delta is too large then it will clip corners of nodes in a visible way,
-        // but it has to be quite large when very far from the grid (due to fp32 rounding)
-        static const float Delta = 0.01f;
-        bool hddaIsValid;
-
-        do {
-            t0 = mT0;
-
-            auto currentNode = mAcc.template getNode<NodeT>();
-
-            // get next node intersection...
-            hddaIsValid = mHdda.step();
-            const CoordT nextIjk = RoundDown<CoordT>(mRay(mHdda.time() + Delta));
-            const auto nextDim = mAcc.getDim(nextIjk, mRay);
-            mHdda.update(mRay, (int)Max(nextDim, NodeT::dim()));
-            mT0 = (nextDim <= ChildT::dim()) ? mHdda.time() : -1; // potentially begin a span.
-
-            if (t0 >= 0) { // we are in a span.
-                t1 = Min(mTmax, mHdda.time());
-
-                // TODO: clean this up!
-                if (t0 >= t1 || currentNode == nullptr)
-                    continue;
-
-                *node = currentNode;
-                return true;
-            }
-
-        } while (hddaIsValid);
-
-        return false;
-    }
-
-    inline __hostdev__ const RayT& ray() const { return mRay; }
-
-    inline __hostdev__ RayT& ray() { return mRay; }
-
-private:
-    AccT& mAcc;
-    RayT  mRay;
-    HDDA<RayT, CoordT> mHdda;
-    float mT0;
-    float mTmax;
-};// TreeMarcher
-
-/////////////////////////////////////////// PointTreeMarcher ////////////////////////////////////////////
-
-/// @brief A Tree Marcher for Point Grids
-///
-/// @note This class will correctly handle offsetting the ray by 0.5 to ensure that
-///       the underlying HDDA will intersect with the grid-cells. See details below.
-
-template<typename AccT, typename RayT = Ray<float>, typename CoordT = Coord>
-class PointTreeMarcher : public TreeMarcher<LeafNode<uint32_t>, RayT, AccT, CoordT>
-{
-    using BaseT = TreeMarcher<LeafNode<uint32_t>, RayT, AccT, CoordT>;
-public:
-    __hostdev__ PointTreeMarcher(AccT& acc) : BaseT(acc) {}
-
-    /// @brief Initiates this instance with a ray in index space.
-    ///
-    /// @details An offset of 0.5 is applied to the ray to account for the fact that points in vdb
-    ///          grids are bucketed into so-called grid cells, which are centered around grid voxels,
-    ///          whereas the DDA is based on so-called grid nodes, which are coincident with grid
-    ///          voxels. So, rather than offsetting the points by 0.5 to bring them into a grid
-    ///          node representation, this method offsets the eye of the ray by 0.5, which effectively
-    ///          ensures that the DDA operates on grid cells as opposed to grid nodes. This subtle
-    ///          but important offset of 0.5 is explained in more detail in our online documentation.
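-    ///
-    /// @details A minimal sketch of the intended usage (for illustration only; the
-    ///          accessor @c acc and the index-space ray @c iRay are assumed to be
-    ///          set up by the caller):
-    /// @code
-    ///     PointTreeMarcher<AccT> marcher(acc);
-    ///     if (marcher.init(iRay)) {
-    ///         const LeafNode<uint32_t>* leaf = nullptr;
-    ///         float t0 = 0.0f, t1 = 0.0f;
-    ///         while (marcher.step(&leaf, t0, t1)) {
-    ///             // process the points in @c leaf intersected over the span [t0, t1)
-    ///         }
-    ///     }
-    /// @endcode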
-    __hostdev__ bool init(RayT ray) { return BaseT::init(ray.offsetEye(0.5)); }
-};// PointTreeMarcher
-
-} // namespace nanovdb
-
-#endif // NANOVDB_HDDA_HAS_BEEN_INCLUDED
+#include <nanovdb/util/Util.h> // for NANOVDB_DEPRECATED_HEADER
+#include <nanovdb/math/HDDA.h>
+NANOVDB_DEPRECATED_HEADER("Include nanovdb/math/HDDA.h instead.")
diff --git a/nanovdb/nanovdb/util/HostBuffer.h b/nanovdb/nanovdb/util/HostBuffer.h
index e0520d6983..b843eed478 100644
--- a/nanovdb/nanovdb/util/HostBuffer.h
+++ b/nanovdb/nanovdb/util/HostBuffer.h
@@ -1,595 +1,6 @@
 // Copyright Contributors to the OpenVDB Project
 // SPDX-License-Identifier: MPL-2.0
-/*!
-    @file HostBuffer.h
-
-    @date April 20, 2021
-
-    @brief HostBuffer - a buffer that contains a shared or private bump
-           pool to either externally or internally managed host memory.
-
-    @details This HostBuffer can be used in multiple ways, most of which are
-             demonstrated in the examples below. Memory in the pool can
-             be managed or unmanaged (e.g. internal or external) and can
-             be shared between multiple buffers or belong to a single buffer.
-
-    Example that uses HostBuffer::create inside io::readGrids to create a
-    full self-managed buffer, i.e. not shared and without padding, per grid in the file.
-    @code
-        auto handles = nanovdb::io::readGrids("file.nvdb");
-    @endcode
-
-    Example that uses HostBuffer::createFull. Assuming you have a raw pointer
-    to a NanoVDB grid of unknown type, this example shows how to create its
-    GridHandle, which can be used to enquire about the grid type and meta data.
-    @code
-        void *data;// pointer to a NanoVDB grid of unknown type
-        uint64_t size;// byte size of NanoVDB grid of unknown type
-        auto buffer = nanovdb::HostBuffer::createFull(size, data);
-        nanovdb::GridHandle<> gridHandle(std::move(buffer));
-    @endcode
-
-    Example that uses HostBuffer::createPool for internally managed host memory.
-    Suppose you want to read multiple grids in multiple files, but reuse the same
-    fixed sized memory buffer to both avoid memory fragmentation as well as
-    exceeding the fixed memory ceiling!
-    @code
-        auto pool = nanovdb::HostBuffer::createPool(1 << 30);// 1 GB memory pool
-        std::vector<std::string> frames;// vector of file names
-        for (int i = 0; i < frames.size(); ++i) {
-            auto handles = nanovdb::io::readGrids(frames[i], 0, pool);
-            // ... process handles ...
-        }// pool memory is reclaimed as handles go out of scope
-    @endcode
-
-    Example that uses HostBuffer::createPool for externally managed host memory.
-    Note that in this example @c handles are allowed to outlive @c pool since
-    they internally store a shared pointer to the memory pool. However @c data
-    MUST outlive @c handles since the pool does not own its memory in this example.
-    @code
-        const size_t poolSize = 1 << 30;// 1 GB
-        uint8_t *data = static_cast<uint8_t*>(std::malloc(poolSize + NANOVDB_DATA_ALIGNMENT));// 1 GB pool
-        uint8_t *buffer = nanovdb::alignPtr(data);// 32B aligned buffer
-        //uint8_t *buffer = std::aligned_alloc(NANOVDB_DATA_ALIGNMENT, poolSize);// in C++17
-        auto pool = nanovdb::HostBuffer::createPool(poolSize, buffer);
-        auto handles1 = nanovdb::io::readGrids("file1.nvdb", 0, pool);
-        auto handles2 = nanovdb::io::readGrids("file2.nvdb", 0, pool);
-        ....
-        std::free(data);
-    @endcode
-
-    Example that uses HostBuffer::createPool for externally managed host memory.
-    Note that in this example @c handles are allowed to outlive @c pool since
-    they internally store a shared pointer to the memory pool. However @c array
-    MUST outlive @c handles since the pool does not own its memory in this example.
-    @code
-        const size_t poolSize = 1 << 30;// 1 GB
-        std::unique_ptr<uint8_t[]> array(new uint8_t[poolSize + NANOVDB_DATA_ALIGNMENT]);// scoped pool of 1 GB
-        //std::unique_ptr<uint8_t[]> array(std::aligned_alloc(NANOVDB_DATA_ALIGNMENT, poolSize));// in C++17
-        uint8_t *buffer = nanovdb::alignPtr(array.get());// 32B aligned buffer
-        auto pool = nanovdb::HostBuffer::createPool(poolSize, buffer);
-        auto handles = nanovdb::io::readGrids("file.nvdb", 0, pool);
-    @endcode
-*/
-
-#ifndef NANOVDB_HOSTBUFFER_H_HAS_BEEN_INCLUDED
-#define NANOVDB_HOSTBUFFER_H_HAS_BEEN_INCLUDED
-
-#include <nanovdb/NanoVDB.h> // for NANOVDB_DATA_ALIGNMENT;
-#include <cstdint>           // for types like int32_t etc
-#include <cstdio>            // for fprintf
-#include <cstdlib>           // for std::malloc/std::realloc/std::free
-#include <memory>            // for std::make_shared
-#include <mutex>             // for std::mutex
-#include <unordered_set>     // for std::unordered_set
-#include <cassert>           // for assert
-#include <sstream>           // for std::stringstream
-#include <cstring>           // for memcpy
-
-#define checkPtr(ptr, msg) \
-    { \
-        ptrAssert((ptr), (msg), __FILE__, __LINE__); \
-    }
-
-namespace nanovdb {
-
-template<typename BufferT>
-struct BufferTraits
-{
-    static constexpr bool hasDeviceDual = false;
-};
-
-// ----------------------------> HostBuffer <--------------------------------------
-
-/// @brief This is a buffer that contains a shared or private pool
-///        to either externally or internally managed host memory.
-///
-/// @note  Terminology:
-///        Pool:   0 = buffer.size() < buffer.poolSize()
-///        Buffer: 0 < buffer.size() < buffer.poolSize()
-///        Full:   0 < buffer.size() = buffer.poolSize()
-///        Empty:  0 = buffer.size() = buffer.poolSize()
-class HostBuffer
-{
-    struct Pool;// forward declaration of private pool struct
-    std::shared_ptr<Pool> mPool;
-    uint64_t              mSize; // total number of bytes for the NanoVDB grid.
-    uint8_t*              mData; // raw buffer for the NanoVDB grid.
-
-#if defined(DEBUG) || defined(_DEBUG)
-    static inline void ptrAssert(void* ptr, const char* msg, const char* file, int line, bool abort = true)
-    {
-        if (ptr == nullptr) {
-            fprintf(stderr, "NULL pointer error: %s %s %d\n", msg, file, line);
-            if (abort)
-                exit(1);
-        }
-        if (uint64_t(ptr) % NANOVDB_DATA_ALIGNMENT) {
-            fprintf(stderr, "Alignment pointer error: %s %s %d\n", msg, file, line);
-            if (abort)
-                exit(1);
-        }
-    }
-#else
-    static inline void ptrAssert(void*, const char*, const char*, int, bool = true)
-    {
-    }
-#endif
-
-public:
-    /// @brief Return a full buffer or an empty buffer
-    HostBuffer(uint64_t bufferSize = 0);
-
-    /// @brief Move constructor
-    HostBuffer(HostBuffer&& other);
-
-    /// @brief Custom destructor
-    ~HostBuffer() { this->clear(); }
-
-    /// @brief Move assignment operation
-    HostBuffer& operator=(HostBuffer&& other);
-
-    /// @brief Disallow copy-construction
-    HostBuffer(const HostBuffer&) = delete;
-
-    /// @brief Disallow copy assignment operation
-    HostBuffer& operator=(const HostBuffer&) = delete;
-
-    /// @brief Return a pool buffer which satisfies: buffer.size == 0,
-    ///        buffer.poolSize() == poolSize, and buffer.data() == nullptr.
-    ///        If data==nullptr, memory for the pool will be allocated.
-    ///
-    /// @throw If poolSize is zero.
-    static HostBuffer createPool(uint64_t poolSize, void *data = nullptr);
-
-    /// @brief Return a full buffer which satisfies: buffer.size == bufferSize,
-    ///        buffer.poolSize() == bufferSize, and buffer.data() == data.
-    ///        If data==nullptr, memory for the pool will be allocated.
-    ///
-    /// @throw If bufferSize is zero.
-    static HostBuffer createFull(uint64_t bufferSize, void *data = nullptr);
-
-    /// @brief Return a buffer with @c bufferSize bytes managed by
-    ///        the specified memory @c pool.
-    ///        If none is provided, i.e. @c pool == nullptr or @c pool->poolSize() == 0, one is
-    ///        created with size @c bufferSize, i.e. a full buffer is returned.
-    ///
-    /// @throw If the specified @c pool has insufficient memory for
-    ///        the requested buffer size.
-    static HostBuffer create(uint64_t bufferSize, const HostBuffer* pool = nullptr);
-
-    /// @brief Initialize as a full buffer with the specified size. If data is NULL
-    ///        the memory is internally allocated.
-    void init(uint64_t bufferSize, void *data = nullptr);
-
-    //@{
-    /// @brief Returns a pointer to the raw memory buffer managed by this allocator.
-    ///
-    /// @warning Note that the pointer can be NULL if the allocator was not initialized!
-    const uint8_t* data() const { return mData; }
-    uint8_t* data() { return mData; }
-    //@}
-
-    //@{
-    /// @brief Returns the size in bytes associated with this buffer.
-    uint64_t bufferSize() const { return mSize; }
-    uint64_t size() const { return this->bufferSize(); }
-    //@}
-
-    /// @brief Returns the size in bytes of the memory pool shared with this instance.
-    uint64_t poolSize() const;
-
-    /// @brief Return true if memory is managed (using std::malloc and std::free) by the
-    ///        shared pool in this buffer. Else memory is assumed to be managed externally.
-    bool isManaged() const;
-
-    //@{
-    /// @brief Returns true if this buffer has no memory associated with it
-    bool isEmpty() const { return !mPool || mSize == 0 || mData == nullptr; }
-    bool empty() const { return this->isEmpty(); }
-    //@}
-
-    /// @brief Return true if this is a pool, i.e. an empty buffer with a nonempty
-    ///        internal pool, i.e. this->size() == 0 and this->poolSize() != 0
-    bool isPool() const { return mSize == 0 && this->poolSize() > 0; }
-
-    /// @brief Return true if the pool exists, is nonempty but has no more available memory
-    bool isFull() const;
-
-    /// @brief Clear this buffer so it is empty.
-    void clear();
-
-    /// @brief Clears all existing buffers that are registered against the memory pool
-    ///        and resets the pool so it can be reused to create new buffers.
-    ///
-    /// @throw If this instance is not empty or contains no pool.
-    ///
-    /// @warning This method is not thread-safe!
-    void reset();
-
-    /// @brief Total number of bytes from the pool currently in use by buffers
-    uint64_t poolUsage() const;
-
-    /// @brief Resize the pool. It will attempt to resize the existing
-    ///        memory block, but if that fails a deep copy is performed.
-    ///        If @c data is not NULL it will be used as new externally
-    ///        managed memory for the pool. All registered buffers are
-    ///        updated so GridHandle::grid might return a new address (if
-    ///        a deep copy was performed).
-    ///
-    /// @note  This method can be used to resize the memory pool and even
-    ///        change it from internally to externally managed memory or vice versa.
-    ///
-    /// @throw If @c poolSize is less than this->poolUsage(), i.e. the used memory,
-    ///        or if allocations fail.
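-    ///
-    /// @details A minimal sketch of growing a pool in place (for illustration only;
-    ///          error handling is omitted and the sizes are arbitrary):
-    /// @code
-    ///     auto pool   = nanovdb::HostBuffer::createPool(1 << 20);// 1 MB pool
-    ///     auto buffer = nanovdb::HostBuffer::create(512, &pool);// sub-allocate 512 B
-    ///     pool.resizePool(1 << 21);// grow to 2 MB; buffer.data() may change if a deep copy occurred
-    /// @endcode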
- void resizePool(uint64_t poolSize, void *data = nullptr); - -}; // HostBuffer class - -// --------------------------> Implementation of HostBuffer::Pool <------------------------------------ - -// This is private struct of HostBuffer so you can safely ignore the API -struct HostBuffer::Pool -{ - using HashTableT = std::unordered_set; - std::mutex mMutex; // mutex for updating mRegister and mFree - HashTableT mRegister; - uint8_t* mData; - uint8_t* mFree; - uint64_t mSize; - uint64_t mPadding; - bool mManaged; - - /// @brief External memory ctor - Pool(uint64_t size = 0, void* data = nullptr) - : mData((uint8_t*)data) - , mFree(mData) - , mSize(size) - , mPadding(0) - , mManaged(data == nullptr) - { - if (mManaged) { - mData = static_cast(Pool::alloc(mSize)); - if (mData == nullptr) { - throw std::runtime_error("Pool::Pool malloc failed"); - } - } - mPadding = alignmentPadding(mData); - if (!mManaged && mPadding != 0) { - throw std::runtime_error("Pool::Pool: external memory buffer is not aligned to " + - std::to_string(NANOVDB_DATA_ALIGNMENT) + - " bytes.\nHint: use nanovdb::alignPtr or std::aligned_alloc (C++17 only)"); - } - mFree = mData + mPadding; - } - - /// @brief Custom destructor - ~Pool() - { - assert(mRegister.empty()); - if (mManaged) { - std::free(mData); - } - } - - /// @brief Disallow copy-construction - Pool(const Pool&) = delete; - - /// @brief Disallow move-construction - Pool(const Pool&&) = delete; - - /// @brief Disallow copy assignment operation - Pool& operator=(const Pool&) = delete; - - /// @brief Disallow move assignment operation - Pool& operator=(const Pool&&) = delete; - - /// @brief Return the total number of bytes used from this Pool by buffers - uint64_t usage() const { return static_cast(mFree - mData) - mPadding; } - - /// @brief Allocate a buffer of the specified size and add it to the register - void add(HostBuffer* buffer, uint64_t size) - { - auto* alignedFree = mFree + alignmentPadding(mFree); - - if (alignedFree + size > mData + mPadding + mSize) { - std::stringstream ss; - ss << "HostBuffer::Pool: insufficient memory\n" - << "\tA buffer requested " << size << " bytes with " << NANOVDB_DATA_ALIGNMENT - << "-bytes alignment from a pool with " - << mSize << " bytes of which\n\t" << (alignedFree - mData - mPadding) - << " bytes are used by " << mRegister.size() << " other buffer(s). " - << "Pool is " << (mManaged ? "internally" : "externally") << " managed.\n"; - //std::cerr << ss.str(); - throw std::runtime_error(ss.str()); - } - buffer->mSize = size; - const std::lock_guard lock(mMutex); - mRegister.insert(buffer); - buffer->mData = alignedFree; - mFree = alignedFree + size; - } - - /// @brief Remove the specified buffer from the register - void remove(HostBuffer *buffer) - { - const std::lock_guard lock(mMutex); - mRegister.erase(buffer); - } - - /// @brief Replaces buffer1 with buffer2 in the register - void replace(HostBuffer *buffer1, HostBuffer *buffer2) - { - const std::lock_guard lock(mMutex); - mRegister.erase( buffer1); - mRegister.insert(buffer2); - } - - /// @brief Reset the register and all its buffers - void reset() - { - for (HostBuffer *buffer : mRegister) { - buffer->mPool.reset(); - buffer->mSize = 0; - buffer->mData = nullptr; - } - mRegister.clear(); - mFree = mData + mPadding; - } - - /// @brief Resize this Pool and update registered buffers as needed. If data is no NULL - /// it is used as externally managed memory. 
- void resize(uint64_t size, void *data = nullptr) - { - const uint64_t memUsage = this->usage(); - - const bool managed = (data == nullptr); - - if (!managed && alignmentPadding(data) != 0) { - throw std::runtime_error("Pool::resize: external memory buffer is not aligned to " + - std::to_string(NANOVDB_DATA_ALIGNMENT) + " bytes"); - } - - if (memUsage > size) { - throw std::runtime_error("Pool::resize: insufficient memory"); - } - - uint64_t padding = 0; - if (mManaged && managed && size != mSize) { // managed -> managed - padding = mPadding; - data = Pool::realloc(mData, memUsage, size, padding); // performs both copy and free of mData - } else if (!mManaged && managed) { // un-managed -> managed - data = Pool::alloc(size); - padding = alignmentPadding(data); - } - - if (data == nullptr) { - throw std::runtime_error("Pool::resize: allocation failed"); - } else if (data != mData) { - auto* paddedData = static_cast(data) + padding; - - if (!(mManaged && managed)) { // no need to copy if managed -> managed - memcpy(paddedData, mData + mPadding, memUsage); - } - - for (HostBuffer* buffer : mRegister) { // update registered buffers - buffer->mData = paddedData + ptrdiff_t(buffer->mData - (mData + mPadding)); - } - mFree = paddedData + memUsage; // update the free pointer - if (mManaged && !managed) {// only free if managed -> un-managed - std::free(mData); - } - - mData = static_cast(data); - mPadding = padding; - } - mSize = size; - mManaged = managed; - } - /// @brief Return true is all the memory in this pool is in use. - bool isFull() const - { - assert(mFree <= mData + mPadding + mSize); - return mSize > 0 ? mFree == mData + mPadding + mSize : false; - } - -private: - - static void* alloc(uint64_t size) - { -//#if (__cplusplus >= 201703L) -// return std::aligned_alloc(NANOVDB_DATA_ALIGNMENT, size);//C++17 or newer -//#else - // make sure we alloc enough space to align the result - return std::malloc(size + NANOVDB_DATA_ALIGNMENT); -//#endif - } - - static void* realloc(void* const origData, - uint64_t origSize, - uint64_t desiredSize, - uint64_t& padding) - { - // make sure we alloc enough space to align the result - void* data = std::realloc(origData, desiredSize + NANOVDB_DATA_ALIGNMENT); - - if (data != nullptr && data != origData) { - uint64_t newPadding = alignmentPadding(data); - // Number of padding bytes may have changed -- move data if that's the case - if (newPadding != padding) { - // Realloc should not happen when shrinking down buffer, but let's be safe - std::memmove(static_cast(data) + newPadding, - static_cast(data) + padding, - Min(origSize, desiredSize)); - padding = newPadding; - } - } - - return data; - } - -};// struct HostBuffer::Pool - -// --------------------------> Implementation of HostBuffer <------------------------------------ - -inline HostBuffer::HostBuffer(uint64_t size) : mPool(nullptr), mSize(size), mData(nullptr) -{ - if (size>0) { - mPool = std::make_shared(size); - mData = mPool->mFree; - mPool->mRegister.insert(this); - mPool->mFree += size; - } -} - -inline HostBuffer::HostBuffer(HostBuffer&& other) : mPool(other.mPool), mSize(other.mSize), mData(other.mData) -{ - if (mPool && mSize != 0) { - mPool->replace(&other, this); - } - other.mPool.reset(); - other.mSize = 0; - other.mData = nullptr; -} - -inline void HostBuffer::init(uint64_t bufferSize, void *data) -{ - if (bufferSize == 0) { - throw std::runtime_error("HostBuffer: invalid buffer size"); - } - if (mPool) { - mPool.reset(); - } - if (!mPool || mPool->mSize != bufferSize) { - mPool = 
std::make_shared(bufferSize, data); - } - mPool->add(this, bufferSize); -} - -inline HostBuffer& HostBuffer::operator=(HostBuffer&& other) -{ - if (mPool) { - mPool->remove(this); - } - mPool = other.mPool; - mSize = other.mSize; - mData = other.mData; - if (mPool && mSize != 0) { - mPool->replace(&other, this); - } - other.mPool.reset(); - other.mSize = 0; - other.mData = nullptr; - return *this; -} - -inline uint64_t HostBuffer::poolSize() const -{ - return mPool ? mPool->mSize : 0u; -} - -inline uint64_t HostBuffer::poolUsage() const -{ - return mPool ? mPool->usage(): 0u; -} - -inline bool HostBuffer::isManaged() const -{ - return mPool ? mPool->mManaged : false; -} - -inline bool HostBuffer::isFull() const -{ - return mPool ? mPool->isFull() : false; -} - -inline HostBuffer HostBuffer::createPool(uint64_t poolSize, void *data) -{ - if (poolSize == 0) { - throw std::runtime_error("HostBuffer: invalid pool size"); - } - HostBuffer buffer; - buffer.mPool = std::make_shared(poolSize, data); - // note the buffer is NOT registered by its pool since it is not using its memory - buffer.mSize = 0; - buffer.mData = nullptr; - return buffer; -} - -inline HostBuffer HostBuffer::createFull(uint64_t bufferSize, void *data) -{ - if (bufferSize == 0) { - throw std::runtime_error("HostBuffer: invalid buffer size"); - } - HostBuffer buffer; - buffer.mPool = std::make_shared(bufferSize, data); - buffer.mPool->add(&buffer, bufferSize); - return buffer; -} - -inline HostBuffer HostBuffer::create(uint64_t bufferSize, const HostBuffer* pool) -{ - HostBuffer buffer; - if (pool == nullptr || !pool->mPool) { - buffer.mPool = std::make_shared(bufferSize); - } else { - buffer.mPool = pool->mPool; - } - buffer.mPool->add(&buffer, bufferSize); - return buffer; -} - -inline void HostBuffer::clear() -{ - if (mPool) {// remove self from the buffer register in the pool - mPool->remove(this); - } - mPool.reset(); - mSize = 0; - mData = nullptr; -} - -inline void HostBuffer::reset() -{ - if (this->size()>0) { - throw std::runtime_error("HostBuffer: only empty buffers can call reset"); - } - if (!mPool) { - throw std::runtime_error("HostBuffer: this buffer contains no pool to reset"); - } - mPool->reset(); -} - -inline void HostBuffer::resizePool(uint64_t size, void *data) -{ - if (!mPool) { - throw std::runtime_error("HostBuffer: this buffer contains no pool to resize"); - } - mPool->resize(size, data); -} - -} // namespace nanovdb - -#endif // end of NANOVDB_HOSTBUFFER_H_HAS_BEEN_INCLUDED +#include // for NANOVDB_DEPRECATED_HEADER +#include +NANOVDB_DEPRECATED_HEADER("Include nanovdb/HostBuffer.h instead.") diff --git a/nanovdb/nanovdb/util/IO.h b/nanovdb/nanovdb/util/IO.h index 5d51cb53c6..49d51e4f24 100644 --- a/nanovdb/nanovdb/util/IO.h +++ b/nanovdb/nanovdb/util/IO.h @@ -1,796 +1,6 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -/*! - \file IO.h - - \author Ken Museth - - \date May 1, 2020 - - \brief Implements I/O for NanoVDB grids. Features optional BLOSC and ZIP - file compression, support for multiple grids per file as well as - multiple grid types. - - \note This file does NOT depend on OpenVDB, but optionally on ZIP and BLOSC - - \details NanoVDB files take on of two formats: - 1) multiple segments each with multiple grids (segments have easy to access metadata about its grids) - 2) starting with verion 32.6.0 nanovdb files also support a raw buffer with one or more grids (just a - dump of a raw grid buffer, so no new metadata). 
-
-    // 1: Segment: FileHeader, MetaData0, gridName0...MetaDataN, gridNameN, compressed Grid0,...compressed GridN
-    // 2: Raw: Grid0,...GridN
-*/
-
-#ifndef NANOVDB_IO_H_HAS_BEEN_INCLUDED
-#define NANOVDB_IO_H_HAS_BEEN_INCLUDED
-
-#include <nanovdb/NanoVDB.h>
-#include "GridHandle.h"
-#include "GridChecksum.h"// for updateGridCount
-
-#include <fstream>  // for std::ifstream
-#include <iostream> // for std::cerr/cout
-#include <string>   // for std::string
-#include <sstream>  // for std::stringstream
-#include <cstring>  // for std::strcmp
-#include <memory>   // for std::unique_ptr
-#include <vector>   // for std::vector
-#ifdef NANOVDB_USE_ZIP
-#include <zlib.h>   // for ZIP compression
-#endif
-#ifdef NANOVDB_USE_BLOSC
-#include <blosc.h>  // for BLOSC compression
-#endif
-
-// Due to a bug in older versions of gcc, including fstream might
-// define "major" and "minor" which are used as member data below.
-// See https://bugzilla.redhat.com/show_bug.cgi?id=130601
-#if defined(major) || defined(minor)
-#undef major
-#undef minor
-#endif
-
-namespace nanovdb {
-
-namespace io {
-
-// --------------------------> writeGrid(s) <------------------------------------
-
-/// @brief Write a single grid to file (over-writing existing content of the file)
-template<typename BufferT = HostBuffer>
-void writeGrid(const std::string& fileName, const GridHandle<BufferT>& handle, io::Codec codec = io::Codec::NONE, int verbose = 0);
-
-/// @brief Write multiple grids to file (over-writing existing content of the file)
-template<typename BufferT = HostBuffer, template<typename...> class VecT = std::vector>
-void writeGrids(const std::string& fileName, const VecT<GridHandle<BufferT>>& handles, Codec codec = Codec::NONE, int verbose = 0);
-
-// --------------------------> readGrid(s) <------------------------------------
-
-/// @brief Read and return one or all grids from a file into a single GridHandle
-/// @tparam BufferT Type of buffer used memory allocation
-/// @param fileName string name of file to be read from
-/// @param n zero-based signed index of the grid to be read.
-///          The default value of 0 means read only first grid.
-///          A negative value of n means read all grids in the file.
-/// @param verbose specify verbosity level. Default value of zero means quiet.
-/// @param buffer optional buffer used for memory allocation
-/// @return return a single GridHandle with one or all grids found in the file
-/// @throw will throw a std::runtime_error if the file does not contain a grid with index n
-template<typename BufferT = HostBuffer>
-GridHandle<BufferT> readGrid(const std::string& fileName, int n = 0, int verbose = 0, const BufferT& buffer = BufferT());
-
-/// @brief Read and return the first grid with a specific name from a file
-/// @tparam BufferT Type of buffer used memory allocation
-/// @param fileName string name of file to be read from
-/// @param gridName string name of the grid to be read
-/// @param verbose specify verbosity level. Default value of zero means quiet.
-/// @param buffer optional buffer used for memory allocation
-/// @return return a single GridHandle containing the grid with the specific name
-/// @throw will throw a std::runtime_error if the file does not contain a grid with the specific name
-template<typename BufferT = HostBuffer>
-GridHandle<BufferT> readGrid(const std::string& fileName, const std::string& gridName, int verbose = 0, const BufferT& buffer = BufferT());
-
-/// @brief Read all the grids in the file and return them as a vector of multiple GridHandles, each containing
-///        all grids encoded in the same segment of the file (i.e. they were written together)
-/// @tparam BufferT Type of buffer used memory allocation
-/// @param fileName string name of file to be read from
-/// @param verbose specify verbosity level. Default value of zero means quiet.
-/// @param buffer optional buffer used for memory allocation -/// @return Return a vector of GridHandles each containing all grids encoded -/// in the same segment of the file (i.e. they where written together). -template class VecT = std::vector> -VecT> readGrids(const std::string& fileName, int verbose = 0, const BufferT& buffer = BufferT()); - -// ----------------------------------------------------------------------- - -/// We fix a specific size for counting bytes in files so that they -/// are saved the same regardless of machine precision. (Note there are -/// still little/bigendian issues, however) -using fileSize_t = uint64_t; - -/// @brief Internal functions for compressed read/write of a NanoVDB GridHandle into a stream -/// -/// @warning These functions should never be called directly by client code -namespace Internal { -static constexpr fileSize_t MAX_SIZE = 1UL << 30; // size is 1 GB - -template -static fileSize_t write(std::ostream& os, const GridHandle& handle, Codec codec, uint32_t n); - -template -static void read(std::istream& is, BufferT& buffer, Codec codec); - -static void read(std::istream& is, char* data, fileSize_t size, Codec codec); -} // namespace Internal - -/// @brief Standard hash function to use on strings; std::hash may vary by -/// platform/implementation and is know to produce frequent collisions. -uint64_t stringHash(const char* cstr); - -/// @brief Return a uint64_t hash key of a std::string -inline uint64_t stringHash(const std::string& str){return stringHash(str.c_str());} - -/// @brief Return a uint64_t with its bytes reversed so we can check for endianness -inline uint64_t reverseEndianness(uint64_t val) -{ - return (((val) >> 56) & 0x00000000000000FF) | (((val) >> 40) & 0x000000000000FF00) | - (((val) >> 24) & 0x0000000000FF0000) | (((val) >> 8) & 0x00000000FF000000) | - (((val) << 8) & 0x000000FF00000000) | (((val) << 24) & 0x0000FF0000000000) | - (((val) << 40) & 0x00FF000000000000) | (((val) << 56) & 0xFF00000000000000); -} - -/// @brief This class defines the meta data stored for each grid in a segment -/// -/// @details A segment consists of a FileHeader followed by a list of FileGridMetaData -/// each followed by grid names and then finally the grids themselves. -/// -/// @note This class should not be confused with nanovdb::GridMetaData defined in NanoVDB.h -/// Also, FileMetaData is defined in NanoVDB.h. -struct FileGridMetaData : public FileMetaData -{ - static_assert(sizeof(FileMetaData) == 176, "Unexpected sizeof(FileMetaData)"); - std::string gridName; - void read(std::istream& is); - void write(std::ostream& os) const; - FileGridMetaData() {} - template - FileGridMetaData(uint64_t size, Codec c, const NanoGrid& grid); - uint64_t memUsage() const { return sizeof(FileMetaData) + nameSize; } -}; // FileGridMetaData - -/// @brief This class defines all the data stored in segment of a file -/// -/// @details A segment consists of a FileHeader followed by a list of FileGridMetaData -/// each followed by grid names and then finally the grids themselves. 
-struct Segment -{ - // Check assumptions made during read and write of FileHeader and FileMetaData - static_assert(sizeof(FileHeader) == 16u, "Unexpected sizeof(FileHeader)"); - FileHeader header;// defined in NanoVDB.h - std::vector meta;// defined in NanoVDB.h - Segment(Codec c = Codec::NONE) -#ifdef NANOVDB_USE_NEW_MAGIC_NUMBERS - : header{NANOVDB_MAGIC_FILE, Version(), 0u, c} -#else - : header{NANOVDB_MAGIC_NUMBER, Version(), 0u, c} -#endif - , meta() - { - } - template - void add(const GridHandle& h); - bool read(std::istream& is); - void write(std::ostream& os) const; - uint64_t memUsage() const; -}; // Segment - -/// @brief Return true if the file contains a grid with the specified name -bool hasGrid(const std::string& fileName, const std::string& gridName); - -/// @brief Return true if the stream contains a grid with the specified name -bool hasGrid(std::istream& is, const std::string& gridName); - -/// @brief Reads and returns a vector of meta data for all the grids found in the specified file -std::vector readGridMetaData(const std::string& fileName); - -/// @brief Reads and returns a vector of meta data for all the grids found in the specified stream -std::vector readGridMetaData(std::istream& is); - -// --------------------------> Implementations for Internal <------------------------------------ - -template -fileSize_t Internal::write(std::ostream& os, const GridHandle& handle, Codec codec, unsigned int n) -{ - const char* data = reinterpret_cast(handle.gridData(n)); - fileSize_t total = 0, residual = handle.gridSize(n); - - switch (codec) { - case Codec::ZIP: { -#ifdef NANOVDB_USE_ZIP - uLongf size = compressBound(residual); // Get an upper bound on the size of the compressed data. - std::unique_ptr tmp(new Bytef[size]); - const int status = compress(tmp.get(), &size, reinterpret_cast(data), residual); - if (status != Z_OK) - std::runtime_error("Internal write error in ZIP"); - if (size > residual) - std::cerr << "\nWarning: Unexpected ZIP compression from " << residual << " to " << size << " bytes\n"; - const fileSize_t outBytes = size; - os.write(reinterpret_cast(&outBytes), sizeof(fileSize_t)); - os.write(reinterpret_cast(tmp.get()), outBytes); - total += sizeof(fileSize_t) + outBytes; -#else - throw std::runtime_error("ZIP compression codec was disabled during build"); -#endif - break; - } - case Codec::BLOSC: { -#ifdef NANOVDB_USE_BLOSC - do { - fileSize_t chunk = residual < MAX_SIZE ? 
residual : MAX_SIZE, size = chunk + BLOSC_MAX_OVERHEAD; - std::unique_ptr tmp(new char[size]); - const int count = blosc_compress_ctx(9, 1, sizeof(float), chunk, data, tmp.get(), size, BLOSC_LZ4_COMPNAME, 1 << 18, 1); - if (count <= 0) - std::runtime_error("Internal write error in BLOSC"); - const fileSize_t outBytes = count; - os.write(reinterpret_cast(&outBytes), sizeof(fileSize_t)); - os.write(reinterpret_cast(tmp.get()), outBytes); - total += sizeof(fileSize_t) + outBytes; - data += chunk; - residual -= chunk; - } while (residual > 0); -#else - throw std::runtime_error("BLOSC compression codec was disabled during build"); -#endif - break; - } - default: - os.write(data, residual); - total += residual; - } - if (!os) throw std::runtime_error("Failed to write Tree to file"); - return total; -} // Internal::write - -template -void Internal::read(std::istream& is, BufferT& buffer, Codec codec) -{ - Internal::read(is, reinterpret_cast(buffer.data()), buffer.size(), codec); -} // Internal::read - -/// @brief read compressed grid from stream -/// @param is input stream to read from -/// @param data data buffer to write into -/// @param residual expected size of uncompressed data -/// @param codec mode of compression -void Internal::read(std::istream& is, char* data, fileSize_t residual, Codec codec) -{ - // read tree using optional compression - switch (codec) { - case Codec::ZIP: { -#ifdef NANOVDB_USE_ZIP - fileSize_t size; - is.read(reinterpret_cast(&size), sizeof(fileSize_t)); - std::unique_ptr tmp(new Bytef[size]); - is.read(reinterpret_cast(tmp.get()), size); - uLongf numBytes = residual; - int status = uncompress(reinterpret_cast(data), &numBytes, tmp.get(), static_cast(size)); - if (status != Z_OK) std::runtime_error("Internal read error in ZIP"); - if (fileSize_t(numBytes) != residual) throw std::runtime_error("UNZIP failed on byte size"); -#else - throw std::runtime_error("ZIP compression codec was disabled during build"); -#endif - break; - } - case Codec::BLOSC: { -#ifdef NANOVDB_USE_BLOSC - do { - fileSize_t size; - is.read(reinterpret_cast(&size), sizeof(fileSize_t)); - std::unique_ptr tmp(new char[size]); - is.read(reinterpret_cast(tmp.get()), size); - const fileSize_t chunk = residual < MAX_SIZE ? 
residual : MAX_SIZE; - const int count = blosc_decompress_ctx(tmp.get(), data, size_t(chunk), 1); //fails with more threads :( - if (count < 1) - std::runtime_error("Internal read error in BLOSC"); - if (count != int(chunk)) - throw std::runtime_error("BLOSC failed on byte size"); - data += size_t(chunk); - residual -= chunk; - } while (residual > 0); -#else - throw std::runtime_error("BLOSC compression codec was disabled during build"); -#endif - break; - } - default: - is.read(data, residual);// read uncompressed data - } - if (!is) throw std::runtime_error("Failed to read Tree from file"); -} // Internal::read - -// --------------------------> Implementations for FileGridMetaData <------------------------------------ - -template -inline FileGridMetaData::FileGridMetaData(uint64_t size, Codec c, const NanoGrid& grid) - : FileMetaData{size, // gridSize - size, // fileSize (will typically be redefined) - 0u, // nameKey - grid.activeVoxelCount(), // voxelCount - grid.gridType(), // gridType - grid.gridClass(), // gridClass - grid.worldBBox(), // worldBBox - grid.tree().bbox(), // indexBBox - grid.voxelSize(), // voxelSize - 0, // nameSize - {0, 0, 0, 1}, // nodeCount[4] - {0, 0, 0}, // tileCount[3] - c, // codec - 0, // padding - Version()}// version - , gridName(grid.gridName()) -{ - nameKey = stringHash(gridName); - nameSize = static_cast(gridName.size() + 1); // include '\0' - const uint32_t* ptr = reinterpret_cast(&grid.tree())->mNodeCount; - for (int i = 0; i < 3; ++i) FileMetaData::nodeCount[i] = *ptr++; - for (int i = 0; i < 3; ++i) FileMetaData::tileCount[i] = *ptr++; -}// FileGridMetaData::FileGridMetaData - -inline void FileGridMetaData::write(std::ostream& os) const -{ - os.write(reinterpret_cast(this), sizeof(FileMetaData)); - os.write(gridName.c_str(), nameSize); - if (!os) throw std::runtime_error("Failed writing FileGridMetaData"); -}// FileGridMetaData::write - -inline void FileGridMetaData::read(std::istream& is) -{ - is.read(reinterpret_cast(this), sizeof(FileMetaData)); - std::unique_ptr tmp(new char[nameSize]); - is.read(reinterpret_cast(tmp.get()), nameSize); - gridName.assign(tmp.get()); - if (!is) throw std::runtime_error("Failed reading FileGridMetaData"); -}// FileGridMetaData::read - -// --------------------------> Implementations for Segment <------------------------------------ - -inline uint64_t Segment::memUsage() const -{ - uint64_t sum = sizeof(FileHeader); - for (auto& m : meta) sum += m.memUsage();// includes FileMetaData + grid name - return sum; -}// Segment::memUsage - -template -inline void Segment::add(const GridHandle& h) -{ - for (uint32_t i = 0; i < h.gridCount(); ++i) { - if (auto* grid = h.template grid(i)) { // most common - meta.emplace_back(h.gridSize(i), header.codec, *grid); - } else if (auto* grid = h.template grid(i)) { - meta.emplace_back(h.gridSize(i), header.codec, *grid); - } else if (auto* grid = h.template grid(i)) { - meta.emplace_back(h.gridSize(i), header.codec, *grid); - } else if (auto* grid = h.template grid(i)) { - meta.emplace_back(h.gridSize(i), header.codec, *grid); - } else if (auto* grid = h.template grid(i)) { - meta.emplace_back(h.gridSize(i), header.codec, *grid); - } else if (auto* grid = h.template grid(i)) { - meta.emplace_back(h.gridSize(i), header.codec, *grid); - } else if (auto* grid = h.template grid(i)) { - meta.emplace_back(h.gridSize(i), header.codec, *grid); - } else if (auto* grid = h.template grid(i)) { - meta.emplace_back(h.gridSize(i), header.codec, *grid); - } else if (auto* grid = h.template grid(i)) { 
- meta.emplace_back(h.gridSize(i), header.codec, *grid); - } else if (auto* grid = h.template grid(i)) { - meta.emplace_back(h.gridSize(i), header.codec, *grid); - } else if (auto* grid = h.template grid(i)) { - meta.emplace_back(h.gridSize(i), header.codec, *grid); - } else if (auto* grid = h.template grid(i)) { - meta.emplace_back(h.gridSize(i), header.codec, *grid); - } else if (auto* grid = h.template grid(i)) { - meta.emplace_back(h.gridSize(i), header.codec, *grid); - } else if (auto* grid = h.template grid(i)) { - meta.emplace_back(h.gridSize(i), header.codec, *grid); - } else if (auto* grid = h.template grid(i)) { - meta.emplace_back(h.gridSize(i), header.codec, *grid); - } else if (auto* grid = h.template grid(i)) { - meta.emplace_back(h.gridSize(i), header.codec, *grid); - } else if (auto* grid = h.template grid(i)) { - meta.emplace_back(h.gridSize(i), header.codec, *grid); - } else if (auto* grid = h.template grid(i)) { - meta.emplace_back(h.gridSize(i), header.codec, *grid); - } else if (auto* grid = h.template grid(i)) { - meta.emplace_back(h.gridSize(i), header.codec, *grid); - } else if (auto* grid = h.template grid(i)) { - meta.emplace_back(h.gridSize(i), header.codec, *grid); - } else if (auto* grid = h.template grid(i)) { - meta.emplace_back(h.gridSize(i), header.codec, *grid); - } else { - std::stringstream ss; - ss << "nanovdb::io::Segment::add: Cannot write grid of unknown type \""<(&header), sizeof(FileHeader))) { - throw std::runtime_error("Failed to write FileHeader of Segment"); - } - for (auto& m : meta) m.write(os); -}// Segment::write - -inline bool Segment::read(std::istream& is) -{ - is.read(reinterpret_cast(&header), sizeof(FileHeader)); - if (is.eof()) {// The EOF flag is only set once a read tries to read past the end of the file - is.clear(std::ios_base::eofbit);// clear eof flag so we can rewind and read again - return false; - } - if (!header.isValid()) { - // first check for byte-swapped header magic. 
- if (header.magic == reverseEndianness(NANOVDB_MAGIC_NUMBER) || - header.magic == reverseEndianness(NANOVDB_MAGIC_FILE)) { - throw std::runtime_error("This nvdb file has reversed endianness"); - } else { - throw std::runtime_error("Magic number error: This is not a valid nvdb file"); - } - } else if ( !header.version.isCompatible()) { - std::stringstream ss; - Version v; - is.read(reinterpret_cast(&v), sizeof(Version));// read GridData::mVersion located at byte 16=sizeof(FileHeader) is stream - if ( v.getMajor() == NANOVDB_MAJOR_VERSION_NUMBER) { - ss << "This file looks like it contains a raw grid buffer and not a standard file with meta data"; - } else if ( header.version.getMajor() < NANOVDB_MAJOR_VERSION_NUMBER) { - ss << "The file contains an older version of NanoVDB: " << std::string(header.version.c_str()) << "!\n\t" - << "Recommendation: Re-generate this NanoVDB file with this version: " << NANOVDB_MAJOR_VERSION_NUMBER << ".X of NanoVDB"; - } else { - ss << "This tool was compiled against an older version of NanoVDB: " << NANOVDB_MAJOR_VERSION_NUMBER << ".X!\n\t" - << "Recommendation: Re-compile this tool against the newer version: " << header.version.getMajor() << ".X of NanoVDB"; - } - throw std::runtime_error("An unrecoverable error in nanovdb::Segment::read:\n\tIncompatible file format: " + ss.str()); - } - meta.resize(header.gridCount); - for (auto& m : meta) { - m.read(is); - m.version = header.version; - } - return true; -}// Segment::read - -// --------------------------> writeGrid <------------------------------------ - -template -void writeGrid(std::ostream& os, const GridHandle& handle, Codec codec) -{ - Segment seg(codec); - seg.add(handle); - const auto start = os.tellp(); - seg.write(os); // write header without the correct fileSize (so it's allocated) - for (uint32_t i = 0; i < handle.gridCount(); ++i) { - seg.meta[i].fileSize = Internal::write(os, handle, codec, i); - } - os.seekp(start); - seg.write(os);// re-write header with the correct fileSize - os.seekp(0, std::ios_base::end);// skip to end -}// writeGrid - -template -void writeGrid(const std::string& fileName, const GridHandle& handle, Codec codec, int verbose) -{ - std::ofstream os(fileName, std::ios::out | std::ios::binary | std::ios::trunc); - if (!os.is_open()) { - throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for output"); - } - writeGrid(os, handle, codec); - if (verbose) { - std::cout << "Wrote nanovdb::Grid to file named \"" << fileName << "\"" << std::endl; - } -}// writeGrid - -// --------------------------> writeGrids <------------------------------------ - -template class VecT = std::vector> -void writeGrids(std::ostream& os, const VecT>& handles, Codec codec = Codec::NONE) -{ - for (auto& h : handles) writeGrid(os, h, codec); -}// writeGrids - -template class VecT> -void writeGrids(const std::string& fileName, const VecT>& handles, Codec codec, int verbose) -{ - std::ofstream os(fileName, std::ios::out | std::ios::binary | std::ios::trunc); - if (!os.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for output"); - writeGrids(os, handles, codec); - if (verbose) std::cout << "Wrote " << handles.size() << " nanovdb::Grid(s) to file named \"" << fileName << "\"" << std::endl; -}// writeGrids - -// --------------------------> readGrid <------------------------------------ - -template -GridHandle readGrid(std::istream& is, int n, const BufferT& pool) -{ - GridHandle handle; - if (n<0) {// read all grids into the same buffer - try 
{//first try to read a raw grid buffer - handle.read(is, pool); - } catch(const std::logic_error&) { - Segment seg; - uint64_t bufferSize = 0u; - uint32_t gridCount = 0u, gridIndex = 0u; - const auto start = is.tellg(); - while (seg.read(is)) { - std::streamoff skipSize = 0; - for (auto& m : seg.meta) { - ++gridCount; - bufferSize += m.gridSize; - skipSize += m.fileSize; - }// loop over grids in segment - is.seekg(skipSize, std::ios_base::cur); // skip forward from the current position - }// loop over segments - auto buffer = BufferT::create(bufferSize, &pool); - char *ptr = (char*)buffer.data(); - is.seekg(start);// rewind - while (seg.read(is)) { - for (auto& m : seg.meta) { - Internal::read(is, ptr, m.gridSize, seg.header.codec); - updateGridCount((GridData*)ptr, gridIndex++, gridCount); - ptr += m.gridSize; - }// loop over grids in segment - }// loop over segments - return GridHandle(std::move(buffer)); - } - } else {// read a specific grid - try {//first try to read a raw grid buffer - handle.read(is, uint32_t(n), pool); - updateGridCount((GridData*)handle.data(), 0u, 1u); - } catch(const std::logic_error&) { - Segment seg; - int counter = -1; - while (seg.read(is)) { - std::streamoff seek = 0; - for (auto& m : seg.meta) { - if (++counter == n) { - auto buffer = BufferT::create(m.gridSize, &pool); - Internal::read(is, buffer, seg.header.codec); - updateGridCount((GridData*)buffer.data(), 0u, 1u); - return GridHandle(std::move(buffer)); - } else { - seek += m.fileSize; - } - }// loop over grids in segment - is.seekg(seek, std::ios_base::cur); // skip forward from the current position - }// loop over segments - if (n != counter) throw std::runtime_error("stream does not contain a #" + std::to_string(n) + " grid"); - } - } - return handle; -}// readGrid - -/// @brief Read the n'th grid -template -GridHandle readGrid(const std::string& fileName, int n, int verbose, const BufferT& buffer) -{ - std::ifstream is(fileName, std::ios::in | std::ios::binary); - if (!is.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for input"); - auto handle = readGrid(is, n, buffer); - if (verbose) { - if (n<0) { - std::cout << "Read all NanoGrids from the file named \"" << fileName << "\"" << std::endl; - } else { - std::cout << "Read NanoGrid # " << n << " from the file named \"" << fileName << "\"" << std::endl; - } - } - return handle; // is converted to r-value and return value is move constructed. 
-}// readGrid - -/// @brief Read a specific grid from an input stream given the name of the grid -/// @tparam BufferT Buffer type used for allocation -/// @param is input stream from which to read the grid -/// @param gridName string name of the (first) grid to be returned -/// @param pool optional memory pool from which to allocate the grid buffer -/// @return Return the first grid in the input stream with a specific name -/// @throw std::runtime_error with no grid exists with the specified name -template -GridHandle readGrid(std::istream& is, const std::string& gridName, const BufferT& pool) -{ - try { - GridHandle handle; - handle.read(is, gridName, pool); - return handle; - } catch(const std::logic_error&) { - const auto key = stringHash(gridName); - Segment seg; - while (seg.read(is)) {// loop over all segments in stream - std::streamoff seek = 0; - for (auto& m : seg.meta) {// loop over all grids in segment - if ((m.nameKey == 0u || m.nameKey == key) && m.gridName == gridName) { // check for hash key collision - auto buffer = BufferT::create(m.gridSize, &pool); - is.seekg(seek, std::ios_base::cur); // rewind - Internal::read(is, buffer, seg.header.codec); - updateGridCount((GridData*)buffer.data(), 0u, 1u); - return GridHandle(std::move(buffer)); - } else { - seek += m.fileSize; - } - } - is.seekg(seek, std::ios_base::cur); // skip forward from the current position - } - } - throw std::runtime_error("Grid name '" + gridName + "' not found in file"); -}// readGrid - -/// @brief Read the first grid with a specific name -template -GridHandle readGrid(const std::string& fileName, const std::string& gridName, int verbose, const BufferT& buffer) -{ - std::ifstream is(fileName, std::ios::in | std::ios::binary); - if (!is.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for input"); - auto handle = readGrid(is, gridName, buffer); - if (verbose) { - if (handle) { - std::cout << "Read NanoGrid named \"" << gridName << "\" from the file named \"" << fileName << "\"" << std::endl; - } else { - std::cout << "File named \"" << fileName << "\" does not contain a grid named \"" + gridName + "\"" << std::endl; - } - } - return handle; // is converted to r-value and return value is move constructed. -}// readGrid - -// --------------------------> readGrids <------------------------------------ - -template class VecT = std::vector> -VecT> readGrids(std::istream& is, const BufferT& pool = BufferT()) -{ - VecT> handles; - Segment seg; - while (seg.read(is)) { - uint64_t bufferSize = 0; - for (auto& m : seg.meta) bufferSize += m.gridSize; - auto buffer = BufferT::create(bufferSize, &pool); - uint64_t bufferOffset = 0; - for (uint16_t i = 0; i < seg.header.gridCount; ++i) { - auto *data = reinterpret_cast(buffer.data() + bufferOffset); - Internal::read(is, (char*)data, seg.meta[i].gridSize, seg.header.codec); - updateGridCount(data, uint32_t(i), uint32_t(seg.header.gridCount)); - bufferOffset += seg.meta[i].gridSize; - }// loop over grids in segment - handles.emplace_back(std::move(buffer)); // force move copy assignment - }// loop over segments - return handles; // is converted to r-value and return value is move constructed. 
-}// readGrids - -/// @brief Read all the grids -template class VecT> -VecT> readGrids(const std::string& fileName, int verbose, const BufferT& buffer) -{ - std::ifstream is(fileName, std::ios::in | std::ios::binary); - if (!is.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for input"); - auto handles = readGrids(is, buffer); - if (verbose) std::cout << "Read " << handles.size() << " NanoGrid(s) from the file named \"" << fileName << "\"" << std::endl; - return handles; // is converted to r-value and return value is move constructed. -}// readGrids - -// --------------------------> readGridMetaData <------------------------------------ - -inline std::vector readGridMetaData(const std::string& fileName) -{ - std::ifstream is(fileName, std::ios::in | std::ios::binary); - if (!is.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for input"); - return readGridMetaData(is); // is converted to r-value and return value is move constructed. -}// readGridMetaData - -inline std::vector readGridMetaData(std::istream& is) -{ - Segment seg; - std::vector meta; - try { - GridHandle<> handle;// if stream contains a raw grid buffer we unfortunately have to load everything - handle.read(is); - seg.add(handle); - meta = std::move(seg.meta); - } catch(const std::logic_error&) { - while (seg.read(is)) { - std::streamoff skip = 0; - for (auto& m : seg.meta) { - meta.push_back(m); - skip += m.fileSize; - }// loop over grid meta data in segment - is.seekg(skip, std::ios_base::cur); - }// loop over segments - } - return meta; // is converted to r-value and return value is move constructed. -}// readGridMetaData - -// --------------------------> hasGrid <------------------------------------ - -inline bool hasGrid(const std::string& fileName, const std::string& gridName) -{ - std::ifstream is(fileName, std::ios::in | std::ios::binary); - if (!is.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for input"); - return hasGrid(is, gridName); -}// hasGrid - -inline bool hasGrid(std::istream& is, const std::string& gridName) -{ - const auto key = stringHash(gridName); - Segment seg; - while (seg.read(is)) { - std::streamoff seek = 0; - for (auto& m : seg.meta) { - if (m.nameKey == key && m.gridName == gridName) return true; // check for hash key collision - seek += m.fileSize; - }// loop over grid meta data in segment - is.seekg(seek, std::ios_base::cur); - }// loop over segments - return false; -}// hasGrid - -// --------------------------> stringHash <------------------------------------ - -inline uint64_t stringHash(const char* c_str) -{ - uint64_t hash = 0;// zero is returned when cstr = nullptr or "\0" - if (c_str) { - for (auto* str = reinterpret_cast(c_str); *str; ++str) { - uint64_t overflow = hash >> (64 - 8); - hash *= 67; // Next-ish prime after 26 + 26 + 10 - hash += *str + overflow; - } - } - return hash; -}// stringHash - -} // namespace io - -template -inline std::ostream& -operator<<(std::ostream& os, const BBox>& b) -{ - os << "(" << b[0][0] << "," << b[0][1] << "," << b[0][2] << ") -> " - << "(" << b[1][0] << "," << b[1][1] << "," << b[1][2] << ")"; - return os; -} - -inline std::ostream& -operator<<(std::ostream& os, const CoordBBox& b) -{ - os << "(" << b[0][0] << "," << b[0][1] << "," << b[0][2] << ") -> " - << "(" << b[1][0] << "," << b[1][1] << "," << b[1][2] << ")"; - return os; -} - -inline std::ostream& -operator<<(std::ostream& os, const Coord& ijk) -{ - os << "(" << ijk[0] << 
"," << ijk[1] << "," << ijk[2] << ")"; - return os; -} - -template -inline std::ostream& -operator<<(std::ostream& os, const Vec3& v) -{ - os << "(" << v[0] << "," << v[1] << "," << v[2] << ")"; - return os; -} - -template -inline std::ostream& -operator<<(std::ostream& os, const Vec4& v) -{ - os << "(" << v[0] << "," << v[1] << "," << v[2] << "," << v[3] << ")"; - return os; -} - -} // namespace nanovdb - -#endif // NANOVDB_IO_H_HAS_BEEN_INCLUDED +#include // for NANOVDB_DEPRECATED_HEADER +#include +NANOVDB_DEPRECATED_HEADER("Include nanovdb/io/IO.h instead.") diff --git a/nanovdb/nanovdb/util/Invoke.h b/nanovdb/nanovdb/util/Invoke.h index 48e1ac0a42..f0e1561bb4 100644 --- a/nanovdb/nanovdb/util/Invoke.h +++ b/nanovdb/nanovdb/util/Invoke.h @@ -2,7 +2,7 @@ // SPDX-License-Identifier: MPL-2.0 /*! - \file Invoke.h + \file nanovdb/util/Invoke.h \author Ken Museth @@ -16,8 +16,8 @@ @endcode */ -#ifndef NANOVDB_INVOKE_H_HAS_BEEN_INCLUDED -#define NANOVDB_INVOKE_H_HAS_BEEN_INCLUDED +#ifndef NANOVDB_UTIL_INVOKE_H_HAS_BEEN_INCLUDED +#define NANOVDB_UTIL_INVOKE_H_HAS_BEEN_INCLUDED #include // for nanovdb::CoordBBox @@ -31,6 +31,8 @@ namespace nanovdb { +namespace util { + namespace { #ifndef NANOVDB_USE_TBB // Base case @@ -82,6 +84,14 @@ int invoke(const Func &taskFunc1, Rest... taskFuncN) { return -1;// should never happen } +}// namespace util + +template +[[deprecated("Use nanovdb::util::invoke instead")]] +int invoke(const Func &taskFunc1, Rest... taskFuncN) { + return util::invoke(taskFunc1, taskFuncN...); +} + }// namespace nanovdb -#endif // NANOVDB_INVOKE_H_HAS_BEEN_INCLUDED +#endif // NANOVDB_UTIL_INVOKE_H_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/util/NanoToOpenVDB.h b/nanovdb/nanovdb/util/NanoToOpenVDB.h index 8610afb9a8..ea7c956104 100644 --- a/nanovdb/nanovdb/util/NanoToOpenVDB.h +++ b/nanovdb/nanovdb/util/NanoToOpenVDB.h @@ -1,344 +1,6 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -/*! - \file NanoToOpenVDB.h - - \author Ken Museth - - \date May 6, 2020 - - \brief This class will deserialize an NanoVDB grid into an OpenVDB grid. - - \todo Add support for PointIndexGrid and PointDataGrid -*/ - -#include // manages and streams the raw memory buffer of a NanoVDB grid. -#include -#include "ForEach.h" - -#include - -#ifndef NANOVDB_NANOTOOPENVDB_H_HAS_BEEN_INCLUDED -#define NANOVDB_NANOTOOPENVDB_H_HAS_BEEN_INCLUDED - -template -struct ConvertTrait {using Type = T;}; - -template -struct ConvertTrait> {using Type = openvdb::math::Vec3;}; - -template -struct ConvertTrait> {using Type = openvdb::math::Vec4;}; - -template<> -struct ConvertTrait {using Type = float;}; - -template<> -struct ConvertTrait {using Type = float;}; - -template<> -struct ConvertTrait {using Type = float;}; - -template<> -struct ConvertTrait {using Type = float;}; - -template<> -struct ConvertTrait {using Type = openvdb::ValueMask;}; - -namespace nanovdb { - -/// @brief Forward declaration of free-standing function that de-serializes a typed NanoVDB grid into an OpenVDB Grid -template -typename openvdb::Grid::Type>::Type>::Ptr -nanoToOpenVDB(const NanoGrid& grid, int verbose = 0); - -/// @brief Forward declaration of free-standing function that de-serializes a NanoVDB GridHandle into an OpenVDB GridBase -template -openvdb::GridBase::Ptr -nanoToOpenVDB(const GridHandle& handle, int verbose = 0, uint32_t n = 0); - -/// @brief This class will serialize an OpenVDB grid into a NanoVDB grid managed by a GridHandle. 
-template -class NanoToOpenVDB -{ - using NanoNode0 = LeafNode; // note that it's using openvdb coord nd mask types! - using NanoNode1 = InternalNode; - using NanoNode2 = InternalNode; - using NanoRootT = RootNode; - using NanoTreeT = Tree; - using NanoGridT = Grid; - using NanoValueT = typename NanoGridT::ValueType; - - using OpenBuildT = typename ConvertTrait::Type; // e.g. float -> float but nanovdb::Vec3 -> openvdb::Vec3 - using OpenNode0 = openvdb::tree::LeafNode; // leaf - using OpenNode1 = openvdb::tree::InternalNode; // lower - using OpenNode2 = openvdb::tree::InternalNode; // upper - using OpenRootT = openvdb::tree::RootNode; - using OpenTreeT = openvdb::tree::Tree; - using OpenGridT = openvdb::Grid; - using OpenValueT = typename OpenGridT::ValueType; - -public: - /// @brief Construction from an existing const OpenVDB Grid. - NanoToOpenVDB(){}; - - /// @brief Return a shared pointer to a NanoVDB grid constructed from the specified OpenVDB grid - typename OpenGridT::Ptr operator()(const NanoGrid& grid, int verbose = 0); - -private: - - template - OpenNodeT* processNode(const NanoNodeT*); - - OpenNode2* process(const NanoNode2* node) {return this->template processNode(node);} - OpenNode1* process(const NanoNode1* node) {return this->template processNode(node);} - - template - typename std::enable_if::value && - !std::is_same::value && - !std::is_same::value && - !std::is_same::value && - !std::is_same::value && - !std::is_same::value, - OpenNode0*>::type - process(const NanoLeafT* node); - - template - typename std::enable_if::value || - std::is_same::value || - std::is_same::value || - std::is_same::value, - OpenNode0*>::type - process(const NanoLeafT* node); - - template - typename std::enable_if::value, - OpenNode0*>::type - process(const NanoLeafT* node); - - template - typename std::enable_if::value, - OpenNode0*>::type - process(const NanoLeafT* node); - - /// converts nanovdb value types to openvdb value types, e.g. 
nanovdb::Vec3f& -> openvdb::Vec3f& - static const OpenValueT& Convert(const NanoValueT &v) {return reinterpret_cast(v);} - static const OpenValueT* Convert(const NanoValueT *v) {return reinterpret_cast(v);} - -}; // NanoToOpenVDB class - -template -typename NanoToOpenVDB::OpenGridT::Ptr -NanoToOpenVDB::operator()(const NanoGrid& grid, int /*verbose*/) -{ - // since the input nanovdb grid might use nanovdb types (Coord, Mask, Vec3) we cast to use openvdb types - const NanoGridT *srcGrid = reinterpret_cast(&grid); - - auto dstGrid = openvdb::createGrid(Convert(srcGrid->tree().background())); - dstGrid->setName(srcGrid->gridName()); // set grid name - switch (srcGrid->gridClass()) { // set grid class - case nanovdb::GridClass::LevelSet: - dstGrid->setGridClass(openvdb::GRID_LEVEL_SET); - break; - case nanovdb::GridClass::FogVolume: - dstGrid->setGridClass(openvdb::GRID_FOG_VOLUME); - break; - case nanovdb::GridClass::Staggered: - dstGrid->setGridClass(openvdb::GRID_STAGGERED); - break; - case nanovdb::GridClass::PointIndex: - throw std::runtime_error("NanoToOpenVDB does not yet support PointIndexGrids"); - case nanovdb::GridClass::PointData: - throw std::runtime_error("NanoToOpenVDB does not yet support PointDataGrids"); - default: - dstGrid->setGridClass(openvdb::GRID_UNKNOWN); - } - // set transform - const nanovdb::Map& nanoMap = reinterpret_cast(srcGrid)->mMap; - auto mat = openvdb::math::Mat4::identity(); - mat.setMat3(openvdb::math::Mat3(nanoMap.mMatD)); - mat.transpose(); // the 3x3 in nanovdb is transposed relative to openvdb's 3x3 - mat.setTranslation(openvdb::math::Vec3(nanoMap.mVecD)); - dstGrid->setTransform(openvdb::math::Transform::createLinearTransform(mat)); // calls simplify! - - // process root node - auto &root = dstGrid->tree().root(); - auto *data = srcGrid->tree().root().data(); - for (uint32_t i=0; imTableSize; ++i) { - auto *tile = data->tile(i); - if (tile->isChild()) { - root.addChild( this->process( data->getChild(tile)) ); - } else { - root.addTile(tile->origin(), Convert(tile->value), tile->state); - } - } - - return dstGrid; -} - -template -template -DstNodeT* -NanoToOpenVDB::processNode(const SrcNodeT *srcNode) -{ - DstNodeT *dstNode = new DstNodeT(); // un-initialized for fast construction - dstNode->setOrigin(srcNode->origin()); - const auto& childMask = srcNode->childMask(); - const_cast(dstNode->getValueMask()) = srcNode->valueMask(); - const_cast(dstNode->getChildMask()) = childMask; - auto* dstTable = const_cast(dstNode->getTable()); - auto* srcData = srcNode->data(); - std::vector> childNodes; - const auto childCount = childMask.countOn(); - childNodes.reserve(childCount); - for (uint32_t n = 0; n < DstNodeT::NUM_VALUES; ++n) { - if (childMask.isOn(n)) { - childNodes.emplace_back(n, srcData->getChild(n)); - } else { - dstTable[n].setValue(Convert(srcData->mTable[n].value)); - } - } - auto kernel = [&](const auto& r) { - for (auto i = r.begin(); i != r.end(); ++i) { - auto &p = childNodes[i]; - dstTable[p.first].setChild( this->process(p.second) ); - } - }; - -#if 0 - kernel(Range1D(0, childCount)); -#else - forEach(0, childCount, 1, kernel); -#endif - return dstNode; -} // processNode - -template -template -inline typename std::enable_if::value && - !std::is_same::value && - !std::is_same::value && - !std::is_same::value && - !std::is_same::value && - !std::is_same::value, - typename NanoToOpenVDB::OpenNode0*>::type -NanoToOpenVDB::process(const NanoLeafT *srcNode) -{ - static_assert(std::is_same::value, "NanoToOpenVDB::process assert failed"); - 
OpenNode0* dstNode = new OpenNode0(); // un-initialized for fast construction - dstNode->setOrigin(srcNode->origin()); - dstNode->setValueMask(srcNode->valueMask()); - - const auto* src = Convert(srcNode->data()->mValues);// doesn't work for compressed data, bool or ValueMask - for (auto *dst = dstNode->buffer().data(), *end = dst + OpenNode0::SIZE; dst != end; dst += 4, src += 4) { - dst[0] = src[0]; - dst[1] = src[1]; - dst[2] = src[2]; - dst[3] = src[3]; - } - - return dstNode; -} // process(NanoNode0) - -template -template -inline typename std::enable_if::value || - std::is_same::value || - std::is_same::value || - std::is_same::value, - typename NanoToOpenVDB::OpenNode0*>::type -NanoToOpenVDB::process(const NanoLeafT *srcNode) -{ - static_assert(std::is_same::value, "NanoToOpenVDB::process assert failed"); - OpenNode0* dstNode = new OpenNode0(); // un-initialized for fast construction - dstNode->setOrigin(srcNode->origin()); - dstNode->setValueMask(srcNode->valueMask()); - float *dst = dstNode->buffer().data(); - for (int i=0; i!=512; i+=4) { - *dst++ = srcNode->getValue(i); - *dst++ = srcNode->getValue(i+1); - *dst++ = srcNode->getValue(i+2); - *dst++ = srcNode->getValue(i+3); - } - - return dstNode; -} // process(NanoNode0) - -template -template -inline typename std::enable_if::value, - typename NanoToOpenVDB::OpenNode0*>::type -NanoToOpenVDB::process(const NanoLeafT *srcNode) -{ - static_assert(std::is_same::value, "NanoToOpenVDB::process assert failed"); - OpenNode0* dstNode = new OpenNode0(); // un-initialized for fast construction - dstNode->setOrigin(srcNode->origin()); - dstNode->setValueMask(srcNode->valueMask()); - - return dstNode; -} // process(NanoNode0) - -template -template -inline typename std::enable_if::value, - typename NanoToOpenVDB::OpenNode0*>::type -NanoToOpenVDB::process(const NanoLeafT *srcNode) -{ - static_assert(std::is_same::value, "NanoToOpenVDB::process assert failed"); - OpenNode0* dstNode = new OpenNode0(); // un-initialized for fast construction - dstNode->setOrigin(srcNode->origin()); - dstNode->setValueMask(srcNode->valueMask()); - reinterpret_cast&>(dstNode->buffer()) = srcNode->data()->mValues; - - return dstNode; -} // process(NanoNode0) - -template -inline typename openvdb::Grid::Type>::Type>::Ptr -nanoToOpenVDB(const NanoGrid& grid, int verbose) -{ - nanovdb::NanoToOpenVDB tmp; - return tmp(grid, verbose); -} - -template -openvdb::GridBase::Ptr -nanoToOpenVDB(const GridHandle& handle, int verbose, uint32_t n) -{ - if (auto grid = handle.template grid(n)) { - return nanovdb::nanoToOpenVDB(*grid, verbose); - } else if (auto grid = handle.template grid(n)) { - return nanovdb::nanoToOpenVDB(*grid, verbose); - } else if (auto grid = handle.template grid(n)) { - return nanovdb::nanoToOpenVDB(*grid, verbose); - } else if (auto grid = handle.template grid(n)) { - return nanovdb::nanoToOpenVDB(*grid, verbose); - } else if (auto grid = handle.template grid(n)) { - return nanovdb::nanoToOpenVDB(*grid, verbose); - } else if (auto grid = handle.template grid(n)) { - return nanovdb::nanoToOpenVDB(*grid, verbose); - } else if (auto grid = handle.template grid(n)) { - return nanovdb::nanoToOpenVDB(*grid, verbose); - } else if (auto grid = handle.template grid(n)) { - return nanovdb::nanoToOpenVDB(*grid, verbose); - } else if (auto grid = handle.template grid(n)) { - return nanovdb::nanoToOpenVDB(*grid, verbose); - } else if (auto grid = handle.template grid(n)) { - return nanovdb::nanoToOpenVDB(*grid, verbose); - } else if (auto grid = handle.template grid(n)) 
{ - return nanovdb::nanoToOpenVDB(*grid, verbose); - } else if (auto grid = handle.template grid(n)) { - return nanovdb::nanoToOpenVDB(*grid, verbose); - } else if (auto grid = handle.template grid(n)) { - return nanovdb::nanoToOpenVDB(*grid, verbose); - } else if (auto grid = handle.template grid(n)) { - return nanovdb::nanoToOpenVDB(*grid, verbose); - } else { - OPENVDB_THROW(openvdb::RuntimeError, "Unsupported NanoVDB grid type!"); - } -} - -} // namespace nanovdb - -#endif // NANOVDB_NANOTOOPENVDB_H_HAS_BEEN_INCLUDED +#include // for NANOVDB_DEPRECATED_HEADER +#include +NANOVDB_DEPRECATED_HEADER("Include nanovdb/tools/NanoToOpenVDB.h instead.") diff --git a/nanovdb/nanovdb/util/NodeManager.h b/nanovdb/nanovdb/util/NodeManager.h index 4da1eee873..5f665ee7f6 100644 --- a/nanovdb/nanovdb/util/NodeManager.h +++ b/nanovdb/nanovdb/util/NodeManager.h @@ -1,327 +1,6 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -/*! - \file NodeManager.h - - \author Ken Museth - - \date February 12, 2021 - - \brief This class allows for sequential access to nodes - in a NanoVDB tree on both the host and device. - - \details The ordering of the sequential access to nodes is always breadth-first! -*/ - -#include // for NanoGrid etc -#include "HostBuffer.h"// for HostBuffer - -#ifndef NANOVDB_NODEMANAGER_H_HAS_BEEN_INCLUDED -#define NANOVDB_NODEMANAGER_H_HAS_BEEN_INCLUDED - -namespace nanovdb { - -/// @brief NodeManager allows for sequential access to nodes -template -class NodeManager; - -/// @brief NodeManagerHandle manages the memory of a NodeManager -template -class NodeManagerHandle; - -/// @brief brief Construct a NodeManager and return its handle -/// -/// @param grid grid whose nodes will be accessed sequentially -/// @param buffer buffer from which to allocate the output handle -/// -/// @note This is the only way to create a NodeManager since it's using -/// managed memory pointed to by a NodeManagerHandle. -template -NodeManagerHandle createNodeManager(const NanoGrid &grid, - const BufferT& buffer = BufferT()); - -struct NodeManagerData -{// 48B = 6*8B - uint64_t mMagic;// 8B - union {int64_t mPadding; uint8_t mLinear;};// 8B of which 1B is used for a binary flag - void *mGrid;// 8B pointer to either host or device grid - union {int64_t *mPtr[3], mOff[3];};// 24B, use mOff if mLinear!=0 -}; - -/// @brief This class serves to manage a raw memory buffer of a NanoVDB NodeManager or LeafManager. -template -class NodeManagerHandle -{ - GridType mGridType{GridType::Unknown}; - BufferT mBuffer; - - template - const NodeManager* getMgr() const { - return mGridType == mapToGridType() ? (const NodeManager*)mBuffer.data() : nullptr; - } - - template - typename enable_if::hasDeviceDual, const NodeManager*>::type - getDeviceMgr() const { - return mGridType == mapToGridType() ? 
(const NodeManager*)mBuffer.deviceData() : nullptr; - } - - template - static T* no_const(const T* ptr) { return const_cast(ptr); } - -public: - /// @brief Move constructor from a buffer - NodeManagerHandle(GridType gridType, BufferT&& buffer) : mGridType(gridType) { mBuffer = std::move(buffer); } - /// @brief Empty ctor - NodeManagerHandle() = default; - /// @brief Disallow copy-construction - NodeManagerHandle(const NodeManagerHandle&) = delete; - /// @brief Disallow copy assignment operation - NodeManagerHandle& operator=(const NodeManagerHandle&) = delete; - /// @brief Move copy assignment operation - NodeManagerHandle& operator=(NodeManagerHandle&& other) noexcept { - mGridType = other.mGridType; - mBuffer = std::move(other.mBuffer); - other.mGridType = GridType::Unknown; - return *this; - } - /// @brief Move copy-constructor - NodeManagerHandle(NodeManagerHandle&& other) noexcept { - mGridType = other.mGridType; - mBuffer = std::move(other.mBuffer); - other.mGridType = GridType::Unknown; - } - /// @brief Default destructor - ~NodeManagerHandle() { this->reset(); } - /// @brief clear the buffer - void reset() { mBuffer.clear(); } - - /// @brief Return a reference to the buffer - BufferT& buffer() { return mBuffer; } - - /// @brief Return a const reference to the buffer - const BufferT& buffer() const { return mBuffer; } - - /// @brief Returns a non-const pointer to the data. - /// - /// @warning Note that the return pointer can be NULL if the NodeManagerHandle was not initialized - uint8_t* data() { return mBuffer.data(); } - - /// @brief Returns a const pointer to the data. - /// - /// @warning Note that the return pointer can be NULL if the NodeManagerHandle was not initialized - const uint8_t* data() const { return mBuffer.data(); } - - /// @brief Returns the size in bytes of the raw memory buffer managed by this NodeManagerHandle's allocator. - uint64_t size() const { return mBuffer.size(); } - - /// @brief Returns a const pointer to the NodeManager encoded in this NodeManagerHandle. - /// - /// @warning Note that the return pointer can be NULL if the template parameter does not match the specified grid! - template - const NodeManager* mgr() const { return this->template getMgr(); } - - /// @brief Returns a pointer to the NodeManager encoded in this NodeManagerHandle. - /// - /// @warning Note that the return pointer can be NULL if the template parameter does not match the specified grid! - template - NodeManager* mgr() { return no_const(this->template getMgr()); } - - /// @brief Return a const pointer to the NodeManager encoded in this NodeManagerHandle on the device, e.g. GPU - /// - /// @warning Note that the return pointer can be NULL if the template parameter does not match the specified grid! - template - typename enable_if::hasDeviceDual, const NodeManager*>::type - deviceMgr() const { return this->template getDeviceMgr(); } - - /// @brief Return a const pointer to the NodeManager encoded in this NodeManagerHandle on the device, e.g. GPU - /// - /// @warning Note that the return pointer can be NULL if the template parameter does not match the specified grid! - template - typename enable_if::hasDeviceDual, NodeManager*>::type - deviceMgr() { return no_const(this->template getDeviceMgr()); } - - /// @brief Upload the NodeManager to the device, e.g. 
from CPU to GPU - /// - /// @note This method is only available if the buffer supports devices - template - typename enable_if::hasDeviceDual, void>::type - deviceUpload(void* deviceGrid, void* stream = nullptr, bool sync = true) - { - assert(deviceGrid); - auto *data = reinterpret_cast(mBuffer.data()); - void *tmp = data->mGrid; - data->mGrid = deviceGrid; - mBuffer.deviceUpload(stream, sync); - data->mGrid = tmp; - } - - /// @brief Download the NodeManager to from the device, e.g. from GPU to CPU - /// - /// @note This method is only available if the buffer supports devices - template - typename enable_if::hasDeviceDual, void>::type - deviceDownload(void* stream = nullptr, bool sync = true) - { - auto *data = reinterpret_cast(mBuffer.data()); - void *tmp = data->mGrid; - mBuffer.deviceDownload(stream, sync); - data->mGrid = tmp; - } -};// NodeManagerHandle - -/// @brief This class allows for sequential access to nodes in a NanoVDB tree -/// -/// @details Nodes are always arranged breadth first during sequential access of nodes -/// at a particular level. -template -class NodeManager : private NodeManagerData -{ - using DataT = NodeManagerData; - using GridT = NanoGrid; - using TreeT = typename GridTree::type; - template - using NodeT = typename NodeTrait::type; - using RootT = NodeT<3>;// root node - using Node2 = NodeT<2>;// upper internal node - using Node1 = NodeT<1>;// lower internal node - using Node0 = NodeT<0>;// leaf node - -public: - static constexpr bool FIXED_SIZE = Node0::FIXED_SIZE && Node1::FIXED_SIZE && Node2::FIXED_SIZE; - - NodeManager(const NodeManager&) = delete; - NodeManager(NodeManager&&) = delete; - NodeManager& operator=(const NodeManager&) = delete; - NodeManager& operator=(NodeManager&&) = delete; - ~NodeManager() = delete; - - /// @brief return true if the nodes have both fixed size and are arranged breadth-first in memory. - /// This allows for direct and memory-efficient linear access to nodes. - __hostdev__ static bool isLinear(const GridT &grid) {return FIXED_SIZE && grid.isBreadthFirst();} - - /// @brief return true if the nodes have both fixed size and are arranged breadth-first in memory. - /// This allows for direct and memory-efficient linear access to nodes. 
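The NodeManager deleted in this hunk is re-homed under nanovdb/NodeManager.h (see the shim added at the end of this file's diff). A minimal host-side usage sketch of the API shown here, written against the pre-patch include path; makeGrid() is a hypothetical stand-in for any function yielding a NanoGrid<float>, and the default HostBuffer argument of createNodeManager is assumed:

    #include <nanovdb/util/NodeManager.h> // pre-patch path; post-patch: <nanovdb/NodeManager.h>

    extern const nanovdb::NanoGrid<float>& makeGrid(); // hypothetical grid source

    uint64_t countActiveLeafVoxels()
    {
        const auto& grid = makeGrid();
        auto handle = nanovdb::createNodeManager(grid); // NodeManagerHandle owning the manager
        const auto* mgr = handle.mgr<float>();          // nullptr if the build type mismatched
        uint64_t sum = 0;
        for (uint32_t i = 0, n = uint32_t(mgr->leafCount()); i < n; ++i) {
            sum += mgr->leaf(i).valueMask().countOn();  // i'th leaf in breadth-first order
        }
        return sum;
    }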
- __hostdev__ bool isLinear() const {return DataT::mLinear!=0u;} - - /// @brief Return the memory footprint in bytes of the NodeManager derived from the specified grid - __hostdev__ static uint64_t memUsage(const GridT &grid) { - uint64_t size = sizeof(NodeManagerData); - if (!NodeManager::isLinear(grid)) { - const uint32_t *p = grid.tree().mNodeCount; - size += sizeof(int64_t)*(p[0]+p[1]+p[2]); - } - return size; - } - - /// @brief Return the memory footprint in bytes of this instance - __hostdev__ uint64_t memUsage() const {return NodeManager::memUsage(this->grid());} - - /// @brief Return a reference to the grid - __hostdev__ GridT& grid() { return *reinterpret_cast(DataT::mGrid); } - __hostdev__ const GridT& grid() const { return *reinterpret_cast(DataT::mGrid); } - - /// @brief Return a reference to the tree - __hostdev__ TreeT& tree() { return this->grid().tree(); } - __hostdev__ const TreeT& tree() const { return this->grid().tree(); } - - /// @brief Return a reference to the root - __hostdev__ RootT& root() { return this->tree().root(); } - __hostdev__ const RootT& root() const { return this->tree().root(); } - - /// @brief Return the number of tree nodes at the specified level - /// @details 0 is leaf, 1 is lower internal, and 2 is upper internal level - __hostdev__ uint64_t nodeCount(int level) const { return this->tree().nodeCount(level); } - - __hostdev__ uint64_t leafCount() const { return this->tree().nodeCount(0); } - __hostdev__ uint64_t lowerCount() const { return this->tree().nodeCount(1); } - __hostdev__ uint64_t upperCount() const { return this->tree().nodeCount(2); } - - /// @brief Return the i'th leaf node with respect to breadth-first ordering - template - __hostdev__ const NodeT& node(uint32_t i) const { - NANOVDB_ASSERT(i < this->nodeCount(LEVEL)); - const NodeT* ptr = nullptr; - if (DataT::mLinear) { - ptr = PtrAdd>(DataT::mGrid, DataT::mOff[LEVEL]) + i; - } else { - ptr = PtrAdd>(DataT::mGrid, DataT::mPtr[LEVEL][i]); - } - NANOVDB_ASSERT(isValid(ptr)); - return *ptr; - } - - /// @brief Return the i'th node with respect to breadth-first ordering - template - __hostdev__ NodeT& node(uint32_t i) { - NANOVDB_ASSERT(i < this->nodeCount(LEVEL)); - NodeT* ptr = nullptr; - if (DataT::mLinear) { - ptr = PtrAdd>(DataT::mGrid, DataT::mOff[LEVEL]) + i; - } else { - ptr = PtrAdd>(DataT::mGrid, DataT::mPtr[LEVEL][i]); - } - NANOVDB_ASSERT(isValid(ptr)); - return *ptr; - } - - /// @brief Return the i'th leaf node with respect to breadth-first ordering - __hostdev__ const Node0& leaf(uint32_t i) const { return this->node<0>(i); } - __hostdev__ Node0& leaf(uint32_t i) { return this->node<0>(i); } - - /// @brief Return the i'th lower internal node with respect to breadth-first ordering - __hostdev__ const Node1& lower(uint32_t i) const { return this->node<1>(i); } - __hostdev__ Node1& lower(uint32_t i) { return this->node<1>(i); } - - /// @brief Return the i'th upper internal node with respect to breadth-first ordering - __hostdev__ const Node2& upper(uint32_t i) const { return this->node<2>(i); } - __hostdev__ Node2& upper(uint32_t i) { return this->node<2>(i); } - -}; // NodeManager class - -template -NodeManagerHandle createNodeManager(const NanoGrid &grid, - const BufferT& buffer) -{ - NodeManagerHandle handle(mapToGridType(), BufferT::create(NodeManager::memUsage(grid), &buffer)); - auto *data = reinterpret_cast(handle.data()); - NANOVDB_ASSERT(isValid(data)); - NANOVDB_ASSERT(mapToGridType() == grid.gridType()); -#ifdef NANOVDB_USE_NEW_MAGIC_NUMBERS - *data = 
NodeManagerData{NANOVDB_MAGIC_NODE, 0u, (void*)&grid, {0u,0u,0u}};
-#else
-    *data = NodeManagerData{NANOVDB_MAGIC_NUMBER, 0u, (void*)&grid, {0u,0u,0u}};
-#endif
-
-    if (NodeManager<BuildT>::isLinear(grid)) {
-        data->mLinear = uint8_t(1u);
-        data->mOff[0] = PtrDiff(grid.tree().template getFirstNode<0>(), &grid);
-        data->mOff[1] = PtrDiff(grid.tree().template getFirstNode<1>(), &grid);
-        data->mOff[2] = PtrDiff(grid.tree().template getFirstNode<2>(), &grid);
-    } else {
-        int64_t *ptr0 = data->mPtr[0] = reinterpret_cast<int64_t*>(data + 1);
-        int64_t *ptr1 = data->mPtr[1] = data->mPtr[0] + grid.tree().nodeCount(0);
-        int64_t *ptr2 = data->mPtr[2] = data->mPtr[1] + grid.tree().nodeCount(1);
-        // Performs depth first traversal but breadth first insertion
-        for (auto it2 = grid.tree().root().cbeginChild(); it2; ++it2) {
-            *ptr2++ = PtrDiff(&*it2, &grid);
-            for (auto it1 = it2->cbeginChild(); it1; ++it1) {
-                *ptr1++ = PtrDiff(&*it1, &grid);
-                for (auto it0 = it1->cbeginChild(); it0; ++it0) {
-                    *ptr0++ = PtrDiff(&*it0, &grid);
-                }// loop over child nodes of the lower internal node
-            }// loop over child nodes of the upper internal node
-        }// loop over child nodes of the root node
-    }
-
-    return handle;// is converted to r-value so return value is move constructed!
-}
-
-} // namespace nanovdb
-
-#if defined(__CUDACC__)
-#include <nanovdb/util/cuda/CudaNodeManager.cuh>
-#endif// defined(__CUDACC__)
-
-#endif // NANOVDB_NODEMANAGER_H_HAS_BEEN_INCLUDED
+#include <nanovdb/util/Util.h> // for NANOVDB_DEPRECATED_HEADER
+#include <nanovdb/NodeManager.h>
+NANOVDB_DEPRECATED_HEADER("Include nanovdb/NodeManager.h instead.")
diff --git a/nanovdb/nanovdb/util/OpenToNanoVDB.h b/nanovdb/nanovdb/util/OpenToNanoVDB.h
index ea6c2c94d7..a4cecde1de 100644
--- a/nanovdb/nanovdb/util/OpenToNanoVDB.h
+++ b/nanovdb/nanovdb/util/OpenToNanoVDB.h
@@ -1,15 +1,6 @@
 // Copyright Contributors to the OpenVDB Project
 // SPDX-License-Identifier: MPL-2.0
 
-/*!
-    \file OpenToNanoVDB.h
-
-    \author Ken Museth
-
-    \date January 8, 2020
-
-    \warning this file has been replaced by CreateNanoGrid.h
-
-*/
-
-#include "CreateNanoGrid.h"
\ No newline at end of file
+#include <nanovdb/util/Util.h> // for NANOVDB_DEPRECATED_HEADER
+#include <nanovdb/tools/CreateNanoGrid.h>
+NANOVDB_DEPRECATED_HEADER("Use nanovdb/tools/CreateNanoGrid.h instead.")
\ No newline at end of file
diff --git a/nanovdb/nanovdb/util/PrefixSum.h b/nanovdb/nanovdb/util/PrefixSum.h
index 87775c2d2a..0f70a81a10 100644
--- a/nanovdb/nanovdb/util/PrefixSum.h
+++ b/nanovdb/nanovdb/util/PrefixSum.h
@@ -2,7 +2,7 @@
 // SPDX-License-Identifier: MPL-2.0
 
 /*!
-    \file PrefixSum.h
+    \file nanovdb/util/PrefixSum.h
 
     \author Ken Museth
 
@@ -15,10 +15,10 @@
     last entry which is the sum of all the input elements.
 */
 
-#ifndef NANOVDB_PREFIX_SUM_H_HAS_BEEN_INCLUDED
-#define NANOVDB_PREFIX_SUM_H_HAS_BEEN_INCLUDED
+#ifndef NANOVDB_UTIL_PREFIX_SUM_H_HAS_BEEN_INCLUDED
+#define NANOVDB_UTIL_PREFIX_SUM_H_HAS_BEEN_INCLUDED
 
-#include "Range.h"// for Range1D
+#include <nanovdb/util/Range.h>// for Range1D
 
 #include <vector>
 #include <functional> // for std::plus
 
@@ -28,6 +28,8 @@
 
 namespace nanovdb {
 
+namespace util {
+
 /// @brief Computes inclusive prefix sum of a vector
 /// @tparam T Type of the elements in the input/out vector
 /// @tparam OpT Type of operation performed on each element (defaults to sum)
@@ -74,6 +76,15 @@ T prefixSum(std::vector<T> &vec, bool threaded, OpT op)
     return vec.back();// sum of all input elements
 }// prefixSum
 
+}// namespace util
+
+template<typename T, typename OpT = std::plus<T>>
+[[deprecated("Use nanovdb::util::prefixSum instead")]]
+T prefixSum(std::vector<T> &vec, bool threaded = true, OpT op = OpT())
+{
+    return util::prefixSum(vec, threaded, op);
+}// prefixSum
+
 }// namespace nanovdb
 
-#endif // NANOVDB_PREFIX_SUM_H_HAS_BEEN_INCLUDED
+#endif // NANOVDB_UTIL_PREFIX_SUM_H_HAS_BEEN_INCLUDED
diff --git a/nanovdb/nanovdb/util/Primitives.h b/nanovdb/nanovdb/util/Primitives.h
index 7c1f3a5856..0d6714e0d2 100644
--- a/nanovdb/nanovdb/util/Primitives.h
+++ b/nanovdb/nanovdb/util/Primitives.h
@@ -1,1754 +1,6 @@
 // Copyright Contributors to the OpenVDB Project
 // SPDX-License-Identifier: MPL-2.0
 
-/*!
-    \file Primitives.h
-
-    \author Ken Museth
-
-    \date June 26, 2020
-
-    \brief Generates volumetric primitives, e.g. sphere, torus etc, as NanoVDB grids.
-
-    \note This has no dependency on openvdb.
-*/
-
-#ifndef NANOVDB_PRIMITIVES_H_HAS_BEEN_INCLUDED
-#define NANOVDB_PRIMITIVES_H_HAS_BEEN_INCLUDED
-
-#define NANOVDB_PARALLEL_PRIMITIVES
-
-#include <nanovdb/NanoVDB.h>
-#include "CreateNanoGrid.h"
-#include <nanovdb/util/ForEach.h>
-
-namespace nanovdb {
-
-/// @brief Returns a handle to a narrow-band level set of a sphere
-///
-/// @param radius    Radius of sphere in world units
-/// @param center    Center of sphere in world units
-/// @param voxelSize Size of a voxel in world units
-/// @param halfWidth Half-width of narrow band in voxel units
-/// @param origin    Origin of grid in world units
-/// @param name      Name of the grid
-/// @param sMode     Mode of computation for the statistics.
-/// @param cMode     Mode of computation for the checksum.
-/// @param tolerance Global error tolerance used when VoxelT = FpN
-/// @param ditherOn  If true dithering will be applied when VoxelT = {Fp4,Fp8,Fp16,FpN}
-/// @param buffer    Buffer used for memory allocation by the handle
-///
-/// @details The @c BuildT template parameter must be one of the following:
-///          float (default), double, Fp4, Fp8, Fp16 or FpN. The @c tolerance
-///          argument is only used when BuildT is set to FpN.
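Before the sphere factory declarations that follow, a quick note on the PrefixSum.h hunk above: it applies the same pattern as Invoke.h, moving the function into nanovdb::util and leaving a [[deprecated]] forwarder under the old name. A minimal sketch using only the signature and semantics shown in that hunk (an in-place inclusive scan whose return value equals the total):

    #include <nanovdb/util/PrefixSum.h>

    #include <cassert>
    #include <vector>

    int main()
    {
        std::vector<int> v{1, 2, 3, 4};
        const int total = nanovdb::util::prefixSum(v, /*threaded=*/true); // v -> {1, 3, 6, 10}
        assert(total == 10 && total == v.back());
        return 0;
    }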
-template -typename enable_if::value || - is_same::value, GridHandle>::type -createLevelSetSphere(double radius = 100.0, - const Vec3d& center = Vec3d(0), - double voxelSize = 1.0, - double halfWidth = 3.0, - const Vec3d& origin = Vec3d(0), - const std::string& name = "sphere_ls", - StatsMode sMode = StatsMode::Default, - ChecksumMode cMode = ChecksumMode::Default, - const BufferT& buffer = BufferT()); - -template -typename enable_if::value || - is_same::value || - is_same::value, GridHandle>::type -createLevelSetSphere(double radius = 100.0, - const Vec3d& center = Vec3d(0), - double voxelSize = 1.0, - double halfWidth = 3.0, - const Vec3d& origin = Vec3d(0), - const std::string& name = "sphere_ls", - StatsMode sMode = StatsMode::Default, - ChecksumMode cMode = ChecksumMode::Default, - bool ditherOn = false, - const BufferT& buffer = BufferT()); - -template -typename enable_if::value, GridHandle>::type -createLevelSetSphere(double radius = 100.0, - const Vec3d& center = Vec3d(0), - double voxelSize = 1.0, - double halfWidth = 3.0, - const Vec3d& origin = Vec3d(0), - const std::string& name = "sphere_ls_FpN", - StatsMode sMode = StatsMode::Default, - ChecksumMode cMode = ChecksumMode::Default, - float tolerance = -1.0f, - bool ditherOn = false, - const BufferT& buffer = BufferT()); - -//================================================================================================ - -/// @brief Returns a handle to a sparse fog volume of a sphere such -/// that the exterior is 0 and inactive, the interior is active -/// with values varying smoothly from 0 at the surface of the -/// sphere to 1 at the halfWidth and interior of the sphere. -/// -/// @param radius Radius of sphere in world units -/// @param center Center of sphere in world units -/// @param voxelSize Size of a voxel in world units -/// @param halfWidth Half-width of narrow band in voxel units -/// @param origin Origin of grid in world units -/// @param name Name of the grid -/// @param sMode Mode of computation for the statistics. -/// @param cMode Mode of computation for the checksum. -/// @param tolerance Global error tolerance use when VoxelT = FpN -/// @param ditherOn If true dithering will be applied when BuildT = {Fp4,Fp8,Fp16,FpN} -/// @param buffer Buffer used for memory allocation by the handle -/// -/// @details The @c BuildT template parameter must be one of the following: -/// float (default), double, Fp4, Fp8, Fp16 or FpN. The @c tolerance -/// argument is only used when BuildT is set to FpN. 
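A usage sketch for the createLevelSetSphere overloads declared above, assuming the pre-patch nanovdb/util/Primitives.h; note that the tolerance/ditherOn arguments exist only on the compressed-build overloads:

    #include <nanovdb/util/Primitives.h>

    int main()
    {
        // default build type (float): radius 100, voxel size 1, half-width 3
        auto f32 = nanovdb::createLevelSetSphere<float>();

        // FpN build: the trailing tolerance/dither arguments select this overload
        auto fpn = nanovdb::createLevelSetSphere<nanovdb::FpN>(
            100.0, nanovdb::Vec3d(0), 1.0, 3.0, nanovdb::Vec3d(0), "sphere_ls_FpN",
            nanovdb::StatsMode::Default, nanovdb::ChecksumMode::Default,
            /*tolerance=*/0.01f, /*ditherOn=*/true);

        return (f32.size() > 0 && fpn.size() > 0) ? 0 : 1;
    }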
-template -typename disable_if::value, GridHandle>::type -createFogVolumeSphere(double radius = 100.0, - const Vec3d& center = Vec3d(0.0), - double voxelSize = 1.0, - double halfWidth = 3.0, - const Vec3d& origin = Vec3d(0.0), - const std::string& name = "sphere_fog", - StatsMode sMode = StatsMode::Default, - ChecksumMode cMode = ChecksumMode::Default, - const BufferT& buffer = BufferT()); - -template -typename enable_if::value, GridHandle>::type -createFogVolumeSphere(double radius = 100.0, - const Vec3d& center = Vec3d(0.0), - double voxelSize = 1.0, - double halfWidth = 3.0, - const Vec3d& origin = Vec3d(0.0), - const std::string& name = "sphere_fog", - StatsMode sMode = StatsMode::Default, - ChecksumMode cMode = ChecksumMode::Default, - float tolerance = -1.0f, - bool ditherOn = false, - const BufferT& buffer = BufferT()); - -//================================================================================================ - -/// @brief Returns a handle to a PointDataGrid containing points scattered -/// on the surface of a sphere. -/// -/// @param pointsPerVoxel Number of point per voxel on on the surface -/// @param radius Radius of sphere in world units -/// @param center Center of sphere in world units -/// @param voxelSize Size of a voxel in world units -/// @param origin Origin of grid in world units -/// @param name Name of the grid -/// @param mode Mode of computation for the checksum. -/// @param buffer Buffer used for memory allocation by the handle -/// -/// @details The @c BuildT template parameter must be float (default) or double. -template -typename disable_if::value, GridHandle>::type -createPointSphere(int pointsPerVoxel = 1, - double radius = 100.0, - const Vec3d& center = Vec3d(0.0), - double voxelSize = 1.0, - const Vec3d& origin = Vec3d(0.0), - const std::string& name = "sphere_points", - ChecksumMode mode = ChecksumMode::Default, - const BufferT& buffer = BufferT()); - -//================================================================================================ - -/// @brief Returns a handle to a narrow-band level set of a torus in the xz-plane -/// -/// @param majorRadius Major radius of torus in world units -/// @param minorRadius Minor radius of torus in world units -/// @param center Center of torus in world units -/// @param voxelSize Size of a voxel in world units -/// @param halfWidth Half-width of narrow band in voxel units -/// @param origin Origin of grid in world units -/// @param name Name of the grid -/// @param sMode Mode of computation for the statistics. -/// @param cMode Mode of computation for the checksum. -/// @param tolerance Global error tolerance use when VoxelT = FpN -/// @param ditherOn If true dithering will be applied when VoxelT = {Fp4,Fp8,Fp16,FpN} -/// @param buffer Buffer used for memory allocation by the handle -/// -/// @details The @c BuildT template parameter must be one of the following: -/// float (default), double, Fp4, Fp8, Fp16 or FpN. The @c tolerance -/// argument is only used when BuildT is set to FpN. 
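The level-set torus declared just below follows the same calling convention; a minimal sketch under the same pre-patch header assumption:

    #include <nanovdb/util/Primitives.h>

    int main()
    {
        // major radius 100, minor radius 50 (world units), remaining args defaulted
        auto torus = nanovdb::createLevelSetTorus<float>(100.0, 50.0);
        return torus.size() > 0 ? 0 : 1;
    }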
-template -typename disable_if::value, GridHandle>::type -createLevelSetTorus(double majorRadius = 100.0, - double minorRadius = 50.0, - const Vec3d& center = Vec3d(0.0), - double voxelSize = 1.0, - double halfWidth = 3.0, - const Vec3d& origin = Vec3d(0.0), - const std::string& name = "torus_ls", - StatsMode sMode = StatsMode::Default, - ChecksumMode cMode = ChecksumMode::Default, - const BufferT& buffer = BufferT()); - -template -typename enable_if::value, GridHandle>::type -createLevelSetTorus(double majorRadius = 100.0, - double minorRadius = 50.0, - const Vec3d& center = Vec3d(0.0), - double voxelSize = 1.0, - double halfWidth = 3.0, - const Vec3d& origin = Vec3d(0.0), - const std::string& name = "torus_ls", - StatsMode sMode = StatsMode::Default, - ChecksumMode cMode = ChecksumMode::Default, - float tolerance = -1.0f, - bool ditherOn = false, - const BufferT& buffer = BufferT()); - -//================================================================================================ - -/// @brief Returns a handle to a sparse fog volume of a torus in the xz-plane such -/// that the exterior is 0 and inactive, the interior is active -/// with values varying smoothly from 0 at the surface of the -/// torus to 1 at the halfWidth and interior of the torus. -/// -/// @param majorRadius Major radius of torus in world units -/// @param minorRadius Minor radius of torus in world units -/// @param center Center of torus in world units -/// @param voxelSize Size of a voxel in world units -/// @param halfWidth Half-width of narrow band in voxel units -/// @param origin Origin of grid in world units -/// @param name Name of the grid -/// @param sMode Mode of computation for the statistics. -/// @param cMode Mode of computation for the checksum. -/// @param tolerance Global error tolerance use when VoxelT = FpN -/// @param ditherOn If true dithering will be applied when VoxelT = {Fp4,Fp8,Fp16,FpN} -/// @param buffer Buffer used for memory allocation by the handle -/// -/// @details The @c BuildT template parameter must be one of the following: -/// float (default), double, Fp4, Fp8, Fp16 or FpN. The @c tolerance -/// argument is only used when BuildT is set to FpN. -template -typename disable_if::value, GridHandle>::type -createFogVolumeTorus(double majorRadius = 100.0, - double minorRadius = 50.0, - const Vec3d& center = Vec3d(0.0), - double voxelSize = 1.0, - double halfWidth = 3.0, - const Vec3d& origin = Vec3d(0.0), - const std::string& name = "torus_fog", - StatsMode sMode = StatsMode::Default, - ChecksumMode cMode = ChecksumMode::Default, - const BufferT& buffer = BufferT()); - -template -typename enable_if::value, GridHandle>::type -createFogVolumeTorus(double majorRadius = 100.0, - double minorRadius = 50.0, - const Vec3d& center = Vec3d(0.0), - double voxelSize = 1.0, - double halfWidth = 3.0, - const Vec3d& origin = Vec3d(0.0), - const std::string& name = "torus_fog_FpN", - StatsMode sMode = StatsMode::Default, - ChecksumMode cMode = ChecksumMode::Default, - float tolerance = -1.0f, - bool ditherOn = false, - const BufferT& buffer = BufferT()); - -//================================================================================================ - -/// @brief Returns a handle to a PointDataGrid containing points scattered -/// on the surface of a torus. 
-/// -/// @param pointsPerVoxel Number of point per voxel on on the surface -/// @param majorRadius Major radius of torus in world units -/// @param minorRadius Minor radius of torus in world units -/// @param center Center of torus in world units -/// @param voxelSize Size of a voxel in world units -/// @param origin Origin of grid in world units -/// @param name Name of the grid -/// @param cMode Mode of computation for the checksum. -/// @param buffer Buffer used for memory allocation by the handle -// -/// @details The @c BuildT template parameter must be float (default) or double. -template -typename disable_if::value, GridHandle>::type -createPointTorus(int pointsPerVoxel = 1, // half-width of narrow band in voxel units - double majorRadius = 100.0, // major radius of torus in world units - double minorRadius = 50.0, // minor radius of torus in world units - const Vec3d& center = Vec3d(0.0), // center of torus in world units - double voxelSize = 1.0, // size of a voxel in world units - const Vec3d& origin = Vec3d(0.0f), // origin of grid in world units - const std::string& name = "torus_points", // name of grid - ChecksumMode cMode = ChecksumMode::Default, - const BufferT& buffer = BufferT()); - -//================================================================================================ - -/// @brief Returns a handle to a narrow-band level set of a box -/// -/// @param width Width of box in world units -/// @param height Height of box in world units -/// @param depth Depth of box in world units -/// @param center Center of box in world units -/// @param voxelSize Size of a voxel in world units -/// @param halfWidth Half-width of narrow band in voxel units -/// @param origin Origin of grid in world units -/// @param name Name of the grid -/// @param sMode Mode of computation for the statistics. -/// @param cMode Mode of computation for the checksum. -/// @param tolerance Global error tolerance use when VoxelT = FpN -/// @param ditherOn If true dithering will be applied when VoxelT = {Fp4,Fp8,Fp16,FpN} -/// @param buffer Buffer used for memory allocation by the handle -/// -/// @details The @c BuildT template parameter must be one of the following: -/// float (default), double, Fp4, Fp8, Fp16 or FpN. The @c tolerance -/// argument is only used when BuildT is set to FpN. 
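Likewise for the level-set box declared just below, whose width/height/depth lead the argument list; a sketch, same header assumption:

    #include <nanovdb/util/Primitives.h>

    int main()
    {
        // 40 x 60 x 100 box in world units, default transform and narrow band
        auto box = nanovdb::createLevelSetBox<float>(40.0, 60.0, 100.0);
        return box.size() > 0 ? 0 : 1;
    }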
-template -typename disable_if::value, GridHandle>::type -createLevelSetBox(double width = 40.0, - double height = 60.0, - double depth = 100.0, - const Vec3d& center = Vec3d(0.0), - double voxelSize = 1.0, - double halfWidth = 3.0, - const Vec3d& origin = Vec3d(0.0), - const std::string& name = "box_ls", - StatsMode sMode = StatsMode::Default, - ChecksumMode cMode = ChecksumMode::Default, - const BufferT& buffer = BufferT()); - -template -typename enable_if::value, GridHandle>::type -createLevelSetBox(double width = 40.0, - double height = 60.0, - double depth = 100.0, - const Vec3d& center = Vec3d(0.0), - double voxelSize = 1.0, - double halfWidth = 3.0, - const Vec3d& origin = Vec3d(0.0), - const std::string& name = "box_ls_FpN", - StatsMode sMode = StatsMode::Default, - ChecksumMode cMode = ChecksumMode::Default, - float tolerance = -1.0f, - bool ditherOn = false, - const BufferT& buffer = BufferT()); - -//================================================================================================ - -/// @brief Returns a handle to a sparse fog volume of a box such -/// that the exterior is 0 and inactive, the interior is active -/// with values varying smoothly from 0 at the surface of the -/// box to 1 at the halfWidth and interior of the box. -/// -/// @param width Width of box in world units -/// @param height Height of box in world units -/// @param depth Depth of box in world units -/// @param center Center of box in world units -/// @param voxelSize Size of a voxel in world units -/// @param halfWidth Half-width of narrow band in voxel units -/// @param origin Origin of grid in world units -/// @param name Name of the grid -/// @param sMode Mode of computation for the statistics. -/// @param cMode Mode of computation for the checksum. -/// @param tolerance Global error tolerance use when VoxelT = FpN -/// @param ditherOn If true dithering will be applied when VoxelT = {Fp4,Fp8,Fp16,FpN} -/// @param buffer Buffer used for memory allocation by the handle -/// -/// @details The @c BuildT template parameter must be one of the following: -/// float (default), double, Fp4, Fp8, Fp16 or FpN. The @c tolerance -/// argument is only used when BuildT is set to FpN. 
-template -typename disable_if::value, GridHandle>::type -createFogVolumeBox(double width = 40.0, - double height = 60.0, - double depth = 100.0, - const Vec3d& center = Vec3d(0.0), - double voxelSize = 1.0, - double halfWidth = 3.0, - const Vec3d& origin = Vec3d(0.0), - const std::string& name = "box_fog", - StatsMode sMode = StatsMode::Default, - ChecksumMode cMode = ChecksumMode::Default, - const BufferT& buffer = BufferT()); - -template -typename enable_if::value, GridHandle>::type -createFogVolumeBox(double width = 40.0, - double height = 60.0, - double depth = 100.0, - const Vec3d& center = Vec3d(0.0), - double voxelSize = 1.0, - double halfWidth = 3.0, - const Vec3d& origin = Vec3d(0.0), - const std::string& name = "box_fog_FpN", - StatsMode sMode = StatsMode::Default, - ChecksumMode cMode = ChecksumMode::Default, - float tolerance = -1.0f, - bool ditherOn = false, - const BufferT& buffer = BufferT()); - -//================================================================================================ - -/// @brief Returns a handle to a narrow-band level set of a octahedron -/// -/// @param scale Scale of octahedron in world units -/// @param center Center of octahedron in world units -/// @param voxelSize Size of a voxel in world units -/// @param halfWidth Half-width of narrow band in voxel units -/// @param origin Origin of grid in world units -/// @param name Name of the grid -/// @param sMode Mode of computation for the statistics. -/// @param cMode Mode of computation for the checksum. -/// @param tolerance Global error tolerance use when VoxelT = FpN -/// @param ditherOn If true dithering will be applied when VoxelT = {Fp4,Fp8,Fp16,FpN} -/// @param buffer Buffer used for memory allocation by the handle -/// -/// @details The @c BuildT template parameter must be one of the following: -/// float (default), double, Fp4, Fp8, Fp16 or FpN. The @c tolerance -/// argument is only used when BuildT is set to FpN. -template -typename disable_if::value, GridHandle>::type -createLevelSetOctahedron(double scale = 100.0, - const Vec3d& center = Vec3d(0.0), - double voxelSize = 1.0, - double halfWidth = 3.0, - const Vec3d& origin = Vec3d(0.0), - const std::string& name = "octadedron_ls", - StatsMode sMode = StatsMode::Default, - ChecksumMode cMode = ChecksumMode::Default, - const BufferT& buffer = BufferT()); - -template -typename enable_if::value, GridHandle>::type -createLevelSetOctahedron(double scale = 100.0, - const Vec3d& center = Vec3d(0.0), - double voxelSize = 1.0, - double halfWidth = 3.0, - const Vec3d& origin = Vec3d(0.0), - const std::string& name = "octadedron_ls_FpN", - StatsMode sMode = StatsMode::Default, - ChecksumMode cMode = ChecksumMode::Default, - float tolerance = -1.0f, - bool ditherOn = false, - const BufferT& buffer = BufferT()); - -//================================================================================================ - -/// @brief Returns a handle to a sparse fog volume of an octahedron such -/// that the exterior is 0 and inactive, the interior is active -/// with values varying smoothly from 0 at the surface of the -/// octahedron to 1 at the halfWidth and interior of the octahedron. -/// -/// @param scale Scale of octahedron in world units -/// @param center Center of box in world units -/// @param voxelSize Size of a voxel in world units -/// @param halfWidth Half-width of narrow band in voxel units -/// @param origin Origin of grid in world units -/// @param name Name of the grid -/// @param sMode Mode of computation for the statistics. 
-/// @param cMode Mode of computation for the checksum. -/// @param tolerance Global error tolerance use when VoxelT = FpN -/// @param ditherOn If true dithering will be applied when VoxelT = {Fp4,Fp8,Fp16,FpN} -/// @param buffer Buffer used for memory allocation by the handle -/// -/// @details The @c BuildT template parameter must be one of the following: -/// float (default), double, Fp4, Fp8, Fp16 or FpN. The @c tolerance -/// argument is only used when BuildT is set to FpN. -template -typename disable_if::value, GridHandle>::type -createFogVolumeOctahedron(double scale = 100.0, - const Vec3d& center = Vec3d(0.0), - double voxelSize = 1.0, - double halfWidth = 3.0, - const Vec3d& origin = Vec3d(0.0), - const std::string& name = "octadedron_fog", - StatsMode sMode = StatsMode::Default, - ChecksumMode cMode = ChecksumMode::Default, - const BufferT& buffer = BufferT()); - -template -typename enable_if::value, GridHandle>::type -createFogVolumeOctahedron(double scale = 100.0, - const Vec3d& center = Vec3d(0.0), - double voxelSize = 1.0, - double halfWidth = 3.0, - const Vec3d& origin = Vec3d(0.0), - const std::string& name = "octadedron_fog_FpN", - StatsMode sMode = StatsMode::Default, - ChecksumMode cMode = ChecksumMode::Default, - float tolerance = -1.0f, - bool ditherOn = false, - const BufferT& buffer = BufferT()); - -//================================================================================================ - -/// @brief Returns a handle to a narrow-band level set of a bounding-box (= wireframe of a box) -/// -/// @param width Width of box in world units -/// @param height Height of box in world units -/// @param depth Depth of box in world units -/// @param thickness Thickness of the wire in world units -/// @param center Center of bbox in world units -/// @param voxelSize Size of a voxel in world units -/// @param halfWidth Half-width of narrow band in voxel units -/// @param origin Origin of grid in world units -/// @param name Name of the grid -/// @param sMode Mode of computation for the statistics. -/// @param cMode Mode of computation for the checksum. -/// @param tolerance Global error tolerance use when VoxelT = FpN -/// @param ditherOn If true dithering will be applied when VoxelT = {Fp4,Fp8,Fp16,FpN} -/// @param buffer Buffer used for memory allocation by the handle -/// -/// @details The @c BuildT template parameter must be one of the following: -/// float (default), double, Fp4, Fp8, Fp16 or FpN. The @c tolerance -/// argument is only used when BuildT is set to FpN. 
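The bounding-box primitive declared just below is the wireframe of a box, so a thickness parameter follows the three box dimensions; a sketch, same header assumption:

    #include <nanovdb/util/Primitives.h>

    int main()
    {
        // 40 x 60 x 100 wireframe with wire thickness 10 (all world units)
        auto wire = nanovdb::createLevelSetBBox<float>(40.0, 60.0, 100.0, 10.0);
        return wire.size() > 0 ? 0 : 1;
    }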
-template -typename disable_if::value, GridHandle>::type -createLevelSetBBox(double width = 40.0, - double height = 60.0, - double depth = 100.0, - double thickness = 10.0, - const Vec3d& center = Vec3d(0.0), - double voxelSize = 1.0, - double halfWidth = 3.0, - const Vec3d& origin = Vec3d(0.0), - const std::string& name = "bbox_ls", - StatsMode sMode = StatsMode::Default, - ChecksumMode cMode = ChecksumMode::Default, - const BufferT& buffer = BufferT()); - -template -typename enable_if::value, GridHandle>::type -createLevelSetBBox(double width = 40.0, - double height = 60.0, - double depth = 100.0, - double thickness = 10.0, - const Vec3d& center = Vec3d(0.0), - double voxelSize = 1.0, - double halfWidth = 3.0, - const Vec3d& origin = Vec3d(0.0), - const std::string& name = "bbox_ls_FpN", - StatsMode sMode = StatsMode::Default, - ChecksumMode cMode = ChecksumMode::Default, - float tolerance = -1.0f, - bool ditherOn = false, - const BufferT& buffer = BufferT()); - - -//================================================================================================ - -/// @brief Returns a handle to a PointDataGrid containing points scattered -/// on the surface of a box. -/// -/// @param pointsPerVoxel Number of point per voxel on on the surface -/// @param width Width of box in world units -/// @param height Height of box in world units -/// @param depth Depth of box in world units -/// @param center Center of box in world units -/// @param voxelSize Size of a voxel in world units -/// @param origin Origin of grid in world units -/// @param name Name of the grid -/// @param mode Mode of computation for the checksum. -/// @param buffer Buffer used for memory allocation by the handle -template -typename disable_if::value, GridHandle>::type -createPointBox(int pointsPerVoxel = 1, // half-width of narrow band in voxel units - double width = 40.0, // width of box in world units - double height = 60.0, // height of box in world units - double depth = 100.0, // depth of box in world units - const Vec3d& center = Vec3d(0.0), // center of box in world units - double voxelSize = 1.0, // size of a voxel in world units - const Vec3d& origin = Vec3d(0.0), // origin of grid in world units - const std::string& name = "box_points", // name of grid - ChecksumMode mode = ChecksumMode::Default, - const BufferT& buffer = BufferT()); - -//================================================================================================ - -/// @brief Given an input NanoVDB voxel grid this methods returns a GridHandle to another NanoVDB -/// PointDataGrid with points scattered in the active leaf voxels of in input grid. Note, the -/// coordinates of the points are encoded as blind data in world-space. -/// -/// @param srcGrid Const input grid used to determine the active voxels to scatter points into -/// @param pointsPerVoxel Number of point per voxel on on the surface -/// @param name Name of the grid -/// @param mode Mode of computation for the checksum. 
-/// @param buffer Buffer used for memory allocation by the handle -template -inline GridHandle -createPointScatter(const NanoGrid& srcGrid, // source grid used to scatter points into - int pointsPerVoxel = 1, // half-width of narrow band in voxel units - const std::string& name = "point_scatter", // name of grid - ChecksumMode mode = ChecksumMode::Default, - const BufferT& buffer = BufferT()); - -//================================================================================================ - -namespace { - -/// @brief Returns a shared pointer to a build::Grid containing a narrow-band SDF values for a sphere -/// -/// @brief Note, this is not (yet) a valid level set SDF field since values inside sphere (and outside -/// the narrow band) are still undefined. Call builder::sdfToLevelSet() to set those -/// values or alternatively call builder::levelSetToFog to generate a FOG volume. -/// -/// @details The @c BuildT template parameter must be one of the following: -/// float (default), double, Fp4, Fp8, Fp16 or FpN. -template -std::shared_ptr> -initSphere(double radius, // radius of sphere in world units - const Vec3d& center, // center of sphere in world units - double voxelSize, // size of a voxel in world units - double halfWidth, // half-width of narrow band in voxel units - const Vec3d& origin) // origin of grid in world units -{ - using GridT = build::Grid; - using ValueT = typename BuildToValueMap::type; - static_assert(is_floating_point::value, "initSphere: expect floating point"); - if (!(radius > 0)) - throw std::runtime_error("Sphere: radius must be positive!"); - if (!(voxelSize > 0)) - throw std::runtime_error("Sphere: voxelSize must be positive!"); - if (!(halfWidth > 0)) - throw std::runtime_error("Sphere: halfWidth must be positive!"); - - auto grid = std::make_shared(ValueT(halfWidth * voxelSize)); - grid->setTransform(voxelSize, origin); - - // Define radius of sphere with narrow-band in voxel units - const ValueT r0 = radius / ValueT(voxelSize), rmax = r0 + ValueT(halfWidth); - - // Radius below the Nyquist frequency - if (r0 < ValueT(1.5f)) return grid; - - // Define center of sphere in voxel units - const Vec3 c(ValueT(center[0] - origin[0]) / ValueT(voxelSize), - ValueT(center[1] - origin[1]) / ValueT(voxelSize), - ValueT(center[2] - origin[2]) / ValueT(voxelSize)); - - // Define bounds of the voxel coordinates - const int imin = Floor(c[0] - rmax), imax = Ceil(c[0] + rmax); - const int jmin = Floor(c[1] - rmax), jmax = Ceil(c[1] + rmax); - const int kmin = Floor(c[2] - rmax), kmax = Ceil(c[2] + rmax); - - const Range<1,int> range(imin, imax+1, 32); - - auto kernel = [&](const Range<1,int> &r) { - auto acc = grid->getWriteAccessor(); - Coord ijk; - int &i = ijk[0], &j = ijk[1], &k = ijk[2], m = 1; - // Compute signed distances to sphere using leapfrogging in k - for (i = r.begin(); i < r.end(); ++i) { - const auto x2 = Pow2(ValueT(i) - c[0]); - for (j = jmin; j <= jmax; ++j) { - const auto x2y2 = Pow2(ValueT(j) - c[1]) + x2; - for (k = kmin; k <= kmax; k += m) { - m = 1; - const auto v = Sqrt(x2y2 + Pow2(ValueT(k) - c[2])) - r0; // Distance in voxel units - const auto d = v < 0 ? 
-v : v; - if (d < halfWidth) { // inside narrow band - acc.setValue(ijk, ValueT(voxelSize) * v); // distance in world units - } else { // outside narrow band - m += Floor(d - halfWidth); // leapfrog - } - } //end leapfrog over k - } //end loop over j - } //end loop over i - };// kernel -#ifdef NANOVDB_PARALLEL_PRIMITIVES - forEach(range, kernel); -#else - kernel(range); -#endif - return grid; -} // initSphere - -template -std::shared_ptr> -initTorus(double radius1, // major radius of torus in world units - double radius2, // minor radius of torus in world units - const Vec3d& center, // center of torus in world units - double voxelSize, // size of a voxel in world units - double halfWidth, // half-width of narrow band in voxel units - const Vec3d& origin) // origin of grid in world units -{ - using GridT = build::Grid; - using ValueT = typename BuildToValueMap::type; - static_assert(is_floating_point::value, "initTorus: expect floating point"); - if (!(radius2 > 0)) - throw std::runtime_error("Torus: radius2 must be positive!"); - if (!(radius1 > radius2)) - throw std::runtime_error("Torus: radius1 must be larger than radius2!"); - if (!(voxelSize > 0)) - throw std::runtime_error("Torus: voxelSize must be positive!"); - if (!(halfWidth > 0)) - throw std::runtime_error("Torus: halfWidth must be positive!"); - - auto grid = std::make_shared(ValueT(halfWidth * voxelSize)); - grid->setTransform(voxelSize, origin); - - // Define size of torus with narrow-band in voxel units - const ValueT r1 = radius1 / ValueT(voxelSize), r2 = radius2 / ValueT(voxelSize), rmax1 = r1 + r2 + ValueT(halfWidth), rmax2 = r2 + ValueT(halfWidth); - - // Radius below the Nyquist frequency - if (r2 < ValueT(1.5)) return grid; - - // Define center of torus in voxel units - const Vec3 c(ValueT(center[0] - origin[0]) / ValueT(voxelSize), - ValueT(center[1] - origin[1]) / ValueT(voxelSize), - ValueT(center[2] - origin[2]) / ValueT(voxelSize)); - - // Define bounds of the voxel coordinates - const int imin = Floor(c[0] - rmax1), imax = Ceil(c[0] + rmax1); - const int jmin = Floor(c[1] - rmax2), jmax = Ceil(c[1] + rmax2); - const int kmin = Floor(c[2] - rmax1), kmax = Ceil(c[2] + rmax1); - - const Range<1,int> range(imin, imax+1, 32); - auto kernel = [&](const Range<1,int> &r) { - auto acc = grid->getWriteAccessor(); - Coord ijk; - int &i = ijk[0], &j = ijk[1], &k = ijk[2], m = 1; - // Compute signed distances to torus using leapfrogging in k - for (i = r.begin(); i < r.end(); ++i) { - const auto x2 = Pow2(ValueT(i) - c[0]); - for (k = kmin; k <= kmax; ++k) { - const auto x2z2 = Pow2(Sqrt(Pow2(ValueT(k) - c[2]) + x2) - r1); - for (j = jmin; j <= jmax; j += m) { - m = 1; - const auto v = Sqrt(x2z2 + Pow2(ValueT(j) - c[1])) - r2; // Distance in voxel units - const auto d = v < 0 ? 
-v : v; - if (d < halfWidth) { // inside narrow band - acc.setValue(ijk, ValueT(voxelSize) * v); // distance in world units - } else { // outside narrow band - m += Floor(d - halfWidth); // leapfrog - } - } //end leapfrog over k - } //end loop over j - } //end loop over i - }; // kernel - -#ifdef NANOVDB_PARALLEL_PRIMITIVES - forEach(range, kernel); -#else - kernel(range); -#endif - - return grid; -} // initTorus - -template -std::shared_ptr> -initBox(double width, // major radius of torus in world units - double height, // minor radius of torus in world units - double depth, - const Vec3d& center, // center of box in world units - double voxelSize, // size of a voxel in world units - double halfWidth, // half-width of narrow band in voxel units - const Vec3d& origin) // origin of grid in world units -{ - using GridT = build::Grid; - using ValueT = typename BuildToValueMap::type; - static_assert(is_floating_point::value, "initBox: expect floating point"); - using Vec3T = Vec3; - if (!(width > 0)) - throw std::runtime_error("Box: width must be positive!"); - if (!(height > 0)) - throw std::runtime_error("Box: height must be positive!"); - if (!(depth > 0)) - throw std::runtime_error("Box: depth must be positive!"); - - if (!(voxelSize > 0)) - throw std::runtime_error("Box: voxelSize must be positive!"); - if (!(halfWidth > 0)) - throw std::runtime_error("Box: halfWidth must be positive!"); - - auto grid = std::make_shared(ValueT(halfWidth * voxelSize)); - grid->setTransform(voxelSize, origin); - - // Define size of box with narrow-band in voxel units - const Vec3T r(width / (2 * ValueT(voxelSize)), - height / (2 * ValueT(voxelSize)), - depth / (2 * ValueT(voxelSize))); - - // Below the Nyquist frequency - if (r.min() < ValueT(1.5)) return grid; - - // Define center of box in voxel units - const Vec3T c(ValueT(center[0] - origin[0]) / ValueT(voxelSize), - ValueT(center[1] - origin[1]) / ValueT(voxelSize), - ValueT(center[2] - origin[2]) / ValueT(voxelSize)); - - // Define utility functions - auto Pos = [](ValueT x) { return x > 0 ? x : 0; }; - auto Neg = [](ValueT x) { return x < 0 ? 
x : 0; }; - - // Define bounds of the voxel coordinates - const BBox b(c - r - Vec3T(ValueT(halfWidth)), c + r + Vec3T(ValueT(halfWidth))); - const CoordBBox bbox(Coord(Floor(b[0][0]), Floor(b[0][1]), Floor(b[0][2])), - Coord( Ceil(b[1][0]), Ceil(b[1][1]), Ceil(b[1][2]))); - const Range<1,int> range(bbox[0][0], bbox[1][0]+1, 32); - - // Compute signed distances to box using leapfrogging in k - auto kernel = [&](const Range<1,int> &ra) { - auto acc = grid->getWriteAccessor(); - int m = 1; - for (Coord p(ra.begin(),bbox[0][1],bbox[0][2]); p[0] < ra.end(); ++p[0]) { - const auto q1 = Abs(ValueT(p[0]) - c[0]) - r[0]; - const auto x2 = Pow2(Pos(q1)); - for (p[1] = bbox[0][1]; p[1] <= bbox[1][1]; ++p[1]) { - const auto q2 = Abs(ValueT(p[1]) - c[1]) - r[1]; - const auto q0 = Max(q1, q2); - const auto x2y2 = x2 + Pow2(Pos(q2)); - for (p[2] = bbox[0][2]; p[2] <= bbox[1][2]; p[2] += m) { - m = 1; - const auto q3 = Abs(ValueT(p[2]) - c[2]) - r[2]; - const auto v = Sqrt(x2y2 + Pow2(Pos(q3))) + Neg(Max(q0, q3)); // Distance in voxel units - const auto d = Abs(v); - if (d < halfWidth) { // inside narrow band - acc.setValue(p, ValueT(voxelSize) * v); // distance in world units - } else { // outside narrow band - m += Floor(d - halfWidth); // leapfrog - } - } //end leapfrog over k - } //end loop over j - } //end loop over i - }; // kernel -#ifdef NANOVDB_PARALLEL_PRIMITIVES - forEach(range, kernel); -#else - kernel(range); -#endif - return grid; -} // initBox - -template -std::shared_ptr> -initBBox(double width, // width of the bbox in world units - double height, // height of the bbox in world units - double depth, // depth of the bbox in world units - double thickness, // thickness of the wire in world units - const Vec3d& center, // center of bbox in world units - double voxelSize, // size of a voxel in world units - double halfWidth, // half-width of narrow band in voxel units - const Vec3d& origin) // origin of grid in world units -{ - using GridT = build::Grid; - using ValueT = typename BuildToValueMap::type; - static_assert(is_floating_point::value, "initBBox: expect floating point"); - using Vec3T = Vec3; - if (!(width > 0)) - throw std::runtime_error("BBox: width must be positive!"); - if (!(height > 0)) - throw std::runtime_error("BBox: height must be positive!"); - if (!(depth > 0)) - throw std::runtime_error("BBox: depth must be positive!"); - if (!(thickness > 0)) - throw std::runtime_error("BBox: thickness must be positive!"); - if (!(voxelSize > 0.0)) - throw std::runtime_error("BBox: voxelSize must be positive!"); - - - auto grid = std::make_shared(ValueT(halfWidth * voxelSize)); - grid->setTransform(voxelSize, origin); - - // Define size of bbox with narrow-band in voxel units - const Vec3T r(width / (2 * ValueT(voxelSize)), - height / (2 * ValueT(voxelSize)), - depth / (2 * ValueT(voxelSize))); - const ValueT e = thickness / ValueT(voxelSize); - - // Below the Nyquist frequency - if (r.min() < ValueT(1.5) || e < ValueT(1.5)) return grid; - - // Define center of bbox in voxel units - const Vec3T c(ValueT(center[0] - origin[0]) / ValueT(voxelSize), - ValueT(center[1] - origin[1]) / ValueT(voxelSize), - ValueT(center[2] - origin[2]) / ValueT(voxelSize)); - - // Define utility functions - auto Pos = [](ValueT x) { return x > 0 ? x : 0; }; - auto Neg = [](ValueT x) { return x < 0 ? 
x : 0; }; - - // Define bounds of the voxel coordinates - const BBox b(c - r - Vec3T(e + ValueT(halfWidth)), c + r + Vec3T(e + ValueT(halfWidth))); - const CoordBBox bbox(Coord(Floor(b[0][0]), Floor(b[0][1]), Floor(b[0][2])), - Coord( Ceil(b[1][0]), Ceil(b[1][1]), Ceil(b[1][2]))); - const Range<1,int> range(bbox[0][0], bbox[1][0]+1, 32); - - // Compute signed distances to bbox using leapfrogging in k - auto kernel = [&](const Range<1,int> &ra) { - auto acc = grid->getWriteAccessor(); - int m = 1; - for (Coord p(ra.begin(),bbox[0][1],bbox[0][2]); p[0] < ra.end(); ++p[0]) { - const ValueT px = Abs(ValueT(p[0]) - c[0]) - r[0]; - const ValueT qx = Abs(ValueT(px) + e) - e; - const ValueT px2 = Pow2(Pos(px)); - const ValueT qx2 = Pow2(Pos(qx)); - for (p[1] = bbox[0][1]; p[1] <= bbox[1][1]; ++p[1]) { - const ValueT py = Abs(ValueT(p[1]) - c[1]) - r[1]; - const ValueT qy = Abs(ValueT(py) + e) - e; - const ValueT qy2 = Pow2(Pos(qy)); - const ValueT px2qy2 = px2 + qy2; - const ValueT qx2py2 = qx2 + Pow2(Pos(py)); - const ValueT qx2qy2 = qx2 + qy2; - const ValueT a[3] = {Max(px, qy), Max(qx, py), Max(qx, qy)}; - for (p[2] = bbox[0][2]; p[2] <= bbox[1][2]; p[2] += m) { - m = 1; - const ValueT pz = Abs(ValueT(p[2]) - c[2]) - r[2]; - const ValueT qz = Abs(ValueT(pz) + e) - e; - const ValueT qz2 = Pow2(Pos(qz)); - const ValueT s1 = Sqrt(px2qy2 + qz2) + Neg(Max(a[0], qz)); - const ValueT s2 = Sqrt(qx2py2 + qz2) + Neg(Max(a[1], qz)); - const ValueT s3 = Sqrt(qx2qy2 + Pow2(Pos(pz))) + Neg(Max(a[2], pz)); - const ValueT v = Min(s1, Min(s2, s3)); // Distance in voxel units - const ValueT d = Abs(v); - if (d < halfWidth) { // inside narrow band - acc.setValue(p, ValueT(voxelSize) * v); // distance in world units - } else { // outside narrow band - m += Floor(d - halfWidth); // leapfrog - } - } //end leapfrog over k - } //end loop over j - } //end loop over i - }; //kernel -#ifdef NANOVDB_PARALLEL_PRIMITIVES - forEach(range, kernel); -#else - kernel(range); -#endif - - return grid; -} // initBBox - -template -std::shared_ptr> -initOctahedron(double scale, // scale of the octahedron in world units - const Vec3d& center, // center of octahedron in world units - double voxelSize, // size of a voxel in world units - double halfWidth, // half-width of narrow band in voxel units - const Vec3d& origin) // origin of grid in world units -{ - using GridT = build::Grid; - using ValueT = typename BuildToValueMap::type; - using Vec3T = Vec3; - static_assert(is_floating_point::value, "initOctahedron: expect floating point"); - - if (!(scale > 0)) throw std::runtime_error("Octahedron: scale must be positive!"); - if (!(voxelSize > 0)) throw std::runtime_error("Octahedron: voxelSize must be positive!"); - - auto grid = std::make_shared(ValueT(halfWidth * voxelSize)); - grid->setTransform(voxelSize, origin); - - // Define size of octahedron with narrow-band in voxel units - const ValueT s = scale / (2 * ValueT(voxelSize)); - - // Below the Nyquist frequency - if ( s < ValueT(1.5) ) return grid; - - // Define center of octahedron in voxel units - const Vec3T c(ValueT(center[0] - origin[0]) / ValueT(voxelSize), - ValueT(center[1] - origin[1]) / ValueT(voxelSize), - ValueT(center[2] - origin[2]) / ValueT(voxelSize)); - - // Define utility functions - auto sdf = [&s](ValueT x, ValueT y, ValueT z) { - const ValueT d = ValueT(0.5)*(z - y + s); - if (d < ValueT(0)) { - return Vec3T(x, y - s, z).length(); - } else if (d > s) { - return Vec3T(x, y, z - s).length(); - } - return Vec3T(x, y - s + d, z - d).length(); - }; - - // Define
bounds of the voxel coordinates - const BBox b(c - Vec3T(s + ValueT(halfWidth)), c + Vec3T(s + ValueT(halfWidth))); - const CoordBBox bbox(Coord(Floor(b[0][0]), Floor(b[0][1]), Floor(b[0][2])), - Coord( Ceil(b[1][0]), Ceil(b[1][1]), Ceil(b[1][2]))); - const Range<1,int> range(bbox[0][0], bbox[1][0]+1, 32); - - // Compute signed distances to octahedron using leapfrogging in k - auto kernel = [&](const Range<1,int> &ra) { - auto acc = grid->getWriteAccessor(); - int m = 1; - static const ValueT a = Sqrt(ValueT(1)/ValueT(3)); - for (Coord p(ra.begin(),bbox[0][1],bbox[0][2]); p[0] < ra.end(); ++p[0]) { - const ValueT px = Abs(ValueT(p[0]) - c[0]); - for (p[1] = bbox[0][1]; p[1] <= bbox[1][1]; ++p[1]) { - const ValueT py = Abs(ValueT(p[1]) - c[1]); - for (p[2] = bbox[0][2]; p[2] <= bbox[1][2]; p[2] += m) { - m = 1; - const ValueT pz = Abs(ValueT(p[2]) - c[2]); - ValueT d = px + py + pz - s; - ValueT v; - if (ValueT(3)*px < d) { - v = sdf(px, py, pz); - } else if (ValueT(3)*py < d) { - v = sdf(py, pz, px); - } else if (ValueT(3)*pz < d) { - v = sdf(pz, px, py); - } else { - v = a * d; - } - d = Abs(v); - if (d < halfWidth) { // inside narrow band - acc.setValue(p, ValueT(voxelSize) * v); // distance in world units - } else { // outside narrow band - m += Floor(d - halfWidth); // leapfrog - } - } //end leapfrog over k - } //end loop over j - } //end loop over i - };// kernel -#ifdef NANOVDB_PARALLEL_PRIMITIVES - forEach(range, kernel); -#else - kernel(range); -#endif - return grid; -} // initOctahedron - -} // unnamed namespace - -//================================================================================================ - -template -typename enable_if::value || - is_same::value, GridHandle>::type -createLevelSetSphere(double radius, // radius of sphere in world units - const Vec3d& center, // center of sphere in world units - double voxelSize, // size of a voxel in world units - double halfWidth, // half-width of narrow band in voxel units - const Vec3d& origin, // origin of grid in world units - const std::string& name, // name of grid - StatsMode sMode, // mode of computation for the statistics - ChecksumMode cMode, // mode of computation for the checksum - const BufferT& buffer) -{ - using GridT = build::Grid; - auto grid = initSphere(radius, center, voxelSize, halfWidth, origin); - grid->mName = name; - build::NodeManager mgr(*grid); - build::sdfToLevelSet(mgr); - CreateNanoGrid converter(*grid); - converter.setStats(sMode); - converter.setChecksum(cMode); - auto handle = converter.template getHandle(buffer); - assert(handle); - return handle; -} // createLevelSetSphere - -//================================================================================================ - -template -typename enable_if::value || - is_same::value || - is_same::value, GridHandle>::type -createLevelSetSphere(double radius, // radius of sphere in world units - const Vec3d& center, // center of sphere in world units - double voxelSize, // size of a voxel in world units - double halfWidth, // half-width of narrow band in voxel units - const Vec3d& origin, // origin of grid in world units - const std::string& name, // name of grid - StatsMode sMode, // mode of computation for the statistics - ChecksumMode cMode, // mode of computation for the checksum - bool ditherOn, - const BufferT& buffer) -{ - using GridT = build::Grid; - auto grid = initSphere(radius, center, voxelSize, halfWidth, origin); - grid->mName = name; - build::NodeManager mgr(*grid); - build::sdfToLevelSet(mgr); - CreateNanoGrid 
converter(*grid); - converter.setStats(sMode); - converter.setChecksum(cMode); - converter.enableDithering(ditherOn); - auto handle = converter.template getHandle(buffer); - assert(handle); - return handle; -} // createLevelSetSphere - -//================================================================================================ - -template -typename enable_if::value, GridHandle>::type -createLevelSetSphere(double radius, // radius of sphere in world units - const Vec3d& center, // center of sphere in world units - double voxelSize, // size of a voxel in world units - double halfWidth, // half-width of narrow band in voxel units - const Vec3d& origin, // origin of grid in world units - const std::string& name, // name of grid - StatsMode sMode, // mode of computation for the statistics - ChecksumMode cMode, // mode of computation for the checksum - float tolerance,// only used if VoxelT = FpN - bool ditherOn, - const BufferT& buffer) -{ - using GridT = build::Grid; - auto grid = initSphere(radius, center, voxelSize, halfWidth, origin); - grid->mName = name; - build::NodeManager mgr(*grid); - build::sdfToLevelSet(mgr); - CreateNanoGrid converter(*grid); - converter.setStats(sMode); - converter.setChecksum(cMode); - converter.enableDithering(ditherOn); - AbsDiff oracle(tolerance); - auto handle = converter.template getHandle(oracle, buffer); - assert(handle); - return handle; -} // createLevelSetSphere - -//================================================================================================ - -template -typename disable_if::value, GridHandle>::type -createFogVolumeSphere(double radius, // radius of sphere in world units - const Vec3d& center, // center of sphere in world units - double voxelSize, // size of a voxel in world units - double halfWidth, // half-width of narrow band in voxel units - const Vec3d& origin, // origin of grid in world units - const std::string& name, // name of grid - StatsMode sMode, // mode of computation for the statistics - ChecksumMode cMode, // mode of computation for the checksum - const BufferT& buffer) -{ - using GridT = build::Grid; - auto grid = initSphere(radius, center, voxelSize, halfWidth, origin); - grid->mName = name; - build::NodeManager mgr(*grid); - build::sdfToLevelSet(mgr); - build::levelSetToFog(mgr, false); - CreateNanoGrid converter(*grid); - converter.setStats(sMode); - converter.setChecksum(cMode); - auto handle = converter.template getHandle(buffer); - assert(handle); - return handle; -} // createFogVolumeSphere - -//================================================================================================ - -template -typename enable_if::value, GridHandle>::type -createFogVolumeSphere(double radius, // radius of sphere in world units - const Vec3d& center, // center of sphere in world units - double voxelSize, // size of a voxel in world units - double halfWidth, // half-width of narrow band in voxel units - const Vec3d& origin, // origin of grid in world units - const std::string& name, // name of grid - StatsMode sMode, // mode of computation for the statistics - ChecksumMode cMode, // mode of computation for the checksum - float tolerance,// only used if VoxelT = FpN - bool ditherOn, - const BufferT& buffer) -{ - using GridT = build::Grid; - auto grid = initSphere(radius, center, voxelSize, halfWidth, origin); - grid->mName = name; - build::NodeManager mgr(*grid); - build::sdfToLevelSet(mgr); - build::levelSetToFog(mgr, false); - CreateNanoGrid converter(*grid); - converter.setStats(sMode); - 
converter.setChecksum(cMode); - converter.enableDithering(ditherOn); - AbsDiff oracle(tolerance); - auto handle = converter.template getHandle(oracle, buffer); - assert(handle); - return handle; -} // createFogVolumeSphere - -//================================================================================================ - -template -typename disable_if::value, GridHandle>::type -createPointSphere(int pointsPerVoxel, // number of points to be scattered in each active voxel - double radius, // radius of sphere in world units - const Vec3d& center, // center of sphere in world units - double voxelSize, // size of a voxel in world units - const Vec3d& origin, // origin of grid in world units - const std::string& name, // name of grid - ChecksumMode cMode, // mode of computation for the checksum - const BufferT& buffer) -{ - auto sphereHandle = createLevelSetSphere(radius, center, voxelSize, 0.5, origin, "dummy", - StatsMode::BBox, ChecksumMode::Disable, buffer); - assert(sphereHandle); - auto* sphereGrid = sphereHandle.template grid(); - assert(sphereGrid); - auto pointHandle = createPointScatter(*sphereGrid, pointsPerVoxel, name, cMode, buffer); - assert(pointHandle); - return pointHandle; -} // createPointSphere - -//================================================================================================ - -template -typename disable_if::value, GridHandle>::type -createLevelSetTorus(double majorRadius, // major radius of torus in world units - double minorRadius, // minor radius of torus in world units - const Vec3d& center, // center of torus in world units - double voxelSize, // size of a voxel in world units - double halfWidth, // half-width of narrow band in voxel units - const Vec3d& origin, // origin of grid in world units - const std::string& name, // name of grid - StatsMode sMode, // mode of computation for the statistics - ChecksumMode cMode, // mode of computation for the checksum - const BufferT& buffer) -{ - using GridT = build::Grid; - auto grid = initTorus(majorRadius, minorRadius, center, voxelSize, halfWidth, origin); - grid->mName = name; - build::NodeManager mgr(*grid); - build::sdfToLevelSet(mgr); - CreateNanoGrid converter(*grid); - converter.setStats(sMode); - converter.setChecksum(cMode); - auto handle = converter.template getHandle(buffer); - assert(handle); - return handle; -} // createLevelSetTorus - -//================================================================================================ - -template -typename enable_if::value, GridHandle>::type -createLevelSetTorus(double majorRadius, // major radius of torus in world units - double minorRadius, // minor radius of torus in world units - const Vec3d& center, // center of torus in world units - double voxelSize, // size of a voxel in world units - double halfWidth, // half-width of narrow band in voxel units - const Vec3d& origin, // origin of grid in world units - const std::string& name, // name of grid - StatsMode sMode, // mode of computation for the statistics - ChecksumMode cMode, // mode of computation for the checksum - float tolerance, - bool ditherOn, - const BufferT& buffer) -{ - using GridT = build::Grid; - auto grid = initTorus(majorRadius, minorRadius, center, voxelSize, halfWidth, origin); - grid->mName = name; - build::NodeManager mgr(*grid); - build::sdfToLevelSet(mgr); - CreateNanoGrid converter(*grid); - converter.setStats(sMode); - converter.setChecksum(cMode); - converter.enableDithering(ditherOn); - AbsDiff oracle(tolerance); - auto handle = converter.template 
getHandle(oracle, buffer); - assert(handle); - return handle; -} // createLevelSetTorus - -//================================================================================================ - -template -typename disable_if::value, GridHandle>::type -createFogVolumeTorus(double majorRadius, // major radius of torus in world units - double minorRadius, // minor radius of torus in world units - const Vec3d& center, // center of torus in world units - double voxelSize, // size of a voxel in world units - double halfWidth, // half-width of narrow band in voxel units - const Vec3d& origin, // origin of grid in world units - const std::string& name, // name of grid - StatsMode sMode, // mode of computation for the statistics - ChecksumMode cMode, // mode of computation for the checksum - const BufferT& buffer) -{ - using GridT = build::Grid; - auto grid = initTorus(majorRadius, minorRadius, center, voxelSize, halfWidth, origin); - grid->mName = name; - build::NodeManager mgr(*grid); - build::sdfToLevelSet(mgr); - build::levelSetToFog(mgr, false); - CreateNanoGrid converter(*grid); - converter.setStats(sMode); - converter.setChecksum(cMode); - auto handle = converter.template getHandle(buffer); - assert(handle); - return handle; -} // createFogVolumeTorus - -//================================================================================================ - -template -typename enable_if::value, GridHandle>::type -createFogVolumeTorus(double majorRadius, // major radius of torus in world units - double minorRadius, // minor radius of torus in world units - const Vec3d& center, // center of torus in world units - double voxelSize, // size of a voxel in world units - double halfWidth, // half-width of narrow band in voxel units - const Vec3d& origin, // origin of grid in world units - const std::string& name, // name of grid - StatsMode sMode, // mode of computation for the statistics - ChecksumMode cMode, // mode of computation for the checksum - float tolerance, - bool ditherOn, - const BufferT& buffer) -{ - using GridT = build::Grid; - auto grid = initTorus(majorRadius, minorRadius, center, voxelSize, halfWidth, origin); - grid->mName = name; - build::NodeManager mgr(*grid); - build::sdfToLevelSet(mgr); - build::levelSetToFog(mgr, false); - CreateNanoGrid converter(*grid); - converter.setStats(sMode); - converter.setChecksum(cMode); - converter.enableDithering(ditherOn); - AbsDiff oracle(tolerance); - auto handle = converter.template getHandle(oracle, buffer); - assert(handle); - return handle; -} // createFogVolumeTorus - -//================================================================================================ - -template -typename disable_if::value, GridHandle>::type -createPointTorus(int pointsPerVoxel, // number of points to be scattered in each active voxel - double majorRadius, // major radius of torus in world units - double minorRadius, // minor radius of torus in world units - const Vec3d& center, // center of torus in world units - double voxelSize, // size of a voxel in world units - const Vec3d& origin, // origin of grid in world units - const std::string& name, // name of grid - ChecksumMode cMode, // mode of computation for the checksum - const BufferT& buffer) -{ - auto torusHandle = createLevelSetTorus(majorRadius, minorRadius, center, voxelSize, 0.5f, origin, - "dummy", StatsMode::BBox, ChecksumMode::Disable, buffer); - assert(torusHandle); - auto* torusGrid = torusHandle.template grid(); - assert(torusGrid); - auto pointHandle = createPointScatter(*torusGrid, 
pointsPerVoxel, name, cMode, buffer); - assert(pointHandle); - return pointHandle; -} // createPointTorus - -//================================================================================================ - -template -typename disable_if::value, GridHandle>::type -createLevelSetBox(double width, // width of box in world units - double height, // height of box in world units - double depth, // depth of box in world units - const Vec3d& center, // center of box in world units - double voxelSize, // size of a voxel in world units - double halfWidth, // half-width of narrow band in voxel units - const Vec3d& origin, // origin of grid in world units - const std::string& name, // name of grid - StatsMode sMode, // mode of computation for the statistics - ChecksumMode cMode, // mode of computation for the checksum - const BufferT& buffer) -{ - using GridT = build::Grid; - auto grid = initBox(width, height, depth, center, voxelSize, halfWidth, origin); - grid->mName = name; - build::NodeManager mgr(*grid); - build::sdfToLevelSet(mgr); - CreateNanoGrid converter(*grid); - converter.setStats(sMode); - converter.setChecksum(cMode); - auto handle = converter.template getHandle(buffer); - assert(handle); - return handle; -} // createLevelSetBox - -//================================================================================================ - -template -typename enable_if::value, GridHandle>::type -createLevelSetBox(double width, // width of box in world units - double height, // height of box in world units - double depth, // depth of box in world units - const Vec3d& center, // center of box in world units - double voxelSize, // size of a voxel in world units - double halfWidth, // half-width of narrow band in voxel units - const Vec3d& origin, // origin of grid in world units - const std::string& name, // name of grid - StatsMode sMode, // mode of computation for the statistics - ChecksumMode cMode, // mode of computation for the checksum - float tolerance, - bool ditherOn, - const BufferT& buffer) -{ - using GridT = build::Grid; - auto grid = initBox(width, height, depth, center, voxelSize, halfWidth, origin); - grid->mName = name; - build::NodeManager mgr(*grid); - build::sdfToLevelSet(mgr); - CreateNanoGrid converter(*grid); - converter.setStats(sMode); - converter.setChecksum(cMode); - converter.enableDithering(ditherOn); - AbsDiff oracle(tolerance); - auto handle = converter.template getHandle(oracle, buffer); - assert(handle); - return handle; -} // createLevelSetBox - -//================================================================================================ - -template -typename disable_if::value, GridHandle>::type -createLevelSetOctahedron(double scale, // scale of the octahedron in world units - const Vec3d& center, // center of box in world units - double voxelSize, // size of a voxel in world units - double halfWidth, // half-width of narrow band in voxel units - const Vec3d& origin, // origin of grid in world units - const std::string& name, // name of grid - StatsMode sMode, // mode of computation for the statistics - ChecksumMode cMode, // mode of computation for the checksum - const BufferT& buffer) -{ - using GridT = build::Grid; - auto grid = initOctahedron(scale, center, voxelSize, halfWidth, origin); - grid->mName = name; - build::NodeManager mgr(*grid); - build::sdfToLevelSet(mgr); - CreateNanoGrid converter(*grid); - converter.setStats(sMode); - converter.setChecksum(cMode); - auto handle = converter.template getHandle(buffer); - assert(handle); - return handle; 
-} // createLevelSetOctahedron - -//================================================================================================ - -template -typename enable_if::value, GridHandle>::type -createLevelSetOctahedron(double scale, // scale of the octahedron in world units - const Vec3d& center, // center of box in world units - double voxelSize, // size of a voxel in world units - double halfWidth, // half-width of narrow band in voxel units - const Vec3d& origin, // origin of grid in world units - const std::string& name, // name of grid - StatsMode sMode, // mode of computation for the statistics - ChecksumMode cMode, // mode of computation for the checksum - float tolerance, - bool ditherOn, - const BufferT& buffer) -{ - using GridT = build::Grid; - auto grid = initOctahedron(scale, center, voxelSize, halfWidth, origin); - grid->mName = name; - build::NodeManager mgr(*grid); - build::sdfToLevelSet(mgr); - CreateNanoGrid converter(*grid); - converter.setStats(sMode); - converter.setChecksum(cMode); - converter.enableDithering(ditherOn); - AbsDiff oracle(tolerance); - auto handle = converter.template getHandle(oracle, buffer); - assert(handle); - return handle; -} // createLevelSetOctahedron - -//================================================================================================ - -template -typename disable_if::value, GridHandle>::type -createLevelSetBBox(double width, // width of bbox in world units - double height, // height of bbox in world units - double depth, // depth of bbox in world units - double thickness, // thickness of the wire in world units - const Vec3d& center, // center of bbox in world units - double voxelSize, // size of a voxel in world units - double halfWidth, // half-width of narrow band in voxel units - const Vec3d& origin, // origin of grid in world units - const std::string& name, // name of grid - StatsMode sMode, // mode of computation for the statistics - ChecksumMode cMode, // mode of computation for the checksum - const BufferT& buffer) -{ - using GridT = build::Grid; - auto grid = initBBox(width, height, depth, thickness, center, voxelSize, halfWidth, origin); - grid->mName = name; - build::NodeManager mgr(*grid); - build::sdfToLevelSet(mgr); - CreateNanoGrid converter(*grid); - converter.setStats(sMode); - converter.setChecksum(cMode); - auto handle = converter.template getHandle(buffer); - assert(handle); - return handle; -} // createLevelSetBBox - -//================================================================================================ - -template -typename enable_if::value, GridHandle>::type -createLevelSetBBox(double width, // width of bbox in world units - double height, // height of bbox in world units - double depth, // depth of bbox in world units - double thickness, // thickness of the wire in world units - const Vec3d& center, // center of bbox in world units - double voxelSize, // size of a voxel in world units - double halfWidth, // half-width of narrow band in voxel units - const Vec3d& origin, // origin of grid in world units - const std::string& name, // name of grid - StatsMode sMode, // mode of computation for the statistics - ChecksumMode cMode, // mode of computation for the checksum - float tolerance, - bool ditherOn, - const BufferT& buffer) -{ - using GridT = build::Grid; - auto grid = initBBox(width, height, depth, thickness, center, voxelSize, halfWidth, origin); - grid->mName = name; - build::NodeManager mgr(*grid); - build::sdfToLevelSet(mgr); - CreateNanoGrid converter(*grid); - converter.setStats(sMode); 
- converter.setChecksum(cMode); - converter.enableDithering(ditherOn); - AbsDiff oracle(tolerance); - auto handle = converter.template getHandle(oracle, buffer); - assert(handle); - return handle; -} // createLevelSetBBox - -//================================================================================================ - -template -typename disable_if::value, GridHandle>::type -createFogVolumeBox(double width, // width of box in world units - double height, // height of box in world units - double depth, // depth of box in world units - const Vec3d& center, // center of box in world units - double voxelSize, // size of a voxel in world units - double halfWidth, // half-width of narrow band in voxel units - const Vec3d& origin, // origin of grid in world units - const std::string& name, // name of grid - StatsMode sMode, // mode of computation for the statistics - ChecksumMode cMode, // mode of computation for the checksum - const BufferT& buffer) -{ - using GridT = build::Grid; - auto grid = initBox(width, height, depth, center, voxelSize, halfWidth, origin); - grid->mName = name; - build::NodeManager mgr(*grid); - build::sdfToLevelSet(mgr); - build::levelSetToFog(mgr, false); - CreateNanoGrid converter(*grid); - converter.setStats(sMode); - converter.setChecksum(cMode); - auto handle = converter.template getHandle(buffer); - assert(handle); - return handle; -} // createFogVolumeBox - -//================================================================================================ - -template -typename enable_if::value, GridHandle>::type -createFogVolumeBox(double width, // width of box in world units - double height, // height of box in world units - double depth, // depth of box in world units - const Vec3d& center, // center of box in world units - double voxelSize, // size of a voxel in world units - double halfWidth, // half-width of narrow band in voxel units - const Vec3d& origin, // origin of grid in world units - const std::string& name, // name of grid - StatsMode sMode, // mode of computation for the statistics - ChecksumMode cMode, // mode of computation for the checksum - float tolerance, - bool ditherOn, - const BufferT& buffer) -{ - using GridT = build::Grid; - auto grid = initBox(width, height, depth, center, voxelSize, halfWidth, origin); - grid->mName = name; - build::NodeManager mgr(*grid); - build::sdfToLevelSet(mgr); - build::levelSetToFog(mgr, false); - CreateNanoGrid converter(*grid); - converter.setStats(sMode); - converter.setChecksum(cMode); - converter.enableDithering(ditherOn); - AbsDiff oracle(tolerance); - auto handle = converter.template getHandle(oracle, buffer); - assert(handle); - return handle; -} // createFogVolumeBox - -//================================================================================================ - -template -typename disable_if::value, GridHandle>::type -createFogVolumeOctahedron(double scale, // scale of octahedron in world units - const Vec3d& center, // center of box in world units - double voxelSize, // size of a voxel in world units - double halfWidth, // half-width of narrow band in voxel units - const Vec3d& origin, // origin of grid in world units - const std::string& name, // name of grid - StatsMode sMode, // mode of computation for the statistics - ChecksumMode cMode, // mode of computation for the checksum - const BufferT& buffer) -{ - using GridT = build::Grid; - auto grid = initOctahedron(scale, center, voxelSize, halfWidth, origin); - grid->mName = name; - build::NodeManager mgr(*grid); - 
build::sdfToLevelSet(mgr); - build::levelSetToFog(mgr, false); - CreateNanoGrid converter(*grid); - converter.setStats(sMode); - converter.setChecksum(cMode); - auto handle = converter.template getHandle(buffer); - assert(handle); - return handle; -} // createFogVolumeOctahedron - -//================================================================================================ - -template -typename enable_if::value, GridHandle>::type -createFogVolumeOctahedron(double scale, // scale of octahedron in world units - const Vec3d& center, // center of box in world units - double voxelSize, // size of a voxel in world units - double halfWidth, // half-width of narrow band in voxel units - const Vec3d& origin, // origin of grid in world units - const std::string& name, // name of grid - StatsMode sMode, // mode of computation for the statistics - ChecksumMode cMode, // mode of computation for the checksum - float tolerance, - bool ditherOn, - const BufferT& buffer) -{ - using GridT = build::Grid; - auto grid = initOctahedron(scale, center, voxelSize, halfWidth, origin); - grid->mName = name; - build::NodeManager mgr(*grid); - build::sdfToLevelSet(mgr); - build::levelSetToFog(mgr, false); - CreateNanoGrid converter(*grid); - converter.setStats(sMode); - converter.setChecksum(cMode); - converter.enableDithering(ditherOn); - AbsDiff oracle(tolerance); - auto handle = converter.template getHandle(oracle, buffer); - assert(handle); - return handle; -} // createFogVolumeOctahedron - -//================================================================================================ - -template -typename disable_if::value, GridHandle>::type -createPointBox(int pointsPerVoxel, // number of points to be scattered in each active voxel - double width, // width of box in world units - double height, // height of box in world units - double depth, // depth of box in world units - const Vec3d& center, // center of box in world units - double voxelSize, // size of a voxel in world units - const Vec3d& origin, // origin of grid in world units - const std::string& name, // name of grid - ChecksumMode cMode, // mode of computation for the checksum - const BufferT& buffer) -{ - auto boxHandle = createLevelSetBox(width, height, depth, center, voxelSize, 0.5, origin, "dummy", - StatsMode::BBox, ChecksumMode::Disable, buffer); - assert(boxHandle); - auto* boxGrid = boxHandle.template grid(); - assert(boxGrid); - auto pointHandle = createPointScatter(*boxGrid, pointsPerVoxel, name, cMode, buffer); - assert(pointHandle); - return pointHandle; -} // createPointBox - -//================================================================================================ - -template -inline GridHandle -createPointScatter(const NanoGrid& srcGrid, // source level set grid in which points are scattered - int pointsPerVoxel, // number of points to be scattered in each active voxel - const std::string& name, // name of grid - ChecksumMode cMode, // mode of computation for the checksum - const BufferT& buffer) -{ - using ValueT = typename BuildToValueMap::type; - static_assert(is_floating_point::value, "createPointScatter: expect floating point"); - using Vec3T = Vec3; - if (pointsPerVoxel < 1) { - throw std::runtime_error("createPointScatter: Expected at least one point per voxel"); - } - if (!srcGrid.isLevelSet()) { - throw std::runtime_error("createPointScatter: Expected a level set grid"); - } - if (!srcGrid.hasBBox()) { - throw std::runtime_error("createPointScatter: ActiveVoxelCount is required"); - } - const uint64_t pointCount = pointsPerVoxel *
srcGrid.activeVoxelCount(); - if (pointCount == 0) { - throw std::runtime_error("createPointScatter: No particles to scatter"); - } - std::vector xyz; - xyz.reserve(pointCount); - using DstGridT = build::Grid; - DstGridT dstGrid(std::numeric_limits::max(), name, GridClass::PointData); - dstGrid.mMap = srcGrid.map(); - auto dstAcc = dstGrid.getAccessor(); - std::srand(1234); - const ValueT s = 1 / (1 + ValueT(RAND_MAX)); // scale so s*rand() is in ] 0, 1 [ - // return a point with random local voxel coordinates (-0.5 to +0.5) - auto randomPoint = [&s](){return s * Vec3T(rand(), rand(), rand()) - Vec3T(0.5);}; - const auto& srcTree = srcGrid.tree(); - auto srcMgrHandle = createNodeManager(srcGrid); - auto *srcMgr = srcMgrHandle.template mgr(); - assert(srcMgr); - for (uint32_t i = 0, end = srcTree.nodeCount(0); i < end; ++i) { - auto& srcLeaf = srcMgr->leaf(i); - auto* dstLeaf = dstAcc.setValue(srcLeaf.origin(), pointsPerVoxel); // allocates leaf node - dstLeaf->mValueMask = srcLeaf.valueMask(); - for (uint32_t j = 0, m = 0; j < 512; ++j) { - if (dstLeaf->mValueMask.isOn(j)) { - const Vec3f ijk = dstLeaf->offsetToGlobalCoord(j).asVec3s();// floating-point representation of index coordinates - for (int n = 0; n < pointsPerVoxel; ++n) xyz.push_back(srcGrid.indexToWorld(randomPoint() + ijk)); - m += pointsPerVoxel; - }// active voxels - dstLeaf->mValues[j] = m; - }// loop over all voxels - }// loop over leaf nodes - assert(pointCount == xyz.size()); - CreateNanoGrid converter(dstGrid); - converter.setStats(StatsMode::MinMax); - converter.setChecksum(ChecksumMode::Disable); - - converter.addBlindData(name, - GridBlindDataSemantic::WorldCoords, - GridBlindDataClass::AttributeArray, - mapToGridType(), - pointCount, - sizeof(Vec3T)); - auto handle = converter.template getHandle(buffer); - assert(handle); - - auto* grid = handle.template grid(); - assert(grid && grid->template isSequential<0>()); - auto &tree = grid->tree(); - if (tree.nodeCount(0) == 0) throw std::runtime_error("Expect leaf nodes!"); - auto *leafData = tree.getFirstLeaf()->data(); - leafData[0].mMinimum = 0; // start of prefix sum - for (uint32_t i = 1, n = tree.nodeCount(0); i < n; ++i) { - leafData[i].mMinimum = leafData[i - 1].mMinimum + leafData[i - 1].mMaximum; - } - if (Vec3T *blindData = grid->template getBlindData(0)) { - memcpy(blindData, xyz.data(), xyz.size() * sizeof(Vec3T)); - } else { - throw std::runtime_error("Blind data pointer was NULL"); - } - updateChecksum(*grid, cMode); - return handle; -} // createPointScatter - -} // namespace nanovdb - -#endif // NANOVDB_PRIMITIVES_H_HAS_BEEN_INCLUDED +#include // for NANOVDB_DEPRECATED_HEADER +#include +NANOVDB_DEPRECATED_HEADER("Include nanovdb/tools/CreatePrimitives.h instead.") diff --git a/nanovdb/nanovdb/util/Range.h b/nanovdb/nanovdb/util/Range.h index 7b21b7ce94..c12873513e 100644 --- a/nanovdb/nanovdb/util/Range.h +++ b/nanovdb/nanovdb/util/Range.h @@ -2,7 +2,7 @@ // SPDX-License-Identifier: MPL-2.0 /*!
- \file Range.h + \file nanovdb/util/Range.h \author Ken Museth @@ -11,10 +11,11 @@ \brief Custom Range class that is compatible with the tbb::blocked_range classes */ -#ifndef NANOVDB_RANGE_H_HAS_BEEN_INCLUDED -#define NANOVDB_RANGE_H_HAS_BEEN_INCLUDED +#ifndef NANOVDB_UTIL_RANGE_H_HAS_BEEN_INCLUDED +#define NANOVDB_UTIL_RANGE_H_HAS_BEEN_INCLUDED #include +#include // for size_t #ifdef NANOVDB_USE_TBB #include // for tbb::split @@ -22,6 +23,8 @@ namespace nanovdb { +namespace util { + class Split {};// Dummy class used by split constructors template @@ -144,6 +147,12 @@ class Range<3, T> const Range<1, T>& operator[](int i) const { assert(i==0 || i==1 || i==2); return mRange[i]; } };// Range<3, T> +}// namespace util + +using Range1D [[deprecated("Use nanovdb::util::Range1D instead")]] = util::Range<1, size_t>; +using Range2D [[deprecated("Use nanovdb::util::Range2D instead")]] = util::Range<2, size_t>; +using Range3D [[deprecated("Use nanovdb::util::Range3D instead")]] = util::Range<3, size_t>; + }// namespace nanovdb -#endif // NANOVDB_RANGE_H_HAS_BEEN_INCLUDED +#endif // NANOVDB_UTIL_RANGE_H_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/util/Ray.h b/nanovdb/nanovdb/util/Ray.h index 62d6ff51a0..1fed33bf7c 100644 --- a/nanovdb/nanovdb/util/Ray.h +++ b/nanovdb/nanovdb/util/Ray.h @@ -1,551 +1,6 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -/// @file Ray.h -/// -/// @author Ken Museth -/// -/// @brief A Ray class. - -#ifndef NANOVDB_RAY_H_HAS_BEEN_INCLUDED -#define NANOVDB_RAY_H_HAS_BEEN_INCLUDED - -#include // for Vec3 - -namespace nanovdb { - -template -class Ray -{ -public: - using RealType = RealT; - using Vec3Type = Vec3; - using Vec3T = Vec3Type; - - struct TimeSpan - { - RealT t0, t1; - /// @brief Default constructor - __hostdev__ TimeSpan() {} - /// @brief Constructor - __hostdev__ TimeSpan(RealT _t0, RealT _t1) - : t0(_t0) - , t1(_t1) - { - } - /// @brief Set both times - __hostdev__ void set(RealT _t0, RealT _t1) - { - t0 = _t0; - t1 = _t1; - } - /// @brief Get both times - __hostdev__ void get(RealT& _t0, RealT& _t1) const - { - _t0 = t0; - _t1 = t1; - } - /// @brief Return @c true if t1 is larger than t0 by at least eps. - __hostdev__ bool valid(RealT eps = Delta::value()) const { return (t1 - t0) > eps; } - /// @brief Return the midpoint of the ray. 
- __hostdev__ RealT mid() const { return 0.5 * (t0 + t1); } - /// @brief Multiplies both times - __hostdev__ void scale(RealT s) - { - assert(s > 0); - t0 *= s; - t1 *= s; - } - /// @brief Return @c true if @a t is within [t0, t1], both inclusive - __hostdev__ bool test(RealT t) const { return (t >= t0 && t <= t1); } - }; - - __hostdev__ Ray(const Vec3Type& eye = Vec3Type(0, 0, 0), - const Vec3Type& direction = Vec3Type(1, 0, 0), - RealT t0 = Delta::value(), - RealT t1 = Maximum::value()) - : mEye(eye) - , mDir(direction) - , mInvDir(1 / mDir[0], 1 / mDir[1], 1 / mDir[2]) - , mTimeSpan(t0, t1) - , mSign{mInvDir[0] < 0, mInvDir[1] < 0, mInvDir[2] < 0} - { - } - - __hostdev__ Ray& offsetEye(RealT offset) - { - mEye[0] += offset; - mEye[1] += offset; - mEye[2] += offset; - return *this; - } - - __hostdev__ Ray& setEye(const Vec3Type& eye) - { - mEye = eye; - return *this; - } - - __hostdev__ Ray& setDir(const Vec3Type& dir) - { - mDir = dir; - mInvDir[0] = 1.0 / mDir[0]; - mInvDir[1] = 1.0 / mDir[1]; - mInvDir[2] = 1.0 / mDir[2]; - mSign[0] = mInvDir[0] < 0; - mSign[1] = mInvDir[1] < 0; - mSign[2] = mInvDir[2] < 0; - return *this; - } - - __hostdev__ Ray& setMinTime(RealT t0) - { - mTimeSpan.t0 = t0; - return *this; - } - - __hostdev__ Ray& setMaxTime(RealT t1) - { - mTimeSpan.t1 = t1; - return *this; - } - - __hostdev__ Ray& setTimes( - RealT t0 = Delta::value(), - RealT t1 = Maximum::value()) - { - assert(t0 > 0 && t1 > 0); - mTimeSpan.set(t0, t1); - return *this; - } - - __hostdev__ Ray& scaleTimes(RealT scale) - { - mTimeSpan.scale(scale); - return *this; - } - - __hostdev__ Ray& reset( - const Vec3Type& eye, - const Vec3Type& direction, - RealT t0 = Delta::value(), - RealT t1 = Maximum::value()) - { - this->setEye(eye); - this->setDir(direction); - this->setTimes(t0, t1); - return *this; - } - - __hostdev__ const Vec3T& eye() const { return mEye; } - - __hostdev__ const Vec3T& dir() const { return mDir; } - - __hostdev__ const Vec3T& invDir() const { return mInvDir; } - - __hostdev__ RealT t0() const { return mTimeSpan.t0; } - - __hostdev__ RealT t1() const { return mTimeSpan.t1; } - - __hostdev__ int sign(int i) const { return mSign[i]; } - - /// @brief Return the position along the ray at the specified time. - __hostdev__ Vec3T operator()(RealT time) const - { -#if 1 - return Vec3T(fmaf(time, mDir[0], mEye[0]), - fmaf(time, mDir[1], mEye[1]), - fmaf(time, mDir[2], mEye[2])); -#else - return mEye + mDir * time; -#endif - } - - /// @brief Return the starting point of the ray. - __hostdev__ Vec3T start() const { return (*this)(mTimeSpan.t0); } - - /// @brief Return the endpoint of the ray. - __hostdev__ Vec3T end() const { return (*this)(mTimeSpan.t1); } - - /// @brief Return the midpoint of the ray. - __hostdev__ Vec3T mid() const { return (*this)(mTimeSpan.mid()); } - - /// @brief Return @c true if t1 is larger than t0 by at least eps. - __hostdev__ bool valid(RealT eps = Delta::value()) const { return mTimeSpan.valid(eps); } - - /// @brief Return @c true if @a time is within t0 and t1, both inclusive. - __hostdev__ bool test(RealT time) const { return mTimeSpan.test(time); } - - /// @brief Return a new Ray that is transformed with the specified map. - /// - /// @param map the map from which to construct the new Ray. - /// - /// @warning Assumes a linear map and a normalized direction. - /// - /// @details The requirement that the direction is normalized - /// follows from the transformation of t0 and t1 - and the fact that - /// we want applyMap and applyInverseMap to be inverse operations.
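For context, a minimal usage sketch of the Ray class being removed here (worldToIndexF and clip are defined further below); it assumes a loaded NanoGrid<float> named grid, and the class itself now lives in nanovdb/math/Ray.h:

    // Build a world-space ray, move it into the grid's index space, and clip it
    // against the grid's active bounding box before marching.
    nanovdb::Ray<float> wRay(nanovdb::Vec3<float>(0.0f, 0.0f, -10.0f),  // eye in world space
                             nanovdb::Vec3<float>(0.0f, 0.0f,   1.0f)); // normalized direction
    auto iRay = wRay.worldToIndexF(grid);         // rescales t0/t1 consistently
    if (iRay.clip(grid.indexBBox())) {            // shrink [t0,t1] to the intersection
        nanovdb::Vec3<float> p = iRay(iRay.t0()); // entry position in index space
    }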
- template - __hostdev__ Ray applyMap(const MapType& map) const - { - const Vec3T eye = map.applyMap(mEye); - const Vec3T dir = map.applyJacobian(mDir); - const RealT length = dir.length(), invLength = RealT(1) / length; - RealT t1 = mTimeSpan.t1; - if (mTimeSpan.t1 < Maximum::value()) { - t1 *= length; - } - return Ray(eye, dir * invLength, length * mTimeSpan.t0, t1); - } - template - __hostdev__ Ray applyMapF(const MapType& map) const - { - const Vec3T eye = map.applyMapF(mEye); - const Vec3T dir = map.applyJacobianF(mDir); - const RealT length = dir.length(), invLength = RealT(1) / length; - RealT t1 = mTimeSpan.t1; - if (mTimeSpan.t1 < Maximum::value()) { - t1 *= length; - } - return Ray(eye, dir * invLength, length * mTimeSpan.t0, t1); - } - - /// @brief Return a new Ray that is transformed with the inverse of the specified map. - /// - /// @param map the map from which to construct the new Ray by inverse mapping. - /// - /// @warning Assumes a linear map and a normalized direction. - /// - /// @details The requirement that the direction is normalized - /// follows from the transformation of t0 and t1 - and the fact that - /// we want applyMap and applyInverseMap to be inverse operations. - template - __hostdev__ Ray applyInverseMap(const MapType& map) const - { - const Vec3T eye = map.applyInverseMap(mEye); - const Vec3T dir = map.applyInverseJacobian(mDir); - const RealT length = dir.length(), invLength = RealT(1) / length; - return Ray(eye, dir * invLength, length * mTimeSpan.t0, length * mTimeSpan.t1); - } - template - __hostdev__ Ray applyInverseMapF(const MapType& map) const - { - const Vec3T eye = map.applyInverseMapF(mEye); - const Vec3T dir = map.applyInverseJacobianF(mDir); - const RealT length = dir.length(), invLength = RealT(1) / length; - return Ray(eye, dir * invLength, length * mTimeSpan.t0, length * mTimeSpan.t1); - } - - /// @brief Return a new ray in world space, assuming the existing - /// ray is represented in the index space of the specified grid. - template - __hostdev__ Ray indexToWorldF(const GridType& grid) const - { - const Vec3T eye = grid.indexToWorldF(mEye); - const Vec3T dir = grid.indexToWorldDirF(mDir); - const RealT length = dir.length(), invLength = RealT(1) / length; - RealT t1 = mTimeSpan.t1; - if (mTimeSpan.t1 < Maximum::value()) { - t1 *= length; - } - return Ray(eye, dir * invLength, length * mTimeSpan.t0, t1); - } - - /// @brief Return a new ray in index space, assuming the existing - /// ray is represented in the world space of the specified grid. - template - __hostdev__ Ray worldToIndexF(const GridType& grid) const - { - const Vec3T eye = grid.worldToIndexF(mEye); - const Vec3T dir = grid.worldToIndexDirF(mDir); - const RealT length = dir.length(), invLength = RealT(1) / length; - RealT t1 = mTimeSpan.t1; - if (mTimeSpan.t1 < Maximum::value()) { - t1 *= length; - } - return Ray(eye, dir * invLength, length * mTimeSpan.t0, t1); - } - - /// @brief Return true if this ray intersects the specified sphere. - /// - /// @param center The center of the sphere in the same space as this ray. - /// @param radius The radius of the sphere in the same units as this ray. - /// @param t0 The first intersection point if an intersection exists. - /// @param t1 The second intersection point if an intersection exists. - /// - /// @note If the return value is true, i.e. a hit, and t0 == - this->t0() or t1 == this->t1(), only one true intersection exists.
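The sphere test that follows solves the quadratic A*t^2 + B*t + C = 0 in a numerically stable form: one root comes from the half-discriminant term Q, the other from Vieta's relation t0*t1 == C/A. A standalone sketch of just that step (float precision assumed):

    #include <cmath>
    #include <utility>
    // Stable roots of A*t^2 + B*t + C = 0; mirrors the logic in intersects() below.
    inline bool solveQuadratic(float A, float B, float C, float& t0, float& t1)
    {
        const float D = B * B - 4.0f * A * C;
        if (D < 0.0f) return false; // no real roots: the ray misses the sphere
        const float Q = -0.5f * (B < 0.0f ? B + std::sqrt(D) : B - std::sqrt(D));
        t0 = Q / A; // avoids cancellation when B is close to +/- sqrt(D)
        t1 = C / Q; // second root via Vieta: t0 * t1 == C / A
        if (t0 > t1) std::swap(t0, t1);
        return true;
    }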
- __hostdev__ bool intersects(const Vec3T& center, RealT radius, RealT& t0, RealT& t1) const - { - const Vec3T origin = mEye - center; - const RealT A = mDir.lengthSqr(); - const RealT B = 2 * mDir.dot(origin); - const RealT C = origin.lengthSqr() - radius * radius; - const RealT D = B * B - 4 * A * C; - - if (D < 0) { - return false; - } - const RealT Q = RealT(-0.5) * (B < 0 ? (B + Sqrt(D)) : (B - Sqrt(D))); - - t0 = Q / A; - t1 = C / Q; - - if (t0 > t1) { - RealT tmp = t0; - t0 = t1; - t1 = tmp; - } - if (t0 < mTimeSpan.t0) { - t0 = mTimeSpan.t0; - } - if (t1 > mTimeSpan.t1) { - t1 = mTimeSpan.t1; - } - return t0 <= t1; - } - - /// @brief Return true if this ray intersects the specified sphere. - /// - /// @param center The center of the sphere in the same space as this ray. - /// @param radius The radius of the sphere in the same units as this ray. - __hostdev__ bool intersects(const Vec3T& center, RealT radius) const - { - RealT t0, t1; - return this->intersects(center, radius, t0, t1) > 0; - } - - /// @brief Return true if this ray intersects the specified sphere. - /// - /// @note For intersection this ray is clipped to the two intersection points. - /// - /// @param center The center of the sphere in the same space as this ray. - /// @param radius The radius of the sphere in the same units as this ray. - __hostdev__ bool clip(const Vec3T& center, RealT radius) - { - RealT t0, t1; - const bool hit = this->intersects(center, radius, t0, t1); - if (hit) { - mTimeSpan.set(t0, t1); - } - return hit; - } -#if 0 - /// @brief Return true if the Ray intersects the specified - /// axisaligned bounding box. - /// - /// @param bbox Axis-aligned bounding box in the same space as the Ray. - /// @param t0 If an intersection is detected this is assigned - /// the time for the first intersection point. - /// @param t1 If an intersection is detected this is assigned - /// the time for the second intersection point. - template - __hostdev__ bool intersects(const BBoxT& bbox, RealT& t0, RealT& t1) const - { - t0 = (bbox[ mSign[0]][0] - mEye[0]) * mInvDir[0]; - RealT t2 = (bbox[1-mSign[1]][1] - mEye[1]) * mInvDir[1]; - if (t0 > t2) return false; - t1 = (bbox[1-mSign[0]][0] - mEye[0]) * mInvDir[0]; - RealT t3 = (bbox[ mSign[1]][1] - mEye[1]) * mInvDir[1]; - if (t3 > t1) return false; - if (t3 > t0) t0 = t3; - if (t2 < t1) t1 = t2; - t3 = (bbox[ mSign[2]][2] - mEye[2]) * mInvDir[2]; - if (t3 > t1) return false; - t2 = (bbox[1-mSign[2]][2] - mEye[2]) * mInvDir[2]; - if (t0 > t2) return false; - if (t3 > t0) t0 = t3; - if (mTimeSpan.t1 < t0) return false; - if (t2 < t1) t1 = t2; - if (mTimeSpan.t0 > t1) return false; - if (mTimeSpan.t0 > t0) t0 = mTimeSpan.t0; - if (mTimeSpan.t1 < t1) t1 = mTimeSpan.t1; - return true; - /* - mTimeSpan.get(_t0, _t1); - double t0 = _t0, t1 = _t1; - for (int i = 0; i < 3; ++i) { - //if (abs(mDir[i])<1e-3) continue; - double a = (double(bbox.min()[i]) - mEye[i]) * mInvDir[i]; - double b = (double(bbox.max()[i]) - mEye[i]) * mInvDir[i]; - if (a > b) { - double tmp = a; - a = b; - b = tmp; - } - if (a > t0) t0 = a; - if (b < t1) t1 = b; - if (t0 > t1) { - //if (gVerbose) printf("Missed BBOX: (%i,%i,%i) -> (%i,%i,%i) t0=%f t1=%f\n", - // bbox.min()[0], bbox.min()[1], bbox.min()[2], - // bbox.max()[0], bbox.max()[1], bbox.max()[2], t0, t1); - return false; - } - } - _t0 = t0; _t1 = t1; - return true; - */ - } -#else - /// @brief Returns true if this ray intersects an index bounding box. 
- /// If the return value is true t0 and t1 are set to the intersection - /// times along the ray. - /// - /// @warning Intersection with a CoordBBox internally converts to a floating-point bbox - /// which implies that the max is padded with one voxel, i.e. bbox.max += 1! This - /// avoids gaps between neighboring CoordBBox'es, say from neighboring tree nodes. - __hostdev__ bool intersects(const CoordBBox& bbox, RealT& t0, RealT& t1) const - { - mTimeSpan.get(t0, t1); - for (int i = 0; i < 3; ++i) { - RealT a = RealT(bbox.min()[i]), b = RealT(bbox.max()[i] + 1); - if (a >= b) { // empty bounding box - return false; - } - a = (a - mEye[i]) * mInvDir[i]; - b = (b - mEye[i]) * mInvDir[i]; - if (a > b) { - RealT tmp = a; - a = b; - b = tmp; - } - if (a > t0) { - t0 = a; - } - if (b < t1) { - t1 = b; - } - if (t0 > t1) { - return false; - } - } - return true; - } - /// @brief Returns true if this ray intersects a floating-point bounding box. - /// If the return value is true t0 and t1 are set to the intersection - /// times along the ray. - template - __hostdev__ bool intersects(const BBox& bbox, RealT& t0, RealT& t1) const - { - static_assert(is_floating_point::value, "Ray::intersects: Expected a floating point coordinate"); - mTimeSpan.get(t0, t1); - for (int i = 0; i < 3; ++i) { - RealT a = RealT(bbox.min()[i]), b = RealT(bbox.max()[i]); - if (a >= b) { // empty bounding box - return false; - } - a = (a - mEye[i]) * mInvDir[i]; - b = (b - mEye[i]) * mInvDir[i]; - if (a > b) { - RealT tmp = a; - a = b; - b = tmp; - } - if (a > t0) { - t0 = a; - } - if (b < t1) { - t1 = b; - } - if (t0 > t1) { - return false; - } - } - return true; - } -#endif - - /// @brief Return true if this ray intersects the specified bounding box. - /// - /// @param bbox Axis-aligned bounding box in the same space as this ray. - /// - /// @warning If @a bbox is of the type CoordBBox it is converted to a floating-point - /// bounding box, which implies that the max is padded with one voxel, i.e. - /// bbox.max += 1! This avoids gaps between neighboring CoordBBox'es, say - /// from neighboring tree nodes. - template - __hostdev__ bool intersects(const BBoxT& bbox) const - { -#if 1 - RealT t0, t1; - return this->intersects(bbox, t0, t1); -#else - //BBox bbox(Vec3T(_bbox[0][0]-1e-4,_bbox[0][1]-1e-4,_bbox[0][2]-1e-4), - // Vec3T(_bbox[1][0]+1e-4,_bbox[1][1]+1e-4,_bbox[1][2]+1e-4)); - RealT t0 = (bbox[mSign[0]][0] - mEye[0]) * mInvDir[0]; - RealT t2 = (bbox[1 - mSign[1]][1] - mEye[1]) * mInvDir[1]; - if (t0 > t2) return false; - RealT t1 = (bbox[1 - mSign[0]][0] - mEye[0]) * mInvDir[0]; - RealT t3 = (bbox[mSign[1]][1] - mEye[1]) * mInvDir[1]; - if (t3 > t1) return false; - if (t3 > t0) t0 = t3; - if (t2 < t1) t1 = t2; - t3 = (bbox[mSign[2]][2] - mEye[2]) * mInvDir[2]; - if (t3 > t1) return false; - t2 = (bbox[1 - mSign[2]][2] - mEye[2]) * mInvDir[2]; - if (t0 > t2) return false; - //if (t3 > t0) t0 = t3; - //if (mTimeSpan.t1 < t0) return false; - //if (t2 < t1) t1 = t2; - //return mTimeSpan.t0 < t1; - return true; -#endif - } - - /// @brief Return true if this ray intersects the specified bounding box. - /// - /// @param bbox Axis-aligned bounding box in the same space as this ray. - /// - /// @warning If @a bbox is of the type CoordBBox it is converted to a floating-point - /// bounding box, which implies that the max is padded with one voxel, i.e. - /// bbox.max += 1! This avoids gaps between neighboring CoordBBox'es, say - /// from neighboring tree nodes.
- /// - /// @note For intersection this ray is clipped to the two intersection points. - template - __hostdev__ bool clip(const BBoxT& bbox) - { - RealT t0, t1; - const bool hit = this->intersects(bbox, t0, t1); - if (hit) { - mTimeSpan.set(t0, t1); - } - return hit; - } - - /// @brief Return true if the Ray intersects the plane specified - /// by a normal and distance from the origin. - /// - /// @param normal Normal of the plane. - /// @param distance Distance of the plane to the origin. - /// @param t Time of intersection, if one exists. - __hostdev__ bool intersects(const Vec3T& normal, RealT distance, RealT& t) const - { - const RealT cosAngle = mDir.dot(normal); - if (isApproxZero(cosAngle)) { - return false; // ray is parallel to plane - } - t = (distance - mEye.dot(normal)) / cosAngle; - return this->test(t); - } - - /// @brief Return true if the Ray intersects the plane specified - /// by a normal and point. - /// - /// @param normal Normal of the plane. - /// @param point Point in the plane. - /// @param t Time of intersection, if one exists. - __hostdev__ bool intersects(const Vec3T& normal, const Vec3T& point, RealT& t) const - { - return this->intersects(normal, point.dot(normal), t); - } - -private: - Vec3T mEye, mDir, mInvDir; - TimeSpan mTimeSpan; - int mSign[3]; -}; // end of Ray class - -} // namespace nanovdb - -#endif // NANOVDB_RAY_HAS_BEEN_INCLUDED +#include // for NANOVDB_DEPRECATED_HEADER +#include +NANOVDB_DEPRECATED_HEADER("Include nanovdb/math/Ray.h instead.") diff --git a/nanovdb/nanovdb/util/Reduce.h b/nanovdb/nanovdb/util/Reduce.h index 7073d26e05..eb0a5e749c 100644 --- a/nanovdb/nanovdb/util/Reduce.h +++ b/nanovdb/nanovdb/util/Reduce.h @@ -2,7 +2,7 @@ // SPDX-License-Identifier: MPL-2.0 /*! - \file Reduce.h + \file nanovdb/util/Reduce.h \author Ken Museth @@ -11,10 +11,10 @@ \brief A unified wrapper for tbb::parallel_reduce and a naive std::future analog */ -#ifndef NANOVDB_REDUCE_H_HAS_BEEN_INCLUDED -#define NANOVDB_REDUCE_H_HAS_BEEN_INCLUDED +#ifndef NANOVDB_UTIL_REDUCE_H_HAS_BEEN_INCLUDED +#define NANOVDB_UTIL_REDUCE_H_HAS_BEEN_INCLUDED -#include "Range.h"// for Range1D +#include // for util::Range1D #ifdef NANOVDB_USE_TBB #include @@ -26,6 +26,8 @@ namespace nanovdb { +namespace util { + /// @return reduction /// /// @param range RangeT can be Range, CoordBBox, tbb::blocked_range, blocked_range2D, or blocked_range3D. 
@@ -37,7 +39,6 @@ namespace nanovdb { /// auto func = [&array](auto &r, int a){for (auto i=r.begin(); i!=r.end(); ++i) a+=array[i]; return a;}; /// int sum = reduce(array, 0, func, [](int a, int b){return a + b;}); /// @endcode - template inline T reduce(RangeT range, const T& identity, const FuncT &func, const JoinT &join) { @@ -73,7 +74,7 @@ inline T reduce(RangeT range, const T& identity, const FuncT &func, const JoinT } /// @brief Simple wrapper to the function defined above -template +template inline T reduce(size_t begin, size_t end, size_t grainSize, const T& identity, const FuncT& func, const JoinT& join) { Range1D range(begin, end, grainSize); @@ -97,6 +98,36 @@ inline T reduce(const ContainerT &c, size_t grainSize, const T& identit return reduce( range, identity, func, join ); } +}// namespace util + +/// @brief Simple wrapper to the function defined above +template +[[deprecated("Use nanovdb::util::reduce instead")]] +inline T reduce(size_t begin, size_t end, size_t grainSize, const T& identity, const FuncT& func, const JoinT& join) +{ + util::Range1D range(begin, end, grainSize); + return util::reduce( range, identity, func, join ); +} + +/// @brief Simple wrapper that works with std::containers +template class ContainerT, typename... ArgT, typename T, typename FuncT, typename JoinT > +[[deprecated("Use nanovdb::util::reduce instead")]] +inline T reduce(const ContainerT &c, const T& identity, const FuncT& func, const JoinT& join) +{ + util::Range1D range(0, c.size(), 1); + return util::reduce( range, identity, func, join ); + +} + +/// @brief Simple wrapper that works with std::containers +template class ContainerT, typename... ArgT, typename T, typename FuncT, typename JoinT > +[[deprecated("Use nanovdb::util::reduce instead")]] +T reduce(const ContainerT &c, size_t grainSize, const T& identity, const FuncT& func, const JoinT& join) +{ + util::Range1D range(0, c.size(), grainSize); + return util::reduce( range, identity, func, join ); +} + }// namespace nanovdb -#endif // NANOVDB_REDUCE_H_HAS_BEEN_INCLUDED +#endif // NANOVDB_UTIL_REDUCE_H_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/util/SampleFromVoxels.h b/nanovdb/nanovdb/util/SampleFromVoxels.h index e779d66cf6..b40ea82677 100644 --- a/nanovdb/nanovdb/util/SampleFromVoxels.h +++ b/nanovdb/nanovdb/util/SampleFromVoxels.h @@ -1,983 +1,6 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -////////////////////////////////////////////////////////////////////////// -/// -/// @file SampleFromVoxels.h -/// -/// @brief NearestNeighborSampler, TrilinearSampler, TriquadraticSampler and TricubicSampler -/// -/// @note These interpolators employ internal caching for better performance when used repeatedly -/// in the same voxel location, so try to reuse an instance of these classes more than once. -/// -/// @warning While all the interpolators defined below work with both scalar and vector -/// values (e.g. float and Vec3) TrilinearSampler::zeroCrossing and -/// TrilinearSampler::gradient will only compile with floating point value types
-/// -/// @author Ken Museth -/// -/////////////////////////////////////////////////////////////////////////// - -#ifndef NANOVDB_SAMPLE_FROM_VOXELS_H_HAS_BEEN_INCLUDED -#define NANOVDB_SAMPLE_FROM_VOXELS_H_HAS_BEEN_INCLUDED - -// Only define __hostdev__ when compiling as NVIDIA CUDA -#if defined(__CUDACC__) || defined(__HIP__) -#define __hostdev__ __host__ __device__ -#else -#include // for floor -#define __hostdev__ -#endif - -namespace nanovdb { - -// Forward declaration of sampler with specific polynomial orders -template -class SampleFromVoxels; - -/// @brief Factory free-function for a sampler of specific polynomial orders -/// -/// @details This allows for the compact syntax: -/// @code -/// auto acc = grid.getAccessor(); -/// auto smp = nanovdb::createSampler<1>( acc ); -/// @endcode -template -__hostdev__ SampleFromVoxels createSampler(const TreeOrAccT& acc) -{ - return SampleFromVoxels(acc); -} - -/// @brief Utility function that returns the Coord of the round-down of @a xyz -/// and redefines @a xyz as the fractional part, i.e. xyz-in = return-value + xyz-out -template class Vec3T> -__hostdev__ inline CoordT Floor(Vec3T& xyz); - -/// @brief Template specialization of Floor for Vec3 -template class Vec3T> -__hostdev__ inline CoordT Floor(Vec3T& xyz) -{ - const float ijk[3] = {floorf(xyz[0]), floorf(xyz[1]), floorf(xyz[2])}; - xyz[0] -= ijk[0]; - xyz[1] -= ijk[1]; - xyz[2] -= ijk[2]; - return CoordT(int32_t(ijk[0]), int32_t(ijk[1]), int32_t(ijk[2])); -} - -/// @brief Template specialization of Floor for Vec3 -template class Vec3T> -__hostdev__ inline CoordT Floor(Vec3T& xyz) -{ - const double ijk[3] = {floor(xyz[0]), floor(xyz[1]), floor(xyz[2])}; - xyz[0] -= ijk[0]; - xyz[1] -= ijk[1]; - xyz[2] -= ijk[2]; - return CoordT(int32_t(ijk[0]), int32_t(ijk[1]), int32_t(ijk[2])); -} - -// ------------------------------> NearestNeighborSampler <-------------------------------------- - -/// @brief Nearest neighbor, i.e. zero order, interpolator with caching -template -class SampleFromVoxels -{ -public: - using ValueT = typename TreeOrAccT::ValueType; - using CoordT = typename TreeOrAccT::CoordType; - - static const int ORDER = 0; - /// @brief Construction from a Tree or ReadAccessor - __hostdev__ SampleFromVoxels(const TreeOrAccT& acc) - : mAcc(acc) - , mPos(CoordT::max()) - { - } - - __hostdev__ const TreeOrAccT& accessor() const { return mAcc; } - - /// @note xyz is in index space - template - inline __hostdev__ ValueT operator()(const Vec3T& xyz) const; - - inline __hostdev__ ValueT operator()(const CoordT& ijk) const; - -private: - const TreeOrAccT& mAcc; - mutable CoordT mPos; - mutable ValueT mVal; // private cache -}; // SampleFromVoxels - -/// @brief Nearest neighbor, i.e.
zero order, interpolator without caching -template -class SampleFromVoxels -{ -public: - using ValueT = typename TreeOrAccT::ValueType; - using CoordT = typename TreeOrAccT::CoordType; - static const int ORDER = 0; - - /// @brief Construction from a Tree or ReadAccessor - __hostdev__ SampleFromVoxels(const TreeOrAccT& acc) - : mAcc(acc) - { - } - - __hostdev__ const TreeOrAccT& accessor() const { return mAcc; } - - /// @note xyz is in index space space - template - inline __hostdev__ ValueT operator()(const Vec3T& xyz) const; - - inline __hostdev__ ValueT operator()(const CoordT& ijk) const { return mAcc.getValue(ijk);} - -private: - const TreeOrAccT& mAcc; -}; // SampleFromVoxels - -template -template -__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(const Vec3T& xyz) const -{ - const CoordT ijk = Round(xyz); - if (ijk != mPos) { - mPos = ijk; - mVal = mAcc.getValue(mPos); - } - return mVal; -} - -template -__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(const CoordT& ijk) const -{ - if (ijk != mPos) { - mPos = ijk; - mVal = mAcc.getValue(mPos); - } - return mVal; -} - -template -template -__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(const Vec3T& xyz) const -{ - return mAcc.getValue(Round(xyz)); -} - -// ------------------------------> TrilinearSampler <-------------------------------------- - -/// @brief Tri-linear sampler, i.e. first order, interpolator -template -class TrilinearSampler -{ -protected: - const TreeOrAccT& mAcc; - -public: - using ValueT = typename TreeOrAccT::ValueType; - using CoordT = typename TreeOrAccT::CoordType; - static const int ORDER = 1; - - /// @brief Protected constructor from a Tree or ReadAccessor - __hostdev__ TrilinearSampler(const TreeOrAccT& acc) : mAcc(acc) {} - - __hostdev__ const TreeOrAccT& accessor() const { return mAcc; } - - /// @brief Extract the stencil of 8 values - inline __hostdev__ void stencil(CoordT& ijk, ValueT (&v)[2][2][2]) const; - - template class Vec3T> - static inline __hostdev__ ValueT sample(const Vec3T &uvw, const ValueT (&v)[2][2][2]); - - template class Vec3T> - static inline __hostdev__ Vec3T gradient(const Vec3T &uvw, const ValueT (&v)[2][2][2]); - - static inline __hostdev__ bool zeroCrossing(const ValueT (&v)[2][2][2]); -}; // TrilinearSamplerBase - -template -__hostdev__ void TrilinearSampler::stencil(CoordT& ijk, ValueT (&v)[2][2][2]) const -{ - v[0][0][0] = mAcc.getValue(ijk); // i, j, k - - ijk[2] += 1; - v[0][0][1] = mAcc.getValue(ijk); // i, j, k + 1 - - ijk[1] += 1; - v[0][1][1] = mAcc.getValue(ijk); // i, j+1, k + 1 - - ijk[2] -= 1; - v[0][1][0] = mAcc.getValue(ijk); // i, j+1, k - - ijk[0] += 1; - ijk[1] -= 1; - v[1][0][0] = mAcc.getValue(ijk); // i+1, j, k - - ijk[2] += 1; - v[1][0][1] = mAcc.getValue(ijk); // i+1, j, k + 1 - - ijk[1] += 1; - v[1][1][1] = mAcc.getValue(ijk); // i+1, j+1, k + 1 - - ijk[2] -= 1; - v[1][1][0] = mAcc.getValue(ijk); // i+1, j+1, k -} - -template -template class Vec3T> -__hostdev__ typename TreeOrAccT::ValueType TrilinearSampler::sample(const Vec3T &uvw, const ValueT (&v)[2][2][2]) -{ -#if 0 - auto lerp = [](ValueT a, ValueT b, ValueT w){ return fma(w, b-a, a); };// = w*(b-a) + a - //auto lerp = [](ValueT a, ValueT b, ValueT w){ return fma(w, b, fma(-w, a, a));};// = (1-w)*a + w*b -#else - auto lerp = [](ValueT a, ValueT b, RealT w) { return a + ValueT(w) * (b - a); }; -#endif - return lerp(lerp(lerp(v[0][0][0], v[0][0][1], uvw[2]), lerp(v[0][1][0], v[0][1][1], uvw[2]), uvw[1]), - lerp(lerp(v[1][0][0], 
v[1][0][1], uvw[2]), lerp(v[1][1][0], v[1][1][1], uvw[2]), uvw[1]), - uvw[0]); -} - -template -template class Vec3T> -__hostdev__ Vec3T TrilinearSampler::gradient(const Vec3T &uvw, const ValueT (&v)[2][2][2]) -{ - static_assert(is_floating_point::value, "TrilinearSampler::gradient requires a floating-point type"); -#if 0 - auto lerp = [](ValueT a, ValueT b, ValueT w){ return fma(w, b-a, a); };// = w*(b-a) + a - //auto lerp = [](ValueT a, ValueT b, ValueT w){ return fma(w, b, fma(-w, a, a));};// = (1-w)*a + w*b -#else - auto lerp = [](ValueT a, ValueT b, RealT w) { return a + ValueT(w) * (b - a); }; -#endif - - ValueT D[4] = {v[0][0][1] - v[0][0][0], v[0][1][1] - v[0][1][0], v[1][0][1] - v[1][0][0], v[1][1][1] - v[1][1][0]}; - - // Z component - Vec3T grad(0, 0, lerp(lerp(D[0], D[1], uvw[1]), lerp(D[2], D[3], uvw[1]), uvw[0])); - - const ValueT w = ValueT(uvw[2]); - D[0] = v[0][0][0] + D[0] * w; - D[1] = v[0][1][0] + D[1] * w; - D[2] = v[1][0][0] + D[2] * w; - D[3] = v[1][1][0] + D[3] * w; - - // X component - grad[0] = lerp(D[2], D[3], uvw[1]) - lerp(D[0], D[1], uvw[1]); - - // Y component - grad[1] = lerp(D[1] - D[0], D[3] - D[2], uvw[0]); - - return grad; -} - -template -__hostdev__ bool TrilinearSampler::zeroCrossing(const ValueT (&v)[2][2][2]) -{ - static_assert(is_floating_point::value, "TrilinearSampler::zeroCrossing requires a floating-point type"); - const bool less = v[0][0][0] < ValueT(0); - return (less ^ (v[0][0][1] < ValueT(0))) || - (less ^ (v[0][1][1] < ValueT(0))) || - (less ^ (v[0][1][0] < ValueT(0))) || - (less ^ (v[1][0][0] < ValueT(0))) || - (less ^ (v[1][0][1] < ValueT(0))) || - (less ^ (v[1][1][1] < ValueT(0))) || - (less ^ (v[1][1][0] < ValueT(0))); -} - -/// @brief Template specialization that does not use caching of stencil points -template -class SampleFromVoxels : public TrilinearSampler -{ - using BaseT = TrilinearSampler; - using ValueT = typename TreeOrAccT::ValueType; - using CoordT = typename TreeOrAccT::CoordType; - -public: - - /// @brief Construction from a Tree or ReadAccessor - __hostdev__ SampleFromVoxels(const TreeOrAccT& acc) : BaseT(acc) {} - - /// @note xyz is in index space space - template class Vec3T> - inline __hostdev__ ValueT operator()(Vec3T xyz) const; - - /// @note ijk is in index space space - __hostdev__ ValueT operator()(const CoordT &ijk) const {return BaseT::mAcc.getValue(ijk);} - - /// @brief Return the gradient in index space. - /// - /// @warning Will only compile with floating point value types - template class Vec3T> - inline __hostdev__ Vec3T gradient(Vec3T xyz) const; - - /// @brief Return true if the tr-linear stencil has a zero crossing at the specified index position. 
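// Example (not part of the patch): usage of the tri-linear pieces deleted above, plus a
// scalar sketch of sample()'s lerp cascade. "grid" is a hypothetical nanovdb::FloatGrid*;
// after this patch the same samplers live under nanovdb/math/SampleFromVoxels.h.
//
//   auto acc    = grid->getAccessor();
//   auto linear = nanovdb::createSampler<1>(acc);            // tri-linear, cached variant
//   float val   = linear(nanovdb::Vec3f(1.5f, 2.25f, 3.75f)); // xyz in index space
//   // the cached variant refetches its 2x2x2 stencil only when Floor(xyz) changes, so
//   // spatially coherent queries (e.g. sub-voxel ray steps) reuse the cached stencil
//
//   // sample() above is seven nested 1-D lerps, lerp(a,b,w) = a + w*(b-a),
//   // applied along z, then y, then x:
//   float lerp1(float a, float b, float w) { return a + w * (b - a); }
//   float trilerp(const float (&v)[2][2][2], float u, float s, float w)
//   {
//       const float a = lerp1(lerp1(v[0][0][0], v[0][0][1], w), lerp1(v[0][1][0], v[0][1][1], w), s);
//       const float b = lerp1(lerp1(v[1][0][0], v[1][0][1], w), lerp1(v[1][1][0], v[1][1][1], w), s);
//       return lerp1(a, b, u); // (u,s,w) are the fractional coordinates from Floor()
//   }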
- /// - /// @warning Will only compile with floating point value types - template class Vec3T> - inline __hostdev__ bool zeroCrossing(Vec3T xyz) const; - -}; // SampleFromVoxels - -/// @brief Template specialization with caching of stencil values -template -class SampleFromVoxels : public TrilinearSampler -{ - using BaseT = TrilinearSampler; - using ValueT = typename TreeOrAccT::ValueType; - using CoordT = typename TreeOrAccT::CoordType; - - mutable CoordT mPos; - mutable ValueT mVal[2][2][2]; - - template class Vec3T> - __hostdev__ void cache(Vec3T& xyz) const; -public: - - /// @brief Construction from a Tree or ReadAccessor - __hostdev__ SampleFromVoxels(const TreeOrAccT& acc) : BaseT(acc), mPos(CoordT::max()){} - - /// @note xyz is in index space space - template class Vec3T> - inline __hostdev__ ValueT operator()(Vec3T xyz) const; - - // @note ijk is in index space space - __hostdev__ ValueT operator()(const CoordT &ijk) const; - - /// @brief Return the gradient in index space. - /// - /// @warning Will only compile with floating point value types - template class Vec3T> - inline __hostdev__ Vec3T gradient(Vec3T xyz) const; - - /// @brief Return true if the tr-linear stencil has a zero crossing at the specified index position. - /// - /// @warning Will only compile with floating point value types - template class Vec3T> - inline __hostdev__ bool zeroCrossing(Vec3T xyz) const; - - /// @brief Return true if the cached tri-linear stencil has a zero crossing. - /// - /// @warning Will only compile with floating point value types - __hostdev__ bool zeroCrossing() const { return BaseT::zeroCrossing(mVal); } - -}; // SampleFromVoxels - -template -template class Vec3T> -__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const -{ - this->cache(xyz); - return BaseT::sample(xyz, mVal); -} - -template -__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(const CoordT &ijk) const -{ - return ijk == mPos ? 
mVal[0][0][0] : BaseT::mAcc.getValue(ijk); -} - -template -template class Vec3T> -__hostdev__ Vec3T SampleFromVoxels::gradient(Vec3T xyz) const -{ - this->cache(xyz); - return BaseT::gradient(xyz, mVal); -} - -template -template class Vec3T> -__hostdev__ bool SampleFromVoxels::zeroCrossing(Vec3T xyz) const -{ - this->cache(xyz); - return BaseT::zeroCrossing(mVal); -} - -template -template class Vec3T> -__hostdev__ void SampleFromVoxels::cache(Vec3T& xyz) const -{ - CoordT ijk = Floor(xyz); - if (ijk != mPos) { - mPos = ijk; - BaseT::stencil(ijk, mVal); - } -} - -#if 0 - -template -template class Vec3T> -__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const -{ - ValueT val[2][2][2]; - CoordT ijk = Floor(xyz); - BaseT::stencil(ijk, val); - return BaseT::sample(xyz, val); -} - -#else - -template -template class Vec3T> -__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const -{ - auto lerp = [](ValueT a, ValueT b, RealT w) { return a + ValueT(w) * (b - a); }; - - CoordT coord = Floor(xyz); - - ValueT vx, vx1, vy, vy1, vz, vz1; - - vz = BaseT::mAcc.getValue(coord); - coord[2] += 1; - vz1 = BaseT::mAcc.getValue(coord); - vy = lerp(vz, vz1, xyz[2]); - - coord[1] += 1; - - vz1 = BaseT::mAcc.getValue(coord); - coord[2] -= 1; - vz = BaseT::mAcc.getValue(coord); - vy1 = lerp(vz, vz1, xyz[2]); - - vx = lerp(vy, vy1, xyz[1]); - - coord[0] += 1; - - vz = BaseT::mAcc.getValue(coord); - coord[2] += 1; - vz1 = BaseT::mAcc.getValue(coord); - vy1 = lerp(vz, vz1, xyz[2]); - - coord[1] -= 1; - - vz1 = BaseT::mAcc.getValue(coord); - coord[2] -= 1; - vz = BaseT::mAcc.getValue(coord); - vy = lerp(vz, vz1, xyz[2]); - - vx1 = lerp(vy, vy1, xyz[1]); - - return lerp(vx, vx1, xyz[0]); -} -#endif - - -template -template class Vec3T> -__hostdev__ inline Vec3T SampleFromVoxels::gradient(Vec3T xyz) const -{ - ValueT val[2][2][2]; - CoordT ijk = Floor(xyz); - BaseT::stencil(ijk, val); - return BaseT::gradient(xyz, val); -} - -template -template class Vec3T> -__hostdev__ bool SampleFromVoxels::zeroCrossing(Vec3T xyz) const -{ - ValueT val[2][2][2]; - CoordT ijk = Floor(xyz); - BaseT::stencil(ijk, val); - return BaseT::zeroCrossing(val); -} - -// ------------------------------> TriquadraticSampler <-------------------------------------- - -/// @brief Tri-quadratic sampler, i.e. 
second order, interpolator -template -class TriquadraticSampler -{ -protected: - const TreeOrAccT& mAcc; - -public: - using ValueT = typename TreeOrAccT::ValueType; - using CoordT = typename TreeOrAccT::CoordType; - static const int ORDER = 1; - - /// @brief Protected constructor from a Tree or ReadAccessor - __hostdev__ TriquadraticSampler(const TreeOrAccT& acc) : mAcc(acc) {} - - __hostdev__ const TreeOrAccT& accessor() const { return mAcc; } - - /// @brief Extract the stencil of 27 values - inline __hostdev__ void stencil(const CoordT &ijk, ValueT (&v)[3][3][3]) const; - - template class Vec3T> - static inline __hostdev__ ValueT sample(const Vec3T &uvw, const ValueT (&v)[3][3][3]); - - static inline __hostdev__ bool zeroCrossing(const ValueT (&v)[3][3][3]); -}; // TriquadraticSamplerBase - -template -__hostdev__ void TriquadraticSampler::stencil(const CoordT &ijk, ValueT (&v)[3][3][3]) const -{ - CoordT p(ijk[0] - 1, 0, 0); - for (int dx = 0; dx < 3; ++dx, ++p[0]) { - p[1] = ijk[1] - 1; - for (int dy = 0; dy < 3; ++dy, ++p[1]) { - p[2] = ijk[2] - 1; - for (int dz = 0; dz < 3; ++dz, ++p[2]) { - v[dx][dy][dz] = mAcc.getValue(p);// extract the stencil of 27 values - } - } - } -} - -template -template class Vec3T> -__hostdev__ typename TreeOrAccT::ValueType TriquadraticSampler::sample(const Vec3T &uvw, const ValueT (&v)[3][3][3]) -{ - auto kernel = [](const ValueT* value, double weight)->ValueT { - return weight * (weight * (0.5f * (value[0] + value[2]) - value[1]) + - 0.5f * (value[2] - value[0])) + value[1]; - }; - - ValueT vx[3]; - for (int dx = 0; dx < 3; ++dx) { - ValueT vy[3]; - for (int dy = 0; dy < 3; ++dy) { - vy[dy] = kernel(&v[dx][dy][0], uvw[2]); - }//loop over y - vx[dx] = kernel(vy, uvw[1]); - }//loop over x - return kernel(vx, uvw[0]); -} - -template -__hostdev__ bool TriquadraticSampler::zeroCrossing(const ValueT (&v)[3][3][3]) -{ - static_assert(is_floating_point::value, "TrilinearSampler::zeroCrossing requires a floating-point type"); - const bool less = v[0][0][0] < ValueT(0); - for (int dx = 0; dx < 3; ++dx) { - for (int dy = 0; dy < 3; ++dy) { - for (int dz = 0; dz < 3; ++dz) { - if (less ^ (v[dx][dy][dz] < ValueT(0))) return true; - } - } - } - return false; -} - -/// @brief Template specialization that does not use caching of stencil points -template -class SampleFromVoxels : public TriquadraticSampler -{ - using BaseT = TriquadraticSampler; - using ValueT = typename TreeOrAccT::ValueType; - using CoordT = typename TreeOrAccT::CoordType; -public: - - /// @brief Construction from a Tree or ReadAccessor - __hostdev__ SampleFromVoxels(const TreeOrAccT& acc) : BaseT(acc) {} - - /// @note xyz is in index space space - template class Vec3T> - inline __hostdev__ ValueT operator()(Vec3T xyz) const; - - __hostdev__ ValueT operator()(const CoordT &ijk) const {return BaseT::mAcc.getValue(ijk);} - - /// @brief Return true if the tr-linear stencil has a zero crossing at the specified index position. 
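// Example (not part of the patch): the 1-D kernel() inside TriquadraticSampler::sample
// above evaluates the unique parabola through three samples at offsets -1, 0, +1,
//   f(w) = v0 + w*(v1 - vm1)/2 + w^2*((vm1 + v1)/2 - v0),
// so f(-1)=vm1, f(0)=v0, f(+1)=v1. A standalone scalar sketch:
//
//   float quadKernel(const float v[3], float w) // v = {f(-1), f(0), f(+1)}
//   {
//       return w * (w * (0.5f * (v[0] + v[2]) - v[1]) + 0.5f * (v[2] - v[0])) + v[1];
//   }
//
// sample() applies this kernel 13 times: 9 times along z, 3 along y, and once along x.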
- /// - /// @warning Will only compile with floating point value types - template class Vec3T> - inline __hostdev__ bool zeroCrossing(Vec3T xyz) const; - -}; // SampleFromVoxels - -/// @brief Template specialization with caching of stencil values -template -class SampleFromVoxels : public TriquadraticSampler -{ - using BaseT = TriquadraticSampler; - using ValueT = typename TreeOrAccT::ValueType; - using CoordT = typename TreeOrAccT::CoordType; - - mutable CoordT mPos; - mutable ValueT mVal[3][3][3]; - - template class Vec3T> - __hostdev__ void cache(Vec3T& xyz) const; -public: - - /// @brief Construction from a Tree or ReadAccessor - __hostdev__ SampleFromVoxels(const TreeOrAccT& acc) : BaseT(acc), mPos(CoordT::max()){} - - /// @note xyz is in index space space - template class Vec3T> - inline __hostdev__ ValueT operator()(Vec3T xyz) const; - - inline __hostdev__ ValueT operator()(const CoordT &ijk) const; - - /// @brief Return true if the tr-linear stencil has a zero crossing at the specified index position. - /// - /// @warning Will only compile with floating point value types - template class Vec3T> - inline __hostdev__ bool zeroCrossing(Vec3T xyz) const; - - /// @brief Return true if the cached tri-linear stencil has a zero crossing. - /// - /// @warning Will only compile with floating point value types - __hostdev__ bool zeroCrossing() const { return BaseT::zeroCrossing(mVal); } - -}; // SampleFromVoxels - -template -template class Vec3T> -__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const -{ - this->cache(xyz); - return BaseT::sample(xyz, mVal); -} - -template -__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(const CoordT &ijk) const -{ - return ijk == mPos ? mVal[1][1][1] : BaseT::mAcc.getValue(ijk); -} - -template -template class Vec3T> -__hostdev__ bool SampleFromVoxels::zeroCrossing(Vec3T xyz) const -{ - this->cache(xyz); - return BaseT::zeroCrossing(mVal); -} - -template -template class Vec3T> -__hostdev__ void SampleFromVoxels::cache(Vec3T& xyz) const -{ - CoordT ijk = Floor(xyz); - if (ijk != mPos) { - mPos = ijk; - BaseT::stencil(ijk, mVal); - } -} - -template -template class Vec3T> -__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const -{ - ValueT val[3][3][3]; - CoordT ijk = Floor(xyz); - BaseT::stencil(ijk, val); - return BaseT::sample(xyz, val); -} - -template -template class Vec3T> -__hostdev__ bool SampleFromVoxels::zeroCrossing(Vec3T xyz) const -{ - ValueT val[3][3][3]; - CoordT ijk = Floor(xyz); - BaseT::stencil(ijk, val); - return BaseT::zeroCrossing(val); -} - -// ------------------------------> TricubicSampler <-------------------------------------- - -/// @brief Tri-cubic sampler, i.e. third order, interpolator. -/// -/// @details See the following paper for implementation details: -/// Lekien, F. and Marsden, J.: Tricubic interpolation in three dimensions. -/// In: International Journal for Numerical Methods -/// in Engineering (2005), No. 63, p. 
455-471 - -template -class TricubicSampler -{ -protected: - using ValueT = typename TreeOrAccT::ValueType; - using CoordT = typename TreeOrAccT::CoordType; - - const TreeOrAccT& mAcc; - -public: - /// @brief Construction from a Tree or ReadAccessor - __hostdev__ TricubicSampler(const TreeOrAccT& acc) - : mAcc(acc) - { - } - - __hostdev__ const TreeOrAccT& accessor() const { return mAcc; } - - /// @brief Extract the stencil of 8 values - inline __hostdev__ void stencil(const CoordT& ijk, ValueT (&c)[64]) const; - - template class Vec3T> - static inline __hostdev__ ValueT sample(const Vec3T &uvw, const ValueT (&c)[64]); -}; // TricubicSampler - -template -__hostdev__ void TricubicSampler::stencil(const CoordT& ijk, ValueT (&C)[64]) const -{ - auto fetch = [&](int i, int j, int k) -> ValueT& { return C[((i + 1) << 4) + ((j + 1) << 2) + k + 1]; }; - - // fetch 64 point stencil values - for (int i = -1; i < 3; ++i) { - for (int j = -1; j < 3; ++j) { - fetch(i, j, -1) = mAcc.getValue(ijk + CoordT(i, j, -1)); - fetch(i, j, 0) = mAcc.getValue(ijk + CoordT(i, j, 0)); - fetch(i, j, 1) = mAcc.getValue(ijk + CoordT(i, j, 1)); - fetch(i, j, 2) = mAcc.getValue(ijk + CoordT(i, j, 2)); - } - } - const ValueT half(0.5), quarter(0.25), eighth(0.125); - const ValueT X[64] = {// values of f(x,y,z) at the 8 corners (each from 1 stencil value). - fetch(0, 0, 0), - fetch(1, 0, 0), - fetch(0, 1, 0), - fetch(1, 1, 0), - fetch(0, 0, 1), - fetch(1, 0, 1), - fetch(0, 1, 1), - fetch(1, 1, 1), - // values of df/dx at the 8 corners (each from 2 stencil values). - half * (fetch(1, 0, 0) - fetch(-1, 0, 0)), - half * (fetch(2, 0, 0) - fetch(0, 0, 0)), - half * (fetch(1, 1, 0) - fetch(-1, 1, 0)), - half * (fetch(2, 1, 0) - fetch(0, 1, 0)), - half * (fetch(1, 0, 1) - fetch(-1, 0, 1)), - half * (fetch(2, 0, 1) - fetch(0, 0, 1)), - half * (fetch(1, 1, 1) - fetch(-1, 1, 1)), - half * (fetch(2, 1, 1) - fetch(0, 1, 1)), - // values of df/dy at the 8 corners (each from 2 stencil values). - half * (fetch(0, 1, 0) - fetch(0, -1, 0)), - half * (fetch(1, 1, 0) - fetch(1, -1, 0)), - half * (fetch(0, 2, 0) - fetch(0, 0, 0)), - half * (fetch(1, 2, 0) - fetch(1, 0, 0)), - half * (fetch(0, 1, 1) - fetch(0, -1, 1)), - half * (fetch(1, 1, 1) - fetch(1, -1, 1)), - half * (fetch(0, 2, 1) - fetch(0, 0, 1)), - half * (fetch(1, 2, 1) - fetch(1, 0, 1)), - // values of df/dz at the 8 corners (each from 2 stencil values). - half * (fetch(0, 0, 1) - fetch(0, 0, -1)), - half * (fetch(1, 0, 1) - fetch(1, 0, -1)), - half * (fetch(0, 1, 1) - fetch(0, 1, -1)), - half * (fetch(1, 1, 1) - fetch(1, 1, -1)), - half * (fetch(0, 0, 2) - fetch(0, 0, 0)), - half * (fetch(1, 0, 2) - fetch(1, 0, 0)), - half * (fetch(0, 1, 2) - fetch(0, 1, 0)), - half * (fetch(1, 1, 2) - fetch(1, 1, 0)), - // values of d2f/dxdy at the 8 corners (each from 4 stencil values). 
- quarter * (fetch(1, 1, 0) - fetch(-1, 1, 0) - fetch(1, -1, 0) + fetch(-1, -1, 0)), - quarter * (fetch(2, 1, 0) - fetch(0, 1, 0) - fetch(2, -1, 0) + fetch(0, -1, 0)), - quarter * (fetch(1, 2, 0) - fetch(-1, 2, 0) - fetch(1, 0, 0) + fetch(-1, 0, 0)), - quarter * (fetch(2, 2, 0) - fetch(0, 2, 0) - fetch(2, 0, 0) + fetch(0, 0, 0)), - quarter * (fetch(1, 1, 1) - fetch(-1, 1, 1) - fetch(1, -1, 1) + fetch(-1, -1, 1)), - quarter * (fetch(2, 1, 1) - fetch(0, 1, 1) - fetch(2, -1, 1) + fetch(0, -1, 1)), - quarter * (fetch(1, 2, 1) - fetch(-1, 2, 1) - fetch(1, 0, 1) + fetch(-1, 0, 1)), - quarter * (fetch(2, 2, 1) - fetch(0, 2, 1) - fetch(2, 0, 1) + fetch(0, 0, 1)), - // values of d2f/dxdz at the 8 corners (each from 4 stencil values). - quarter * (fetch(1, 0, 1) - fetch(-1, 0, 1) - fetch(1, 0, -1) + fetch(-1, 0, -1)), - quarter * (fetch(2, 0, 1) - fetch(0, 0, 1) - fetch(2, 0, -1) + fetch(0, 0, -1)), - quarter * (fetch(1, 1, 1) - fetch(-1, 1, 1) - fetch(1, 1, -1) + fetch(-1, 1, -1)), - quarter * (fetch(2, 1, 1) - fetch(0, 1, 1) - fetch(2, 1, -1) + fetch(0, 1, -1)), - quarter * (fetch(1, 0, 2) - fetch(-1, 0, 2) - fetch(1, 0, 0) + fetch(-1, 0, 0)), - quarter * (fetch(2, 0, 2) - fetch(0, 0, 2) - fetch(2, 0, 0) + fetch(0, 0, 0)), - quarter * (fetch(1, 1, 2) - fetch(-1, 1, 2) - fetch(1, 1, 0) + fetch(-1, 1, 0)), - quarter * (fetch(2, 1, 2) - fetch(0, 1, 2) - fetch(2, 1, 0) + fetch(0, 1, 0)), - // values of d2f/dydz at the 8 corners (each from 4 stencil values). - quarter * (fetch(0, 1, 1) - fetch(0, -1, 1) - fetch(0, 1, -1) + fetch(0, -1, -1)), - quarter * (fetch(1, 1, 1) - fetch(1, -1, 1) - fetch(1, 1, -1) + fetch(1, -1, -1)), - quarter * (fetch(0, 2, 1) - fetch(0, 0, 1) - fetch(0, 2, -1) + fetch(0, 0, -1)), - quarter * (fetch(1, 2, 1) - fetch(1, 0, 1) - fetch(1, 2, -1) + fetch(1, 0, -1)), - quarter * (fetch(0, 1, 2) - fetch(0, -1, 2) - fetch(0, 1, 0) + fetch(0, -1, 0)), - quarter * (fetch(1, 1, 2) - fetch(1, -1, 2) - fetch(1, 1, 0) + fetch(1, -1, 0)), - quarter * (fetch(0, 2, 2) - fetch(0, 0, 2) - fetch(0, 2, 0) + fetch(0, 0, 0)), - quarter * (fetch(1, 2, 2) - fetch(1, 0, 2) - fetch(1, 2, 0) + fetch(1, 0, 0)), - // values of d3f/dxdydz at the 8 corners (each from 8 stencil values). 
- eighth * (fetch(1, 1, 1) - fetch(-1, 1, 1) - fetch(1, -1, 1) + fetch(-1, -1, 1) - fetch(1, 1, -1) + fetch(-1, 1, -1) + fetch(1, -1, -1) - fetch(-1, -1, -1)), - eighth * (fetch(2, 1, 1) - fetch(0, 1, 1) - fetch(2, -1, 1) + fetch(0, -1, 1) - fetch(2, 1, -1) + fetch(0, 1, -1) + fetch(2, -1, -1) - fetch(0, -1, -1)), - eighth * (fetch(1, 2, 1) - fetch(-1, 2, 1) - fetch(1, 0, 1) + fetch(-1, 0, 1) - fetch(1, 2, -1) + fetch(-1, 2, -1) + fetch(1, 0, -1) - fetch(-1, 0, -1)), - eighth * (fetch(2, 2, 1) - fetch(0, 2, 1) - fetch(2, 0, 1) + fetch(0, 0, 1) - fetch(2, 2, -1) + fetch(0, 2, -1) + fetch(2, 0, -1) - fetch(0, 0, -1)), - eighth * (fetch(1, 1, 2) - fetch(-1, 1, 2) - fetch(1, -1, 2) + fetch(-1, -1, 2) - fetch(1, 1, 0) + fetch(-1, 1, 0) + fetch(1, -1, 0) - fetch(-1, -1, 0)), - eighth * (fetch(2, 1, 2) - fetch(0, 1, 2) - fetch(2, -1, 2) + fetch(0, -1, 2) - fetch(2, 1, 0) + fetch(0, 1, 0) + fetch(2, -1, 0) - fetch(0, -1, 0)), - eighth * (fetch(1, 2, 2) - fetch(-1, 2, 2) - fetch(1, 0, 2) + fetch(-1, 0, 2) - fetch(1, 2, 0) + fetch(-1, 2, 0) + fetch(1, 0, 0) - fetch(-1, 0, 0)), - eighth * (fetch(2, 2, 2) - fetch(0, 2, 2) - fetch(2, 0, 2) + fetch(0, 0, 2) - fetch(2, 2, 0) + fetch(0, 2, 0) + fetch(2, 0, 0) - fetch(0, 0, 0))}; - - // 4Kb of static table (int8_t has a range of -127 -> 127 which suffices) - static const int8_t A[64][64] = { - {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {-3, 3, 0, 0, 0, 0, 0, 0, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {2, -2, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {-3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {9, -9, -9, 9, 0, 0, 0, 0, 6, 3, -6, -3, 0, 0, 0, 0, 6, -6, 3, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {-6, 6, 6, -6, 0, 0, 0, 0, 
-3, -3, 3, 3, 0, 0, 0, 0, -4, 4, -2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -2, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {-6, 6, 6, -6, 0, 0, 0, 0, -4, -2, 4, 2, 0, 0, 0, 0, -3, 3, -3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -1, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {4, -4, -4, 4, 0, 0, 0, 0, 2, 2, -2, -2, 0, 0, 0, 0, 2, -2, 2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, 0, 0, 0, 0, -2, -1, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, -9, -9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 3, -6, -3, 0, 0, 0, 0, 6, -6, 3, -3, 0, 0, 0, 0, 4, 2, 2, 1, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -6, 6, 6, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, -3, 3, 3, 0, 0, 0, 0, -4, 4, -2, 2, 0, 0, 0, 0, -2, -2, -1, -1, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -6, 6, 6, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -2, 4, 2, 0, 0, 0, 0, -3, 3, -3, 3, 0, 0, 0, 0, -2, -1, -2, -1, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4, -4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, -2, -2, 0, 0, 0, 0, 2, -2, 2, -2, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0}, - {-3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {9, -9, 0, 0, -9, 9, 0, 0, 6, 3, 0, 0, -6, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, -6, 0, 0, 3, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 2, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {-6, 6, 0, 0, 6, -6, 0, 0, -3, -3, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, 4, 0, 0, -2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -2, 0, 0, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, 0, 0, -1, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, -9, 0, 0, -9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 3, 0, 0, -6, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, -6, 0, 0, 3, -3, 0, 0, 4, 2, 0, 0, 2, 1, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -6, 6, 0, 0, 6, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, -3, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, 4, 0, 0, -2, 2, 0, 0, -2, -2, 0, 0, -1, -1, 0, 0}, - {9, 0, -9, 0, -9, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 3, 0, -6, 0, -3, 0, 6, 0, -6, 0, 3, 0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 2, 0, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 9, 0, -9, 0, -9, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 3, 0, -6, 0, -3, 0, 6, 0, -6, 0, 3, 0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 2, 0, 2, 0, 1, 0}, - {-27, 27, 27, -27, 27, -27, -27, 27, -18, -9, 18, 9, 18, 9, -18, -9, -18, 18, -9, 9, 18, -18, 9, -9, -18, 18, 18, -18, -9, 9, 9, -9, -12, -6, -6, -3, 12, 6, 6, 3, -12, -6, 12, 6, -6, -3, 6, 3, -12, 12, -6, 6, -6, 6, -3, 3, -8, -4, -4, -2, -4, -2, -2, -1}, - {18, -18, -18, 18, -18, 18, 18, -18, 9, 9, -9, -9, -9, -9, 9, 9, 12, -12, 6, -6, -12, 12, -6, 6, 12, -12, -12, 12, 6, -6, -6, 6, 6, 6, 3, 3, -6, -6, -3, -3, 6, 6, -6, -6, 3, 3, -3, -3, 8, -8, 4, -4, 4, -4, 2, -2, 4, 4, 2, 2, 2, 2, 1, 1}, - {-6, 0, 6, 0, 6, 0, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, -3, 0, 3, 0, 3, 0, -4, 0, 4, 0, -2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -2, 0, -1, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, -6, 0, 6, 0, 6, 0, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, -3, 0, 3, 0, 3, 0, -4, 0, 4, 0, -2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -2, 0, -1, 0, -1, 0}, - {18, -18, -18, 
18, -18, 18, 18, -18, 12, 6, -12, -6, -12, -6, 12, 6, 9, -9, 9, -9, -9, 9, -9, 9, 12, -12, -12, 12, 6, -6, -6, 6, 6, 3, 6, 3, -6, -3, -6, -3, 8, 4, -8, -4, 4, 2, -4, -2, 6, -6, 6, -6, 3, -3, 3, -3, 4, 2, 4, 2, 2, 1, 2, 1}, - {-12, 12, 12, -12, 12, -12, -12, 12, -6, -6, 6, 6, 6, 6, -6, -6, -6, 6, -6, 6, 6, -6, 6, -6, -8, 8, 8, -8, -4, 4, 4, -4, -3, -3, -3, -3, 3, 3, 3, 3, -4, -4, 4, 4, -2, -2, 2, 2, -4, 4, -4, 4, -2, 2, -2, 2, -2, -2, -2, -2, -1, -1, -1, -1}, - {2, 0, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {-6, 6, 0, 0, 6, -6, 0, 0, -4, -2, 0, 0, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, -3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -1, 0, 0, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {4, -4, 0, 0, -4, 4, 0, 0, 2, 2, 0, 0, -2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 0, 0, 2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -6, 6, 0, 0, 6, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -2, 0, 0, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, -3, 3, 0, 0, -2, -1, 0, 0, -2, -1, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4, 0, 0, -4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, -2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 0, 0, 2, -2, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0}, - {-6, 0, 6, 0, 6, 0, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, 0, -2, 0, 4, 0, 2, 0, -3, 0, 3, 0, -3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, -2, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, -6, 0, 6, 0, 6, 0, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, 0, -2, 0, 4, 0, 2, 0, -3, 0, 3, 0, -3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, -2, 0, -1, 0}, - {18, -18, -18, 18, -18, 18, 18, -18, 12, 6, -12, -6, -12, -6, 12, 6, 12, -12, 6, -6, -12, 12, -6, 6, 9, -9, -9, 9, 9, -9, -9, 9, 8, 4, 4, 2, -8, -4, -4, -2, 6, 3, -6, -3, 6, 3, -6, -3, 6, -6, 3, -3, 6, -6, 3, -3, 4, 2, 2, 1, 4, 2, 2, 1}, - {-12, 12, 12, -12, 12, -12, -12, 12, -6, -6, 6, 6, 6, 6, -6, -6, -8, 8, -4, 4, 8, -8, 4, -4, -6, 6, 6, -6, -6, 6, 6, -6, -4, -4, -2, -2, 4, 4, 2, 2, -3, -3, 3, 3, -3, -3, 3, 3, -4, 4, -2, 2, -4, 4, -2, 2, -2, -2, -1, -1, -2, -2, -1, -1}, - {4, 0, -4, 0, -4, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, -2, 0, -2, 0, 2, 0, -2, 0, 2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 4, 0, -4, 0, -4, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, -2, 0, -2, 0, 2, 0, -2, 0, 2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0}, - {-12, 12, 12, -12, 12, -12, -12, 12, -8, -4, 8, 4, 8, 4, -8, -4, -6, 6, -6, 6, 6, -6, 6, -6, -6, 6, 6, -6, -6, 6, 6, -6, -4, -2, -4, -2, 4, 2, 4, 2, -4, -2, 4, 2, -4, -2, 4, 2, -3, 3, -3, 
3, -3, 3, -3, 3, -2, -1, -2, -1, -2, -1, -2, -1}, - {8, -8, -8, 8, -8, 8, 8, -8, 4, 4, -4, -4, -4, -4, 4, 4, 4, -4, 4, -4, -4, 4, -4, 4, 4, -4, -4, 4, 4, -4, -4, 4, 2, 2, 2, 2, -2, -2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2, 2, -2, 2, -2, 2, -2, 2, -2, 1, 1, 1, 1, 1, 1, 1, 1}}; - - for (int i = 0; i < 64; ++i) { // C = A * X - C[i] = ValueT(0); -#if 0 - for (int j = 0; j < 64; j += 4) { - C[i] = fma(A[i][j], X[j], fma(A[i][j+1], X[j+1], fma(A[i][j+2], X[j+2], fma(A[i][j+3], X[j+3], C[i])))); - } -#else - for (int j = 0; j < 64; j += 4) { - C[i] += A[i][j] * X[j] + A[i][j + 1] * X[j + 1] + A[i][j + 2] * X[j + 2] + A[i][j + 3] * X[j + 3]; - } -#endif - } -} - -template -template class Vec3T> -__hostdev__ typename TreeOrAccT::ValueType TricubicSampler::sample(const Vec3T &xyz, const ValueT (&C)[64]) -{ - ValueT zPow(1), sum(0); - for (int k = 0, n = 0; k < 4; ++k) { - ValueT yPow(1); - for (int j = 0; j < 4; ++j, n += 4) { -#if 0 - sum = fma( yPow, zPow * fma(xyz[0], fma(xyz[0], fma(xyz[0], C[n + 3], C[n + 2]), C[n + 1]), C[n]), sum); -#else - sum += yPow * zPow * (C[n] + xyz[0] * (C[n + 1] + xyz[0] * (C[n + 2] + xyz[0] * C[n + 3]))); -#endif - yPow *= xyz[1]; - } - zPow *= xyz[2]; - } - return sum; -} - -template -class SampleFromVoxels : public TricubicSampler -{ - using BaseT = TricubicSampler; - using ValueT = typename TreeOrAccT::ValueType; - using CoordT = typename TreeOrAccT::CoordType; - - mutable CoordT mPos; - mutable ValueT mC[64]; - - template class Vec3T> - __hostdev__ void cache(Vec3T& xyz) const; - -public: - /// @brief Construction from a Tree or ReadAccessor - __hostdev__ SampleFromVoxels(const TreeOrAccT& acc) - : BaseT(acc) - { - } - - /// @note xyz is in index space space - template class Vec3T> - inline __hostdev__ ValueT operator()(Vec3T xyz) const; - - // @brief Return value at the coordinate @a ijk in index space space - __hostdev__ ValueT operator()(const CoordT &ijk) const {return BaseT::mAcc.getValue(ijk);} - -}; // SampleFromVoxels - -template -template class Vec3T> -__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const -{ - this->cache(xyz); - return BaseT::sample(xyz, mC); -} - -template -template class Vec3T> -__hostdev__ void SampleFromVoxels::cache(Vec3T& xyz) const -{ - CoordT ijk = Floor(xyz); - if (ijk != mPos) { - mPos = ijk; - BaseT::stencil(ijk, mC); - } -} - -template -class SampleFromVoxels : public TricubicSampler -{ - using BaseT = TricubicSampler; - using ValueT = typename TreeOrAccT::ValueType; - using CoordT = typename TreeOrAccT::CoordType; - -public: - /// @brief Construction from a Tree or ReadAccessor - __hostdev__ SampleFromVoxels(const TreeOrAccT& acc) - : BaseT(acc) - { - } - - /// @note xyz is in index space space - template class Vec3T> - inline __hostdev__ ValueT operator()(Vec3T xyz) const; - - __hostdev__ ValueT operator()(const CoordT &ijk) const {return BaseT::mAcc.getValue(ijk);} - -}; // SampleFromVoxels - -template -template class Vec3T> -__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const -{ - ValueT C[64]; - CoordT ijk = Floor(xyz); - BaseT::stencil(ijk, C); - return BaseT::sample(xyz, C); -} - -} // namespace nanovdb - -#endif // NANOVDB_SAMPLE_FROM_VOXELS_H_HAS_BEEN_INCLUDED +#include // for NANOVDB_DEPRECATED_HEADER +#include +NANOVDB_DEPRECATED_HEADER("Include nanovdb/math/SampleFromVoxels.h instead.") diff --git a/nanovdb/nanovdb/util/Stencils.h b/nanovdb/nanovdb/util/Stencils.h index 88e943f4ff..c93b4a15cf 100644 --- a/nanovdb/nanovdb/util/Stencils.h +++ 
b/nanovdb/nanovdb/util/Stencils.h
@@ -1,1028 +1,6 @@
 // Copyright Contributors to the OpenVDB Project
 // SPDX-License-Identifier: MPL-2.0
-//
-/// @author Ken Museth
-///
-/// @date April 9, 2021
-///
-/// @file Stencils.h
-///
-/// @brief Defines various finite-difference stencils that allow for the
-///        computation of gradients of order 1 to 5, mean curvatures,
-///        gaussian curvatures, principal curvatures, tri-linear interpolation,
-///        zero-crossing, laplacian, and closest point transform.
-
-#ifndef NANOVDB_STENCILS_HAS_BEEN_INCLUDED
-#define NANOVDB_STENCILS_HAS_BEEN_INCLUDED
-
-#include <nanovdb/NanoVDB.h> // for __hostdev__, Vec3, Min, Max, Pow2, Pow3, Pow4
-
-namespace nanovdb {
-
-// ---------------------------- WENO5 ----------------------------
-
-/// @brief Implementation of nominally fifth-order finite-difference WENO
-/// @details This function returns the numerical flux. See "High Order Finite Difference and
-/// Finite Volume WENO Schemes and Discontinuous Galerkin Methods for CFD" - Chi-Wang Shu
-/// ICASE Report No 2001-11 (page 6). Also see ICASE No 97-65 for a more complete reference
-/// (Shu, 1997).
-/// Given v1 = f(x-2dx), v2 = f(x-dx), v3 = f(x), v4 = f(x+dx) and v5 = f(x+2dx),
-/// return an interpolated value f(x+dx/2) with the special property that
-/// ( f(x+dx/2) - f(x-dx/2) ) / dx = df/dx (x) + error,
-/// where the error is fifth-order in smooth regions: O(dx) <= error <= O(dx^5)
-template<typename ValueType, typename RealT = ValueType>
-__hostdev__ inline ValueType
-WENO5(const ValueType& v1,
-      const ValueType& v2,
-      const ValueType& v3,
-      const ValueType& v4,
-      const ValueType& v5,
-      RealT scale2 = 1.0)// openvdb uses scale2 = 0.01
-{
-    static const RealT C = 13.0 / 12.0;
-    // WENO is formulated for non-dimensional equations, here the optional scale2
-    // is a reference value (squared) for the function being interpolated. For
-    // example if 'v' is of order 1000, then scale2 = 10^6 is ok. But in practice
-    // leave scale2 = 1.
- const RealT eps = RealT(1.0e-6) * scale2; - // {\tilde \omega_k} = \gamma_k / ( \beta_k + \epsilon)^2 in Shu's ICASE report) - const RealT A1 = RealT(0.1)/Pow2(C*Pow2(v1-2*v2+v3)+RealT(0.25)*Pow2(v1-4*v2+3*v3)+eps), - A2 = RealT(0.6)/Pow2(C*Pow2(v2-2*v3+v4)+RealT(0.25)*Pow2(v2-v4)+eps), - A3 = RealT(0.3)/Pow2(C*Pow2(v3-2*v4+v5)+RealT(0.25)*Pow2(3*v3-4*v4+v5)+eps); - - return static_cast((A1*(2*v1 - 7*v2 + 11*v3) + - A2*(5*v3 - v2 + 2*v4) + - A3*(2*v3 + 5*v4 - v5))/(6*(A1+A2+A3))); -} - -// ---------------------------- GodunovsNormSqrd ---------------------------- - -template -__hostdev__ inline RealT -GodunovsNormSqrd(bool isOutside, - RealT dP_xm, RealT dP_xp, - RealT dP_ym, RealT dP_yp, - RealT dP_zm, RealT dP_zp) -{ - RealT dPLen2; - if (isOutside) { // outside - dPLen2 = Max(Pow2(Max(dP_xm, RealT(0))), Pow2(Min(dP_xp, RealT(0)))); // (dP/dx)2 - dPLen2 += Max(Pow2(Max(dP_ym, RealT(0))), Pow2(Min(dP_yp, RealT(0)))); // (dP/dy)2 - dPLen2 += Max(Pow2(Max(dP_zm, RealT(0))), Pow2(Min(dP_zp, RealT(0)))); // (dP/dz)2 - } else { // inside - dPLen2 = Max(Pow2(Min(dP_xm, RealT(0))), Pow2(Max(dP_xp, RealT(0)))); // (dP/dx)2 - dPLen2 += Max(Pow2(Min(dP_ym, RealT(0))), Pow2(Max(dP_yp, RealT(0)))); // (dP/dy)2 - dPLen2 += Max(Pow2(Min(dP_zm, RealT(0))), Pow2(Max(dP_zp, RealT(0)))); // (dP/dz)2 - } - return dPLen2; // |\nabla\phi|^2 -} - -template -__hostdev__ inline RealT -GodunovsNormSqrd(bool isOutside, - const Vec3& gradient_m, - const Vec3& gradient_p) -{ - return GodunovsNormSqrd(isOutside, - gradient_m[0], gradient_p[0], - gradient_m[1], gradient_p[1], - gradient_m[2], gradient_p[2]); -} - -// ---------------------------- BaseStencil ---------------------------- - -// BaseStencil uses curiously recurring template pattern (CRTP) -template -class BaseStencil -{ -public: - using ValueType = typename GridT::ValueType; - using GridType = GridT; - using TreeType = typename GridT::TreeType; - using AccessorType = typename GridT::AccessorType;// ReadAccessor; - - /// @brief Initialize the stencil buffer with the values of voxel (i, j, k) - /// and its neighbors. - /// @param ijk Index coordinates of stencil center - __hostdev__ inline void moveTo(const Coord& ijk) - { - mCenter = ijk; - mValues[0] = mAcc.getValue(ijk); - static_cast(*this).init(mCenter); - } - - /// @brief Initialize the stencil buffer with the values of voxel (i, j, k) - /// and its neighbors. The method also takes a value of the center - /// element of the stencil, assuming it is already known. - /// @param ijk Index coordinates of stencil center - /// @param centerValue Value of the center element of the stencil - __hostdev__ inline void moveTo(const Coord& ijk, const ValueType& centerValue) - { - mCenter = ijk; - mValues[0] = centerValue; - static_cast(*this).init(mCenter); - } - - /// @brief Initialize the stencil buffer with the values of voxel - /// (x, y, z) and its neighbors. - /// - /// @note This version is slightly faster than the one above, since - /// the center voxel's value is read directly from the iterator. - template - __hostdev__ inline void moveTo(const IterType& iter) - { - mCenter = iter.getCoord(); - mValues[0] = *iter; - static_cast(*this).init(mCenter); - } - - /// @brief Initialize the stencil buffer with the values of voxel (x, y, z) - /// and its neighbors. - /// @param xyz Floating point voxel coordinates of stencil center - /// @details This method will check to see if it is necessary to - /// update the stencil based on the cached index coordinates of - /// the center point. 
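// Example (not part of the patch): how the WENO5 and GodunovsNormSqrd helpers above
// combine for a level set phi. Godunov's scheme selects the upwind one-sided
// derivatives based on the sign of the center value; dxm/dxp/dym/dyp/dzm/dzp are
// hypothetical one-sided derivatives of phi, already divided by dx:
//
//   const bool  outside = phi0 > 0.0f;
//   const float normSq  = nanovdb::GodunovsNormSqrd(outside, dxm, dxp, dym, dyp, dzm, dzp);
//   // a well-conditioned signed distance field has |grad(phi)| ~ 1, i.e. normSq ~ 1
//
// WenoStencil::normSqGrad further down builds exactly these six arguments with WENO5().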
- template - __hostdev__ inline void moveTo(const Vec3& xyz) - { - Coord ijk = RoundDown(xyz); - if (ijk != mCenter) this->moveTo(ijk); - } - - /// @brief Return the value from the stencil buffer with linear - /// offset pos. - /// - /// @note The default (@a pos = 0) corresponds to the first element - /// which is typically the center point of the stencil. - __hostdev__ inline const ValueType& getValue(unsigned int pos = 0) const - { - NANOVDB_ASSERT(pos < SIZE); - return mValues[pos]; - } - - /// @brief Return the value at the specified location relative to the center of the stencil - template - __hostdev__ inline const ValueType& getValue() const - { - return mValues[static_cast(*this).template pos()]; - } - - /// @brief Set the value at the specified location relative to the center of the stencil - template - __hostdev__ inline void setValue(const ValueType& value) - { - mValues[static_cast(*this).template pos()] = value; - } - - /// @brief Return the size of the stencil buffer. - __hostdev__ static int size() { return SIZE; } - - /// @brief Return the mean value of the current stencil. - __hostdev__ inline ValueType mean() const - { - ValueType sum = 0.0; - for (int i = 0; i < SIZE; ++i) sum += mValues[i]; - return sum / ValueType(SIZE); - } - - /// @brief Return the smallest value in the stencil buffer. - __hostdev__ inline ValueType min() const - { - ValueType v = mValues[0]; - for (int i=1; i v) v = mValues[i]; - } - return v; - } - - /// @brief Return the coordinates of the center point of the stencil. - __hostdev__ inline const Coord& getCenterCoord() const { return mCenter; } - - /// @brief Return the value at the center of the stencil - __hostdev__ inline const ValueType& getCenterValue() const { return mValues[0]; } - - /// @brief Return true if the center of the stencil intersects the - /// iso-contour specified by the isoValue - __hostdev__ inline bool intersects(const ValueType &isoValue = ValueType(0) ) const - { - const bool less = this->getValue< 0, 0, 0>() < isoValue; - return (less ^ (this->getValue<-1, 0, 0>() < isoValue)) || - (less ^ (this->getValue< 1, 0, 0>() < isoValue)) || - (less ^ (this->getValue< 0,-1, 0>() < isoValue)) || - (less ^ (this->getValue< 0, 1, 0>() < isoValue)) || - (less ^ (this->getValue< 0, 0,-1>() < isoValue)) || - (less ^ (this->getValue< 0, 0, 1>() < isoValue)) ; - } - struct Mask { - uint8_t bits; - __hostdev__ Mask() : bits(0u) {} - __hostdev__ void set(int i) { bits |= (1 << i); } - __hostdev__ bool test(int i) const { return bits & (1 << i); } - __hostdev__ bool any() const { return bits > 0u; } - __hostdev__ bool all() const { return bits == 255u; } - __hostdev__ bool none() const { return bits == 0u; } - __hostdev__ int count() const { return CountOn(bits); } - };// Mask - - /// @brief Return true a bit-mask where the 6 lower bits indicates if the - /// center of the stencil intersects the iso-contour specified by the isoValue. - /// - /// @note There are 2^6 = 64 different possible cases, including no intersections! - /// - /// @details The ordering of bit mask is ( -x, +x, -y, +y, -z, +z ), so to - /// check if there is an intersection in -y use (mask & (1u<<2)) where mask is - /// ther return value from this function. To check if there are any - /// intersections use mask!=0u, and for no intersections use mask==0u. - /// To count the number of intersections use __builtin_popcount(mask). 
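// Example (not part of the patch): decoding the 6-bit mask documented above. Bit order
// is (-x, +x, -y, +y, -z, +z); "stencil" is a hypothetical stencil already moveTo()'ed:
//
//   auto mask = stencil.intersectionMask(0.0f);
//   if (mask.none()) { /* no iso-crossing along any of the 6 axial directions */ }
//   if (mask.test(2)) { /* crossing towards -y */ }
//   const int n = mask.count(); // number of crossing directions, 0..6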
- __hostdev__ inline Mask intersectionMask(ValueType isoValue = ValueType(0)) const - { - Mask mask; - const bool less = this->getValue< 0, 0, 0>() < isoValue; - if (less ^ (this->getValue<-1, 0, 0>() < isoValue)) mask.set(0);// |= 1u; - if (less ^ (this->getValue< 1, 0, 0>() < isoValue)) mask.set(1);// |= 2u; - if (less ^ (this->getValue< 0,-1, 0>() < isoValue)) mask.set(2);// |= 4u; - if (less ^ (this->getValue< 0, 1, 0>() < isoValue)) mask.set(3);// |= 8u; - if (less ^ (this->getValue< 0, 0,-1>() < isoValue)) mask.set(4);// |= 16u; - if (less ^ (this->getValue< 0, 0, 1>() < isoValue)) mask.set(5);// |= 32u; - return mask; - } - - /// @brief Return a const reference to the grid from which this - /// stencil was constructed. - __hostdev__ inline const GridType& grid() const { return *mGrid; } - - /// @brief Return a const reference to the ValueAccessor - /// associated with this Stencil. - __hostdev__ inline const AccessorType& accessor() const { return mAcc; } - -protected: - // Constructor is protected to prevent direct instantiation. - __hostdev__ BaseStencil(const GridType& grid) - : mGrid(&grid) - , mAcc(grid) - , mCenter(Coord::max()) - { - } - - const GridType* mGrid; - AccessorType mAcc; - ValueType mValues[SIZE]; - Coord mCenter; - -}; // BaseStencil class - - -// ---------------------------- BoxStencil ---------------------------- - - -namespace { // anonymous namespace for stencil-layout map - - // the eight point box stencil - template struct BoxPt {}; - template<> struct BoxPt< 0, 0, 0> { enum { idx = 0 }; }; - template<> struct BoxPt< 0, 0, 1> { enum { idx = 1 }; }; - template<> struct BoxPt< 0, 1, 1> { enum { idx = 2 }; }; - template<> struct BoxPt< 0, 1, 0> { enum { idx = 3 }; }; - template<> struct BoxPt< 1, 0, 0> { enum { idx = 4 }; }; - template<> struct BoxPt< 1, 0, 1> { enum { idx = 5 }; }; - template<> struct BoxPt< 1, 1, 1> { enum { idx = 6 }; }; - template<> struct BoxPt< 1, 1, 0> { enum { idx = 7 }; }; - -} - -template -class BoxStencil: public BaseStencil, 8, GridT> -{ - using SelfT = BoxStencil; - using BaseType = BaseStencil; -public: - using GridType = GridT; - using TreeType = typename GridT::TreeType; - using ValueType = typename GridT::ValueType; - - static constexpr int SIZE = 8; - - __hostdev__ BoxStencil(const GridType& grid) : BaseType(grid) {} - - /// Return linear offset for the specified stencil point relative to its center - template - __hostdev__ unsigned int pos() const { return BoxPt::idx; } - - /// @brief Return true if the center of the stencil intersects the - /// iso-contour specified by the isoValue - __hostdev__ inline bool intersects(ValueType isoValue = ValueType(0)) const - { - const bool less = mValues[0] < isoValue; - return (less ^ (mValues[1] < isoValue)) || - (less ^ (mValues[2] < isoValue)) || - (less ^ (mValues[3] < isoValue)) || - (less ^ (mValues[4] < isoValue)) || - (less ^ (mValues[5] < isoValue)) || - (less ^ (mValues[6] < isoValue)) || - (less ^ (mValues[7] < isoValue)) ; - } - - /// @brief Return the trilinear interpolation at the normalized position. - /// @param xyz Floating point coordinate position. Index space and NOT world space. - /// @warning It is assumed that the stencil has already been moved - /// to the relevant voxel position, e.g. using moveTo(xyz). 
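// Example (not part of the patch): typical BoxStencil use with the interpolation() and
// gradient() members defined below; "grid" is a hypothetical nanovdb::FloatGrid*:
//
//   nanovdb::BoxStencil<nanovdb::FloatGrid> box(*grid);
//   const nanovdb::Vec3f xyz(1.25f, 2.5f, 3.75f); // index space
//   box.moveTo(xyz);                              // snaps to RoundDown(xyz), fills 8 values
//   float v = box.interpolation(xyz);             // tri-linear value
//   auto  g = box.gradient(xyz);                  // world-space gradient via map().applyIJT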
- /// @note Trilinear interpolation kernal reads as: - /// v000 (1-u)(1-v)(1-w) + v001 (1-u)(1-v)w + v010 (1-u)v(1-w) + v011 (1-u)vw - /// + v100 u(1-v)(1-w) + v101 u(1-v)w + v110 uv(1-w) + v111 uvw - __hostdev__ inline ValueType interpolation(const Vec3& xyz) const - { - const ValueType u = xyz[0] - mCenter[0]; - const ValueType v = xyz[1] - mCenter[1]; - const ValueType w = xyz[2] - mCenter[2]; - - NANOVDB_ASSERT(u>=0 && u<=1); - NANOVDB_ASSERT(v>=0 && v<=1); - NANOVDB_ASSERT(w>=0 && w<=1); - - ValueType V = BaseType::template getValue<0,0,0>(); - ValueType A = V + (BaseType::template getValue<0,0,1>() - V) * w; - V = BaseType::template getValue< 0, 1, 0>(); - ValueType B = V + (BaseType::template getValue<0,1,1>() - V) * w; - ValueType C = A + (B - A) * v; - - V = BaseType::template getValue<1,0,0>(); - A = V + (BaseType::template getValue<1,0,1>() - V) * w; - V = BaseType::template getValue<1,1,0>(); - B = V + (BaseType::template getValue<1,1,1>() - V) * w; - ValueType D = A + (B - A) * v; - - return C + (D - C) * u; - } - - /// @brief Return the gradient in world space of the trilinear interpolation kernel. - /// @param xyz Floating point coordinate position. - /// @warning It is assumed that the stencil has already been moved - /// to the relevant voxel position, e.g. using moveTo(xyz). - /// @note Computed as partial derivatives of the trilinear interpolation kernel: - /// v000 (1-u)(1-v)(1-w) + v001 (1-u)(1-v)w + v010 (1-u)v(1-w) + v011 (1-u)vw - /// + v100 u(1-v)(1-w) + v101 u(1-v)w + v110 uv(1-w) + v111 uvw - __hostdev__ inline Vec3 gradient(const Vec3& xyz) const - { - const ValueType u = xyz[0] - mCenter[0]; - const ValueType v = xyz[1] - mCenter[1]; - const ValueType w = xyz[2] - mCenter[2]; - - NANOVDB_ASSERT(u>=0 && u<=1); - NANOVDB_ASSERT(v>=0 && v<=1); - NANOVDB_ASSERT(w>=0 && w<=1); - - ValueType D[4]={BaseType::template getValue<0,0,1>()-BaseType::template getValue<0,0,0>(), - BaseType::template getValue<0,1,1>()-BaseType::template getValue<0,1,0>(), - BaseType::template getValue<1,0,1>()-BaseType::template getValue<1,0,0>(), - BaseType::template getValue<1,1,1>()-BaseType::template getValue<1,1,0>()}; - - // Z component - ValueType A = D[0] + (D[1]- D[0]) * v; - ValueType B = D[2] + (D[3]- D[2]) * v; - Vec3 grad(0, 0, A + (B - A) * u); - - D[0] = BaseType::template getValue<0,0,0>() + D[0] * w; - D[1] = BaseType::template getValue<0,1,0>() + D[1] * w; - D[2] = BaseType::template getValue<1,0,0>() + D[2] * w; - D[3] = BaseType::template getValue<1,1,0>() + D[3] * w; - - // X component - A = D[0] + (D[1] - D[0]) * v; - B = D[2] + (D[3] - D[2]) * v; - - grad[0] = B - A; - - // Y component - A = D[1] - D[0]; - B = D[3] - D[2]; - - grad[1] = A + (B - A) * u; - - return BaseType::mGrid->map().applyIJT(grad); - } - -private: - __hostdev__ inline void init(const Coord& ijk) - { - mValues[ 1] = mAcc.getValue(ijk.offsetBy( 0, 0, 1)); - mValues[ 2] = mAcc.getValue(ijk.offsetBy( 0, 1, 1)); - mValues[ 3] = mAcc.getValue(ijk.offsetBy( 0, 1, 0)); - mValues[ 4] = mAcc.getValue(ijk.offsetBy( 1, 0, 0)); - mValues[ 5] = mAcc.getValue(ijk.offsetBy( 1, 0, 1)); - mValues[ 6] = mAcc.getValue(ijk.offsetBy( 1, 1, 1)); - mValues[ 7] = mAcc.getValue(ijk.offsetBy( 1, 1, 0)); - } - - template friend class BaseStencil; // allow base class to call init() - using BaseType::mAcc; - using BaseType::mValues; - using BaseType::mCenter; -};// BoxStencil class - - -// ---------------------------- GradStencil ---------------------------- - -namespace { // anonymous namespace for stencil-layout map - - 
template struct GradPt {}; - template<> struct GradPt< 0, 0, 0> { enum { idx = 0 }; }; - template<> struct GradPt< 1, 0, 0> { enum { idx = 2 }; }; - template<> struct GradPt< 0, 1, 0> { enum { idx = 4 }; }; - template<> struct GradPt< 0, 0, 1> { enum { idx = 6 }; }; - template<> struct GradPt<-1, 0, 0> { enum { idx = 1 }; }; - template<> struct GradPt< 0,-1, 0> { enum { idx = 3 }; }; - template<> struct GradPt< 0, 0,-1> { enum { idx = 5 }; }; -} - -/// This is a simple 7-point nearest neighbor stencil that supports -/// gradient by second-order central differencing, first-order upwinding, -/// Laplacian, closest-point transform and zero-crossing test. -/// -/// @note For optimal random access performance this class -/// includes its own grid accessor. -template -class GradStencil : public BaseStencil, 7, GridT> -{ - using SelfT = GradStencil; - using BaseType = BaseStencil; -public: - using GridType = GridT; - using TreeType = typename GridT::TreeType; - using ValueType = typename GridT::ValueType; - - static constexpr int SIZE = 7; - - __hostdev__ GradStencil(const GridType& grid) - : BaseType(grid) - , mInv2Dx(ValueType(0.5 / grid.voxelSize()[0])) - , mInvDx2(ValueType(4.0 * mInv2Dx * mInv2Dx)) - { - } - - __hostdev__ GradStencil(const GridType& grid, double dx) - : BaseType(grid) - , mInv2Dx(ValueType(0.5 / dx)) - , mInvDx2(ValueType(4.0 * mInv2Dx * mInv2Dx)) - { - } - - /// @brief Return the norm square of the single-sided upwind gradient - /// (computed via Godunov's scheme) at the previously buffered location. - /// - /// @note This method should not be called until the stencil - /// buffer has been populated via a call to moveTo(ijk). - __hostdev__ inline ValueType normSqGrad() const - { - return mInvDx2 * GodunovsNormSqrd(mValues[0] > ValueType(0), - mValues[0] - mValues[1], - mValues[2] - mValues[0], - mValues[0] - mValues[3], - mValues[4] - mValues[0], - mValues[0] - mValues[5], - mValues[6] - mValues[0]); - } - - /// @brief Return the gradient computed at the previously buffered - /// location by second order central differencing. - /// - /// @note This method should not be called until the stencil - /// buffer has been populated via a call to moveTo(ijk). - __hostdev__ inline Vec3 gradient() const - { - return Vec3(mValues[2] - mValues[1], - mValues[4] - mValues[3], - mValues[6] - mValues[5])*mInv2Dx; - } - /// @brief Return the first-order upwind gradient corresponding to the direction V. - /// - /// @note This method should not be called until the stencil - /// buffer has been populated via a call to moveTo(ijk). - __hostdev__ inline Vec3 gradient(const Vec3& V) const - { - return Vec3( - V[0]>0 ? mValues[0] - mValues[1] : mValues[2] - mValues[0], - V[1]>0 ? mValues[0] - mValues[3] : mValues[4] - mValues[0], - V[2]>0 ? mValues[0] - mValues[5] : mValues[6] - mValues[0])*2*mInv2Dx; - } - - /// Return the Laplacian computed at the previously buffered - /// location by second-order central differencing. - __hostdev__ inline ValueType laplacian() const - { - return mInvDx2 * (mValues[1] + mValues[2] + - mValues[3] + mValues[4] + - mValues[5] + mValues[6] - 6*mValues[0]); - } - - /// Return @c true if the sign of the value at the center point of the stencil - /// is different from the signs of any of its six nearest neighbors. - __hostdev__ inline bool zeroCrossing() const - { - return (mValues[0]>0 ? 
(mValues[1]<0 || mValues[2]<0 || mValues[3]<0 || mValues[4]<0 || mValues[5]<0 || mValues[6]<0) - : (mValues[1]>0 || mValues[2]>0 || mValues[3]>0 || mValues[4]>0 || mValues[5]>0 || mValues[6]>0)); - } - - /// @brief Compute the closest-point transform to a level set. - /// @return the closest point in index space to the surface - /// from which the level set was derived. - /// - /// @note This method assumes that the grid represents a level set - /// with distances in world units and a simple affine transform - /// with uniform scaling. - __hostdev__ inline Vec3 cpt() - { - const Coord& ijk = BaseType::getCenterCoord(); - const ValueType d = ValueType(mValues[0] * 0.5 * mInvDx2); // distance in voxels / (2dx^2) - const auto value = Vec3(ijk[0] - d*(mValues[2] - mValues[1]), - ijk[1] - d*(mValues[4] - mValues[3]), - ijk[2] - d*(mValues[6] - mValues[5])); - return value; - } - - /// Return linear offset for the specified stencil point relative to its center - template - __hostdev__ unsigned int pos() const { return GradPt::idx; } - -private: - - __hostdev__ inline void init(const Coord& ijk) - { - mValues[ 1] = mAcc.getValue(ijk.offsetBy(-1, 0, 0)); - mValues[ 2] = mAcc.getValue(ijk.offsetBy( 1, 0, 0)); - - mValues[ 3] = mAcc.getValue(ijk.offsetBy( 0,-1, 0)); - mValues[ 4] = mAcc.getValue(ijk.offsetBy( 0, 1, 0)); - - mValues[ 5] = mAcc.getValue(ijk.offsetBy( 0, 0,-1)); - mValues[ 6] = mAcc.getValue(ijk.offsetBy( 0, 0, 1)); - } - - template friend class BaseStencil; // allow base class to call init() - using BaseType::mAcc; - using BaseType::mValues; - const ValueType mInv2Dx, mInvDx2; -}; // GradStencil class - - -// ---------------------------- WenoStencil ---------------------------- - -namespace { // anonymous namespace for stencil-layout map - - template struct WenoPt {}; - template<> struct WenoPt< 0, 0, 0> { enum { idx = 0 }; }; - - template<> struct WenoPt<-3, 0, 0> { enum { idx = 1 }; }; - template<> struct WenoPt<-2, 0, 0> { enum { idx = 2 }; }; - template<> struct WenoPt<-1, 0, 0> { enum { idx = 3 }; }; - template<> struct WenoPt< 1, 0, 0> { enum { idx = 4 }; }; - template<> struct WenoPt< 2, 0, 0> { enum { idx = 5 }; }; - template<> struct WenoPt< 3, 0, 0> { enum { idx = 6 }; }; - - template<> struct WenoPt< 0,-3, 0> { enum { idx = 7 }; }; - template<> struct WenoPt< 0,-2, 0> { enum { idx = 8 }; }; - template<> struct WenoPt< 0,-1, 0> { enum { idx = 9 }; }; - template<> struct WenoPt< 0, 1, 0> { enum { idx =10 }; }; - template<> struct WenoPt< 0, 2, 0> { enum { idx =11 }; }; - template<> struct WenoPt< 0, 3, 0> { enum { idx =12 }; }; - - template<> struct WenoPt< 0, 0,-3> { enum { idx =13 }; }; - template<> struct WenoPt< 0, 0,-2> { enum { idx =14 }; }; - template<> struct WenoPt< 0, 0,-1> { enum { idx =15 }; }; - template<> struct WenoPt< 0, 0, 1> { enum { idx =16 }; }; - template<> struct WenoPt< 0, 0, 2> { enum { idx =17 }; }; - template<> struct WenoPt< 0, 0, 3> { enum { idx =18 }; }; - -} - -/// @brief This is a special 19-point stencil that supports optimal fifth-order WENO -/// upwinding, second-order central differencing, Laplacian, and zero-crossing test. -/// -/// @note For optimal random access performance this class -/// includes its own grid accessor.
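// A usage sketch for the GradStencil defined above (`grid` and `ijk` are
// hypothetical; every query requires a prior moveTo(ijk)):
//
// GradStencil<FloatGrid> stencil(grid);
// stencil.moveTo(ijk);                           // buffer the 7-point neighborhood
// const float  normSq    = stencil.normSqGrad(); // Godunov upwind |grad|^2
// const auto   grad      = stencil.gradient();   // central-difference gradient
// const bool   onSurface = stencil.zeroCrossing();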
-template -class WenoStencil: public BaseStencil, 19, GridT> -{ - using SelfT = WenoStencil; - using BaseType = BaseStencil; -public: - using GridType = GridT; - using TreeType = typename GridT::TreeType; - using ValueType = typename GridT::ValueType; - - static constexpr int SIZE = 19; - - __hostdev__ WenoStencil(const GridType& grid) - : BaseType(grid) - , mDx2(ValueType(Pow2(grid.voxelSize()[0]))) - , mInv2Dx(ValueType(0.5 / grid.voxelSize()[0])) - , mInvDx2(ValueType(1.0 / mDx2)) - { - } - - __hostdev__ WenoStencil(const GridType& grid, double dx) - : BaseType(grid) - , mDx2(ValueType(dx * dx)) - , mInv2Dx(ValueType(0.5 / dx)) - , mInvDx2(ValueType(1.0 / mDx2)) - { - } - - /// @brief Return the norm-square of the WENO upwind gradient (computed via - /// WENO upwinding and Godunov's scheme) at the previously buffered location. - /// - /// @note This method should not be called until the stencil - /// buffer has been populated via a call to moveTo(ijk). - __hostdev__ inline ValueType normSqGrad(ValueType isoValue = ValueType(0)) const - { - const ValueType* v = mValues; - const RealT - dP_xm = WENO5(v[ 2]-v[ 1],v[ 3]-v[ 2],v[ 0]-v[ 3],v[ 4]-v[ 0],v[ 5]-v[ 4],mDx2), - dP_xp = WENO5(v[ 6]-v[ 5],v[ 5]-v[ 4],v[ 4]-v[ 0],v[ 0]-v[ 3],v[ 3]-v[ 2],mDx2), - dP_ym = WENO5(v[ 8]-v[ 7],v[ 9]-v[ 8],v[ 0]-v[ 9],v[10]-v[ 0],v[11]-v[10],mDx2), - dP_yp = WENO5(v[12]-v[11],v[11]-v[10],v[10]-v[ 0],v[ 0]-v[ 9],v[ 9]-v[ 8],mDx2), - dP_zm = WENO5(v[14]-v[13],v[15]-v[14],v[ 0]-v[15],v[16]-v[ 0],v[17]-v[16],mDx2), - dP_zp = WENO5(v[18]-v[17],v[17]-v[16],v[16]-v[ 0],v[ 0]-v[15],v[15]-v[14],mDx2); - return mInvDx2*static_cast( - GodunovsNormSqrd(v[0]>isoValue, dP_xm, dP_xp, dP_ym, dP_yp, dP_zm, dP_zp)); - } - - /// Return the optimal fifth-order upwind gradient corresponding to the - /// direction V. - /// - /// @note This method should not be called until the stencil - /// buffer has been populated via a call to moveTo(ijk). - __hostdev__ inline Vec3 gradient(const Vec3& V) const - { - const ValueType* v = mValues; - return 2*mInv2Dx * Vec3( - V[0]>0 ? WENO5(v[ 2]-v[ 1],v[ 3]-v[ 2],v[ 0]-v[ 3], v[ 4]-v[ 0],v[ 5]-v[ 4],mDx2) - : WENO5(v[ 6]-v[ 5],v[ 5]-v[ 4],v[ 4]-v[ 0], v[ 0]-v[ 3],v[ 3]-v[ 2],mDx2), - V[1]>0 ? WENO5(v[ 8]-v[ 7],v[ 9]-v[ 8],v[ 0]-v[ 9], v[10]-v[ 0],v[11]-v[10],mDx2) - : WENO5(v[12]-v[11],v[11]-v[10],v[10]-v[ 0], v[ 0]-v[ 9],v[ 9]-v[ 8],mDx2), - V[2]>0 ? WENO5(v[14]-v[13],v[15]-v[14],v[ 0]-v[15], v[16]-v[ 0],v[17]-v[16],mDx2) - : WENO5(v[18]-v[17],v[17]-v[16],v[16]-v[ 0], v[ 0]-v[15],v[15]-v[14],mDx2)); - } - /// Return the gradient computed at the previously buffered - /// location by second-order central differencing. - /// - /// @note This method should not be called until the stencil - /// buffer has been populated via a call to moveTo(ijk). - __hostdev__ inline Vec3 gradient() const - { - return mInv2Dx * Vec3(mValues[ 4] - mValues[ 3], - mValues[10] - mValues[ 9], - mValues[16] - mValues[15]); - } - - /// Return the Laplacian computed at the previously buffered - /// location by second-order central differencing. - /// - /// @note This method should not be called until the stencil - /// buffer has been populated via a call to moveTo(ijk). 
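// Note on the WENO5 calls above: each call consumes five consecutive one-sided
// differences along a single axis (e.g. v[2]-v[1], v[3]-v[2], v[0]-v[3],
// v[4]-v[0], v[5]-v[4] for the x-minus direction, spanning offsets -3..+2),
// which yields an optimal fifth-order upwind-biased derivative away from
// discontinuities.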
- __hostdev__ inline ValueType laplacian() const - { - return mInvDx2 * ( - mValues[ 3] + mValues[ 4] + - mValues[ 9] + mValues[10] + - mValues[15] + mValues[16] - 6*mValues[0]); - } - - /// Return @c true if the sign of the value at the center point of the stencil - /// differs from the sign of any of its six nearest neighbors - __hostdev__ inline bool zeroCrossing() const - { - const ValueType* v = mValues; - return (v[ 0]>0 ? (v[ 3]<0 || v[ 4]<0 || v[ 9]<0 || v[10]<0 || v[15]<0 || v[16]<0) - : (v[ 3]>0 || v[ 4]>0 || v[ 9]>0 || v[10]>0 || v[15]>0 || v[16]>0)); - } - - /// Return linear offset for the specified stencil point relative to its center - template - __hostdev__ unsigned int pos() const { return WenoPt::idx; } - -private: - __hostdev__ inline void init(const Coord& ijk) - { - mValues[ 1] = mAcc.getValue(ijk.offsetBy(-3, 0, 0)); - mValues[ 2] = mAcc.getValue(ijk.offsetBy(-2, 0, 0)); - mValues[ 3] = mAcc.getValue(ijk.offsetBy(-1, 0, 0)); - mValues[ 4] = mAcc.getValue(ijk.offsetBy( 1, 0, 0)); - mValues[ 5] = mAcc.getValue(ijk.offsetBy( 2, 0, 0)); - mValues[ 6] = mAcc.getValue(ijk.offsetBy( 3, 0, 0)); - - mValues[ 7] = mAcc.getValue(ijk.offsetBy( 0, -3, 0)); - mValues[ 8] = mAcc.getValue(ijk.offsetBy( 0, -2, 0)); - mValues[ 9] = mAcc.getValue(ijk.offsetBy( 0, -1, 0)); - mValues[10] = mAcc.getValue(ijk.offsetBy( 0, 1, 0)); - mValues[11] = mAcc.getValue(ijk.offsetBy( 0, 2, 0)); - mValues[12] = mAcc.getValue(ijk.offsetBy( 0, 3, 0)); - - mValues[13] = mAcc.getValue(ijk.offsetBy( 0, 0, -3)); - mValues[14] = mAcc.getValue(ijk.offsetBy( 0, 0, -2)); - mValues[15] = mAcc.getValue(ijk.offsetBy( 0, 0, -1)); - mValues[16] = mAcc.getValue(ijk.offsetBy( 0, 0, 1)); - mValues[17] = mAcc.getValue(ijk.offsetBy( 0, 0, 2)); - mValues[18] = mAcc.getValue(ijk.offsetBy( 0, 0, 3)); - } - - template friend class BaseStencil; // allow base class to call init() - using BaseType::mAcc; - using BaseType::mValues; - const ValueType mDx2, mInv2Dx, mInvDx2; -}; // WenoStencil class - - -// ---------------------------- CurvatureStencil ---------------------------- - -namespace { // anonymous namespace for stencil-layout map - - template struct CurvPt {}; - template<> struct CurvPt< 0, 0, 0> { enum { idx = 0 }; }; - - template<> struct CurvPt<-1, 0, 0> { enum { idx = 1 }; }; - template<> struct CurvPt< 1, 0, 0> { enum { idx = 2 }; }; - - template<> struct CurvPt< 0,-1, 0> { enum { idx = 3 }; }; - template<> struct CurvPt< 0, 1, 0> { enum { idx = 4 }; }; - - template<> struct CurvPt< 0, 0,-1> { enum { idx = 5 }; }; - template<> struct CurvPt< 0, 0, 1> { enum { idx = 6 }; }; - - template<> struct CurvPt<-1,-1, 0> { enum { idx = 7 }; }; - template<> struct CurvPt< 1,-1, 0> { enum { idx = 8 }; }; - template<> struct CurvPt<-1, 1, 0> { enum { idx = 9 }; }; - template<> struct CurvPt< 1, 1, 0> { enum { idx =10 }; }; - - template<> struct CurvPt<-1, 0,-1> { enum { idx =11 }; }; - template<> struct CurvPt< 1, 0,-1> { enum { idx =12 }; }; - template<> struct CurvPt<-1, 0, 1> { enum { idx =13 }; }; - template<> struct CurvPt< 1, 0, 1> { enum { idx =14 }; }; - - template<> struct CurvPt< 0,-1,-1> { enum { idx =15 }; }; - template<> struct CurvPt< 0, 1,-1> { enum { idx =16 }; }; - template<> struct CurvPt< 0,-1, 1> { enum { idx =17 }; }; - template<> struct CurvPt< 0, 1, 1> { enum { idx =18 }; }; - -} - -template -class CurvatureStencil: public BaseStencil, 19, GridT> -{ - using SelfT = CurvatureStencil; - using BaseType = BaseStencil; -public: - using GridType = GridT; - using TreeType = typename GridT::TreeType; - using 
ValueType = typename GridT::ValueType; - - static constexpr int SIZE = 19; - - __hostdev__ CurvatureStencil(const GridType& grid) - : BaseType(grid) - , mInv2Dx(ValueType(0.5 / grid.voxelSize()[0])) - , mInvDx2(ValueType(4.0 * mInv2Dx * mInv2Dx)) - { - } - - __hostdev__ CurvatureStencil(const GridType& grid, double dx) - : BaseType(grid) - , mInv2Dx(ValueType(0.5 / dx)) - , mInvDx2(ValueType(4.0 * mInv2Dx * mInv2Dx)) - { - } - - /// @brief Return the mean curvature at the previously buffered location. - /// - /// @note This method should not be called until the stencil - /// buffer has been populated via a call to moveTo(ijk). - __hostdev__ inline ValueType meanCurvature() const - { - RealT alpha, normGrad; - return this->meanCurvature(alpha, normGrad) ? - ValueType(alpha*mInv2Dx/Pow3(normGrad)) : 0; - } - - /// @brief Return the Gaussian curvature at the previously buffered location. - /// - /// @note This method should not be called until the stencil - /// buffer has been populated via a call to moveTo(ijk). - __hostdev__ inline ValueType gaussianCurvature() const - { - RealT alpha, normGrad; - return this->gaussianCurvature(alpha, normGrad) ? - ValueType(alpha*mInvDx2/Pow4(normGrad)) : 0; - } - - /// @brief Return both the mean and the Gaussian curvature at the - /// previously buffered location. - /// - /// @note This method should not be called until the stencil - /// buffer has been populated via a call to moveTo(ijk). - __hostdev__ inline void curvatures(ValueType &mean, ValueType& gauss) const - { - RealT alphaM, alphaG, normGrad; - if (this->curvatures(alphaM, alphaG, normGrad)) { - mean = ValueType(alphaM*mInv2Dx/Pow3(normGrad)); - gauss = ValueType(alphaG*mInvDx2/Pow4(normGrad)); - } else { - mean = gauss = 0; - } - } - - /// Return the mean curvature multiplied by the norm of the - /// central-difference gradient. This method is very useful for - /// mean-curvature flow of level sets! - /// - /// @note This method should not be called until the stencil - /// buffer has been populated via a call to moveTo(ijk). - __hostdev__ inline ValueType meanCurvatureNormGrad() const - { - RealT alpha, normGrad; - return this->meanCurvature(alpha, normGrad) ? - ValueType(alpha*mInvDx2/(2*Pow2(normGrad))) : 0; - } - - /// Return the Gaussian curvature multiplied by the norm of the - /// central-difference gradient. - /// - /// @note This method should not be called until the stencil - /// buffer has been populated via a call to moveTo(ijk). - __hostdev__ inline ValueType gaussianCurvatureNormGrad() const - { - RealT alpha, normGrad; - return this->gaussianCurvature(alpha, normGrad) ? - ValueType(2*alpha*mInv2Dx*mInvDx2/Pow3(normGrad)) : 0; - } - - /// @brief Return both the mean and the Gaussian curvature at the - /// previously buffered location. - /// - /// @note This method should not be called until the stencil - /// buffer has been populated via a call to moveTo(ijk). - __hostdev__ inline void curvaturesNormGrad(ValueType &mean, ValueType& gauss) const - { - RealT alphaM, alphaG, normGrad; - if (this->curvatures(alphaM, alphaG, normGrad)) { - mean = ValueType(alphaM*mInvDx2/(2*Pow2(normGrad))); - gauss = ValueType(2*alphaG*mInv2Dx*mInvDx2/Pow3(normGrad)); - } else { - mean = gauss = 0; - } - } - - /// @brief Computes the minimum and maximum principal curvature at the - /// previously buffered location. - /// - /// @note This method should not be called until the stencil - /// buffer has been populated via a call to moveTo(ijk).
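/// @note For reference, the principal curvatures follow from the standard identities mean = (k1 + k2)/2 and gauss = k1*k2, so that k1, k2 = mean -/+ sqrt(mean^2 - gauss); this is exactly the expression evaluated below.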
- __hostdev__ inline void principalCurvatures(ValueType &min, ValueType &max) const - { - min = max = 0; - RealT alphaM, alphaG, normGrad; - if (this->curvatures(alphaM, alphaG, normGrad)) { - const RealT mean = alphaM*mInv2Dx/Pow3(normGrad); - const RealT tmp = Sqrt(mean*mean - alphaG*mInvDx2/Pow4(normGrad)); - min = ValueType(mean - tmp); - max = ValueType(mean + tmp); - } - } - - /// Return the Laplacian computed at the previously buffered - /// location by second-order central differencing. - /// - /// @note This method should not be called until the stencil - /// buffer has been populated via a call to moveTo(ijk). - __hostdev__ inline ValueType laplacian() const - { - return mInvDx2 * ( - mValues[1] + mValues[2] + - mValues[3] + mValues[4] + - mValues[5] + mValues[6] - 6*mValues[0]); - } - - /// Return the gradient computed at the previously buffered - /// location by second-order central differencing. - /// - /// @note This method should not be called until the stencil - /// buffer has been populated via a call to moveTo(ijk). - __hostdev__ inline Vec3 gradient() const - { - return Vec3( - mValues[2] - mValues[1], - mValues[4] - mValues[3], - mValues[6] - mValues[5])*mInv2Dx; - } - - /// Return linear offset for the specified stencil point relative to its center - template - __hostdev__ unsigned int pos() const { return CurvPt::idx; } - -private: - __hostdev__ inline void init(const Coord &ijk) - { - mValues[ 1] = mAcc.getValue(ijk.offsetBy(-1, 0, 0)); - mValues[ 2] = mAcc.getValue(ijk.offsetBy( 1, 0, 0)); - - mValues[ 3] = mAcc.getValue(ijk.offsetBy( 0, -1, 0)); - mValues[ 4] = mAcc.getValue(ijk.offsetBy( 0, 1, 0)); - - mValues[ 5] = mAcc.getValue(ijk.offsetBy( 0, 0, -1)); - mValues[ 6] = mAcc.getValue(ijk.offsetBy( 0, 0, 1)); - - mValues[ 7] = mAcc.getValue(ijk.offsetBy(-1, -1, 0)); - mValues[ 8] = mAcc.getValue(ijk.offsetBy( 1, -1, 0)); - mValues[ 9] = mAcc.getValue(ijk.offsetBy(-1, 1, 0)); - mValues[10] = mAcc.getValue(ijk.offsetBy( 1, 1, 0)); - - mValues[11] = mAcc.getValue(ijk.offsetBy(-1, 0, -1)); - mValues[12] = mAcc.getValue(ijk.offsetBy( 1, 0, -1)); - mValues[13] = mAcc.getValue(ijk.offsetBy(-1, 0, 1)); - mValues[14] = mAcc.getValue(ijk.offsetBy( 1, 0, 1)); - - mValues[15] = mAcc.getValue(ijk.offsetBy( 0, -1, -1)); - mValues[16] = mAcc.getValue(ijk.offsetBy( 0, 1, -1)); - mValues[17] = mAcc.getValue(ijk.offsetBy( 0, -1, 1)); - mValues[18] = mAcc.getValue(ijk.offsetBy( 0, 1, 1)); - } - - __hostdev__ inline RealT Dx() const { return 0.5*(mValues[2] - mValues[1]); }// * 1/dx - __hostdev__ inline RealT Dy() const { return 0.5*(mValues[4] - mValues[3]); }// * 1/dx - __hostdev__ inline RealT Dz() const { return 0.5*(mValues[6] - mValues[5]); }// * 1/dx - __hostdev__ inline RealT Dxx() const { return mValues[2] - 2 * mValues[0] + mValues[1]; }// * 1/dx2 - __hostdev__ inline RealT Dyy() const { return mValues[4] - 2 * mValues[0] + mValues[3]; }// * 1/dx2} - __hostdev__ inline RealT Dzz() const { return mValues[6] - 2 * mValues[0] + mValues[5]; }// * 1/dx2 - __hostdev__ inline RealT Dxy() const { return 0.25 * (mValues[10] - mValues[ 8] + mValues[ 7] - mValues[ 9]); }// * 1/dx2 - __hostdev__ inline RealT Dxz() const { return 0.25 * (mValues[14] - mValues[12] + mValues[11] - mValues[13]); }// * 1/dx2 - __hostdev__ inline RealT Dyz() const { return 0.25 * (mValues[18] - mValues[16] + mValues[15] - mValues[17]); }// * 1/dx2 - - __hostdev__ inline bool meanCurvature(RealT& alpha, RealT& normGrad) const - { - // For performance all finite differences are unscaled wrt dx - const RealT Dx 
= this->Dx(), Dy = this->Dy(), Dz = this->Dz(), - Dx2 = Dx*Dx, Dy2 = Dy*Dy, Dz2 = Dz*Dz, normGrad2 = Dx2 + Dy2 + Dz2; - if (normGrad2 <= Tolerance::value()) { - alpha = normGrad = 0; - return false; - } - const RealT Dxx = this->Dxx(), Dyy = this->Dyy(), Dzz = this->Dzz(); - alpha = Dx2*(Dyy + Dzz) + Dy2*(Dxx + Dzz) + Dz2*(Dxx + Dyy) - - 2*(Dx*(Dy*this->Dxy() + Dz*this->Dxz()) + Dy*Dz*this->Dyz());// * 1/dx^4 - normGrad = Sqrt(normGrad2); // * 1/dx - return true; - } - - __hostdev__ inline bool gaussianCurvature(RealT& alpha, RealT& normGrad) const - { - // For performance all finite differences are unscaled wrt dx - const RealT Dx = this->Dx(), Dy = this->Dy(), Dz = this->Dz(), - Dx2 = Dx*Dx, Dy2 = Dy*Dy, Dz2 = Dz*Dz, normGrad2 = Dx2 + Dy2 + Dz2; - if (normGrad2 <= Tolerance::value()) { - alpha = normGrad = 0; - return false; - } - const RealT Dxx = this->Dxx(), Dyy = this->Dyy(), Dzz = this->Dzz(), - Dxy = this->Dxy(), Dxz = this->Dxz(), Dyz = this->Dyz(); - alpha = Dx2*(Dyy*Dzz - Dyz*Dyz) + Dy2*(Dxx*Dzz - Dxz*Dxz) + Dz2*(Dxx*Dyy - Dxy*Dxy) + - 2*( Dy*Dz*(Dxy*Dxz - Dyz*Dxx) + Dx*Dz*(Dxy*Dyz - Dxz*Dyy) + Dx*Dy*(Dxz*Dyz - Dxy*Dzz) );// * 1/dx^6 - normGrad = Sqrt(normGrad2); // * 1/dx - return true; - } - - __hostdev__ inline bool curvatures(RealT& alphaM, RealT& alphaG, RealT& normGrad) const - { - // For performance all finite differences are unscaled wrt dx - const RealT Dx = this->Dx(), Dy = this->Dy(), Dz = this->Dz(), - Dx2 = Dx*Dx, Dy2 = Dy*Dy, Dz2 = Dz*Dz, normGrad2 = Dx2 + Dy2 + Dz2; - if (normGrad2 <= Tolerance::value()) { - alphaM = alphaG =normGrad = 0; - return false; - } - const RealT Dxx = this->Dxx(), Dyy = this->Dyy(), Dzz = this->Dzz(), - Dxy = this->Dxy(), Dxz = this->Dxz(), Dyz = this->Dyz(); - alphaM = Dx2*(Dyy + Dzz) + Dy2*(Dxx + Dzz) + Dz2*(Dxx + Dyy) - - 2*(Dx*(Dy*Dxy + Dz*Dxz) + Dy*Dz*Dyz);// *1/dx^4 - alphaG = Dx2*(Dyy*Dzz - Dyz*Dyz) + Dy2*(Dxx*Dzz - Dxz*Dxz) + Dz2*(Dxx*Dyy - Dxy*Dxy) + - 2*( Dy*Dz*(Dxy*Dxz - Dyz*Dxx) + Dx*Dz*(Dxy*Dyz - Dxz*Dyy) + Dx*Dy*(Dxz*Dyz - Dxy*Dzz) );// *1/dx^6 - normGrad = Sqrt(normGrad2); // * 1/dx - return true; - } - - template friend class BaseStencil; // allow base class to call init() - using BaseType::mAcc; - using BaseType::mValues; - const ValueType mInv2Dx, mInvDx2; -}; // CurvatureStencil class - -} // end nanovdb namespace - -#endif // NANOVDB_STENCILS_HAS_BEEN_INCLUDED +#include // for NANOVDB_DEPRECATED_HEADER +#include +NANOVDB_DEPRECATED_HEADER("Include nanovdb/math/Stencils.h instead.") diff --git a/nanovdb/nanovdb/util/Timer.h b/nanovdb/nanovdb/util/Timer.h new file mode 100644 index 0000000000..992b055b4c --- /dev/null +++ b/nanovdb/nanovdb/util/Timer.h @@ -0,0 +1,87 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/// @file nanovdb/util/Timer.h +/// +/// @author Ken Museth +/// +/// @brief A simple timing class (in case openvdb::util::CpuTimer is unavailable) + +#ifndef NANOVDB_UTIL_TIMER_H_HAS_BEEN_INCLUDED +#define NANOVDB_UTIL_TIMER_H_HAS_BEEN_INCLUDED + +#include +#include + +namespace nanovdb { + +namespace util { + +class Timer +{ + std::chrono::high_resolution_clock::time_point mStart; +public: + /// @brief Default constructor + Timer() {} + + /// @brief Constructor that starts the timer + /// @param msg string message to be printed when timer is started + /// @param os output stream for the message above + Timer(const std::string &msg, std::ostream& os = std::cerr) {this->start(msg, os);} + + /// @brief Start the timer + /// @param msg string message to be printed when 
timer is started + /// @param os output stream for the message above + void start(const std::string &msg, std::ostream& os = std::cerr) + { + os << msg << " ... " << std::flush; + mStart = std::chrono::high_resolution_clock::now(); + } + + /// @brief elapsed time (since start) in milliseconds + template + auto elapsed() + { + auto end = std::chrono::high_resolution_clock::now(); + return std::chrono::duration_cast(end - mStart).count(); + } + + /// @brief stop the timer + /// @tparam AccuracyT Template parameter defining the accuracy of the reported times + /// @param os output stream for the message above + template + void stop(std::ostream& os = std::cerr) + { + auto end = std::chrono::high_resolution_clock::now(); + auto diff = std::chrono::duration_cast(end - mStart).count(); + os << "completed in " << diff; + if (std::is_same::value) {// resolved at compile-time + os << " microseconds" << std::endl; + } else if (std::is_same::value) { + os << " milliseconds" << std::endl; + } else if (std::is_same::value) { + os << " seconds" << std::endl; + } else { + os << " unknown time unit" << std::endl; + } + } + + /// @brief stop and start the timer + /// @tparam AccuracyT Template parameter defining the accuracy of the reported times + /// @param msg string message to be printed when timer is started + /// @param os output stream for the message above + template + void restart(const std::string &msg, std::ostream& os = std::cerr) + { + this->stop(); + this->start(msg, os); + } +};// Timer + +}// namespace util + +using CpuTimer [[deprecated("Use nanovdb::util::Timer instead")]] = util::Timer; + +} // namespace nanovdb + +#endif // NANOVDB_UTIL_TIMER_H_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/util/Util.h b/nanovdb/nanovdb/util/Util.h new file mode 100644 index 0000000000..e8ebfc1c63 --- /dev/null +++ b/nanovdb/nanovdb/util/Util.h @@ -0,0 +1,657 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/*!
+ \file nanovdb/util/Util.h + + \author Ken Museth + + \date January 8, 2020 + + \brief Utility functions +*/ + +#ifndef NANOVDB_UTIL_UTIL_H_HAS_BEEN_INCLUDED +#define NANOVDB_UTIL_UTIL_H_HAS_BEEN_INCLUDED + +#ifdef __CUDACC_RTC__ + +typedef signed char int8_t; +typedef short int16_t; +typedef int int32_t; +typedef long long int64_t; +typedef unsigned char uint8_t; +typedef unsigned int uint32_t; +typedef unsigned short uint16_t; +typedef unsigned long long uint64_t; + +#define NANOVDB_ASSERT(x) + +#ifndef UINT64_C +#define UINT64_C(x) (x ## ULL) +#endif + +#else // !__CUDACC_RTC__ + +#include // for abs in clang7 +#include // for types like int32_t etc +#include // for size_t type +#include // for assert +#include // for stderr and snprintf +#include // for sqrt and fma +#include // for numeric_limits +#include // for std::move +#ifdef NANOVDB_USE_IOSTREAMS +#include // for read/writeUncompressedGrids +#endif// ifdef NANOVDB_USE_IOSTREAMS + +// All asserts can be disabled here, even for debug builds +#if 1 +#define NANOVDB_ASSERT(x) assert(x) +#else +#define NANOVDB_ASSERT(x) +#endif + +#if defined(NANOVDB_USE_INTRINSICS) && defined(_MSC_VER) +#include +#pragma intrinsic(_BitScanReverse) +#pragma intrinsic(_BitScanForward) +#pragma intrinsic(_BitScanReverse64) +#pragma intrinsic(_BitScanForward64) +#endif + +#endif // __CUDACC_RTC__ + +#if defined(__CUDACC__) || defined(__HIP__) +// Only define __hostdev__ qualifier when using NVIDIA CUDA or HIP compilers +#ifndef __hostdev__ +#define __hostdev__ __host__ __device__ // Runs on the CPU and GPU, called from the CPU or the GPU +#endif +#else +// Dummy definitions of macros only defined by CUDA and HIP compilers +#ifndef __hostdev__ +#define __hostdev__ // Runs on the CPU and GPU, called from the CPU or the GPU +#endif +#ifndef __global__ +#define __global__ // Runs on the GPU, called from the CPU or the GPU +#endif +#ifndef __device__ +#define __device__ // Runs on the GPU, called from the GPU +#endif +#ifndef __host__ +#define __host__ // Runs on the CPU, called from the CPU +#endif + +#endif // if defined(__CUDACC__) || defined(__HIP__) + +// The following macro will suppress annoying warnings when nvcc +// compiles functions that call (host) intrinsics (which is perfectly valid) +#if defined(_MSC_VER) && defined(__CUDACC__) +#define NANOVDB_HOSTDEV_DISABLE_WARNING __pragma("hd_warning_disable") +#elif defined(__GNUC__) && defined(__CUDACC__) +#define NANOVDB_HOSTDEV_DISABLE_WARNING _Pragma("hd_warning_disable") +#else +#define NANOVDB_HOSTDEV_DISABLE_WARNING +#endif + +// Define compiler warnings that work with all compilers +//#if defined(_MSC_VER) +//#define NANO_WARNING(msg) _pragma("message" #msg) +//#else +//#define NANO_WARNING(msg) _Pragma("message" #msg) +//#endif + +//============================================== +/// @brief Defines macros that issues warnings for deprecated header files +/// @details Example: +/// @code +/// #include // for NANOVDB_DEPRECATED_HEADER +/// #include +/// NANOVDB_DEPRECATED_HEADER("This header file is deprecated, please use instead") +/// @endcode +#ifdef __GNUC__ +#define NANOVDB_PRAGMA(X) _Pragma(#X) +#define NANOVDB_DEPRECATED_HEADER(MSG) NANOVDB_PRAGMA(GCC warning MSG) +#elif defined(_MSC_VER) +#define NANOVDB_STRINGIZE_(MSG) #MSG +#define NANOVDB_STRINGIZE(MSG) NANOVDB_STRINGIZE_(MSG) +#define NANOVDB_DEPRECATED_HEADER(MSG) \ + __pragma(message(__FILE__ "(" NANOVDB_STRINGIZE(__LINE__) ") : Warning: " MSG)) +#endif + +// A portable implementation of offsetof - unfortunately it doesn't work with 
static_assert +#define NANOVDB_OFFSETOF(CLASS, MEMBER) ((int)(size_t)((char*)&((CLASS*)0)->MEMBER - (char*)0)) + +namespace nanovdb {// ================================================================= + +namespace util {// ==================================================================== + +/// @brief Minimal implementation of std::declval, which converts any type @c T to +/// a reference type, making it possible to use member functions in the operand +/// of the decltype specifier without the need to go through constructors. +/// @tparam T Template type to be converted to T&& +/// @return T&& +/// @warning Unlike std::declval, this version does not work when T = void! However, +/// NVRTC does not like std::declval, so we provide our own implementation. +template +T&& declval() noexcept; + +// --------------------------> string utility functions <------------------------------------ + +/// @brief tests if a c-string @c str is empty, that is its first value is '\0' +/// @param str c-string to be tested for null termination +/// @return true if str[0] = '\0' +__hostdev__ inline bool empty(const char* str) +{ + NANOVDB_ASSERT(str != nullptr); + return *str == '\0'; +}// util::empty + +/// @brief length of a c-string, excluding '\0'. +/// @param str c-string +/// @return the number of characters that precede the terminating null character. +__hostdev__ inline size_t strlen(const char *str) +{ + NANOVDB_ASSERT(str != nullptr); + const char *s = str; + while(*s) ++s; + return (s - str); +}// util::strlen + +/// @brief Copy characters from @c src to @c dst. +/// @param dst pointer to the destination string. +/// @param src pointer to the null-terminated source string. +/// @return destination string @c dst. +/// @note Emulates the behaviour of std::strcpy, except this version also runs on the GPU. +__hostdev__ inline char* strcpy(char *dst, const char *src) +{ + NANOVDB_ASSERT(dst != nullptr && src != nullptr); + for (char *p = dst; (*p++ = *src) != '\0'; ++src); + return dst; +}// util::strcpy(char*, const char*) + +/// @brief Copies the first num characters of @c src to @c dst. +/// If the end of the source C string (which is signaled by a +/// null-character) is found before @c max characters have been +/// copied, @c dst is padded with zeros until a total of @c max +/// characters have been written to it. +/// @param dst destination string +/// @param src source string +/// @param max maximum number of characters in destination string +/// @return destination string @c dst +/// @warning if strncpy(dst, src, max)[max-1]!='\0' then @c src has more +/// characters than @c max and the return string needs to be +/// manually null-terminated, i.e. strncpy(dst, src, max)[max-1]='\0' +__hostdev__ inline char* strncpy(char *dst, const char *src, size_t max) +{ + NANOVDB_ASSERT(dst != nullptr && src != nullptr); + size_t i = 0; + for (; i < max && src[i] != '\0'; ++i) dst[i] = src[i]; + for (; i < max; ++i) dst[i] = '\0'; + return dst; +}// util::strncpy(char *dst, const char *src, size_t max) + +/// @brief converts a number to a string using a specific base +/// @param dst destination string +/// @param num signed number to be concatenated after @c dst +/// @param bas base used when converting @c num to a string +/// @return destination string @c dst +/// @note Emulates the behaviour of itoa, except this version also works on the GPU.
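/// @par Example (an illustrative sketch of the integer overload documented above):
/// @code
/// char buf[16];
/// util::strcpy(buf, 255, 16); // buf now holds "ff" (hexadecimal)
/// util::strcpy(buf, -42);     // buf now holds "-42" (base 10)
/// @endcode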
+__hostdev__ inline char* strcpy(char* dst, int num, int bas = 10) +{ + NANOVDB_ASSERT(dst != nullptr && bas > 0); + int len = 0;// length of number once converted to a string + if (num == 0) dst[len++] = '0'; + for (int abs = num < 0 && bas == 10 ? -num : num; abs; abs /= bas) { + const int rem = abs % bas; + dst[len++] = rem > 9 ? rem - 10 + 'a' : rem + '0'; + } + if (num < 0) dst[len++] = '-';// append '-' if negative + for (char *a = dst, *b = a + len - 1; a < b; ++a, --b) {// reverse dst + dst[len] = *a;// use end of string as temp + *a = *b; + *b = dst[len]; + } + dst[len] = '\0';// explicitly terminate end of string + return dst; +}// util::strcpy(char*, int, int) + +/// @brief Appends a copy of the character string pointed to by @c src to +/// the end of the character string pointed to by @c dst on the device. +/// @param dst pointer to the null-terminated byte string to append to. +/// @param src pointer to the null-terminated byte string to copy from. +/// @return pointer to the character array being appended to. +/// @note Emulates the behaviour of std::strcat, except this version also runs on the GPU. +__hostdev__ inline char* strcat(char *dst, const char *src) +{ + NANOVDB_ASSERT(dst != nullptr && src != nullptr); + char *p = dst; + while (*p != '\0') ++p;// advance till end of dst + strcpy(p, src);// append src + return dst; +}// util::strcat(char*, const char*) + +/// @brief concatenates a number after a string using a specific base +/// @param dst null terminated destination string +/// @param num signed number to be concatenated after @c dst +/// @param bas base used when converting @c num to a string +/// @return destination string @c dst +__hostdev__ inline char* strcat(char* dst, int num, int bas = 10) +{ + NANOVDB_ASSERT(dst != nullptr); + char *p = dst; + while (*p != '\0') ++p; + strcpy(p, num, bas); + return dst; +}// util::strcat(char*, int, int) + +/// @brief Compares two null-terminated byte strings lexicographically. +/// @param lhs pointer to the null-terminated byte strings to compare +/// @param rhs pointer to the null-terminated byte strings to compare +/// @return Negative value if @c lhs appears before @c rhs in lexicographical order. +/// Zero if @c lhs and @c rhs compare equal. Positive value if @c lhs appears +/// after @c rhs in lexicographical order. +/// @note Emulates the behaviour of std::strcmp, except this version also runs on the GPU. +__hostdev__ inline int strcmp(const char *lhs, const char *rhs) +{ + while(*lhs != '\0' && (*lhs == *rhs)){ + lhs++; + rhs++; + } + return *(const unsigned char*)lhs - *(const unsigned char*)rhs;// zero if lhs == rhs +}// util::strcmp(const char*, const char*) + +/// @brief Test if two null-terminated byte strings are the same +/// @param lhs pointer to the null-terminated byte strings to compare +/// @param rhs pointer to the null-terminated byte strings to compare +/// @return true if the two c-strings are identical +__hostdev__ inline bool streq(const char *lhs, const char *rhs) +{ + return strcmp(lhs, rhs) == 0; +}// util::streq + +namespace impl {// ======================================================= +// Base-case implementation of Variadic Template function impl::sprint +__hostdev__ inline char* sprint(char *dst){return dst;} +// Variadic Template function impl::sprint +template +__hostdev__ inline char* sprint(char *dst, T var1, Types... 
var2) +{ + return impl::sprint(strcat(dst, var1), var2...); +} +}// namespace impl ========================================================= + +/// @brief prints a variable number of strings and/or numbers to a destination string +template +__hostdev__ inline char* sprint(char *dst, T var1, Types... var2) +{ + return impl::sprint(strcpy(dst, var1), var2...); +}// util::sprint + +// --------------------------> memzero <------------------------------------ + +/// @brief Zero initialization of memory +/// @param dst pointer to destination +/// @param byteCount number of bytes to be initialized to zero +/// @return destination pointer @c dst +__hostdev__ inline static void* memzero(void *dst, size_t byteCount) +{ + NANOVDB_ASSERT(dst); + const size_t wordCount = byteCount >> 3; + if (wordCount << 3 == byteCount) { + for (auto *d = (uint64_t*)dst, *e = d + wordCount; d != e; ++d) *d = 0ULL; + } else { + for (auto *d = (char*)dst, *e = d + byteCount; d != e; ++d) *d = '\0'; + } + return dst; +}// util::memzero + +// --------------------------> util::is_same <------------------------------------ + +/// @brief C++11 implementation of std::is_same +/// @note When more than two arguments are provided value = T0==T1 || T0==T2 || ... +template +struct is_same +{ + static constexpr bool value = is_same::value || is_same::value; +}; + +template +struct is_same {static constexpr bool value = false;}; + +template +struct is_same {static constexpr bool value = true;}; + +// --------------------------> util::is_floating_point <------------------------------------ + +/// @brief C++11 implementation of std::is_floating_point +template +struct is_floating_point {static constexpr bool value = is_same::value;}; + +// --------------------------> util::enable_if <------------------------------------ + +/// @brief C++11 implementation of std::enable_if +template +struct enable_if {}; + +template +struct enable_if {using type = T;}; + +// --------------------------> util::disable_if <------------------------------------ + +template +struct disable_if {using type = T;}; + +template +struct disable_if {}; + +// --------------------------> util::is_const <------------------------------------ + +template +struct is_const {static constexpr bool value = false;}; + +template +struct is_const {static constexpr bool value = true;}; + +// --------------------------> util::is_pointer <------------------------------------ + +/// @brief Trait used to identify template parameters that are pointers +/// @tparam T Template parameter to be tested +template +struct is_pointer {static constexpr bool value = false;}; + +/// @brief Template specialization of pointers +/// @tparam T Template parameter to be tested +/// @note T can be both a non-const and const type +template +struct is_pointer {static constexpr bool value = true;}; + +// --------------------------> util::conditional <------------------------------------ + +/// @brief C++11 implementation of std::conditional +template +struct conditional { using type = TrueT; }; + +/// @brief Template specialization of conditional +/// @tparam FalseT Type used when boolean is false +/// @tparam TrueT Type used when boolean is true +template +struct conditional { using type = FalseT; }; + +// --------------------------> util::remove_const <------------------------------------ + +/// @brief Trait used to remove the const qualifier from a type.
Default implementation is just a pass-through +/// @tparam T Type +/// @details remove_const<const float>::type = float +template +struct remove_const {using type = T;}; + +/// @brief Template specialization of trait class used to remove the const qualifier from a type +/// @tparam T Type of the const type +/// @details remove_const<const float>::type = float +template +struct remove_const {using type = T;}; + +// --------------------------> util::remove_reference <------------------------------------ + +/// @brief Trait used to remove the reference, i.e. "&", qualifier from a type. Default implementation is just a pass-through +/// @tparam T Type +/// @details remove_reference<float&>::type = float +template +struct remove_reference {using type = T;}; + +/// @brief Template specialization of trait class used to remove the reference, i.e. "&", qualifier from a type +/// @tparam T Type of the reference +/// @details remove_reference<float&>::type = float +template +struct remove_reference {using type = T;}; + +// --------------------------> util::remove_pointer <------------------------------------ + +/// @brief Trait used to remove the pointer, i.e. "*", qualifier from a type. Default implementation is just a pass-through +/// @tparam T Type +/// @details remove_pointer<float*>::type = float +template +struct remove_pointer {using type = T;}; + +/// @brief Template specialization of trait class used to remove the pointer, i.e. "*", qualifier from a type +/// @tparam T Type of the pointer +/// @details remove_pointer<float*>::type = float +template +struct remove_pointer {using type = T;}; + +// --------------------------> util::match_const <------------------------------------ + +/// @brief Trait used to transfer the const-ness of a reference type to another type +/// @tparam T Type whose const-ness needs to match the reference type +/// @tparam ReferenceT Reference type that is not const +/// @details match_const::type = int +/// match_const::type = int +template +struct match_const {using type = typename remove_const::type;}; + +/// @brief Template specialization used to transfer the const-ness of a reference type to another type +/// @tparam T Type that will adopt the const-ness of the reference type +/// @tparam ReferenceT Reference type that is const +/// @details match_const::type = const int +/// match_const::type = const int +template +struct match_const {using type = const typename remove_const::type;}; + +// --------------------------> util::is_specialization <------------------------------------ + +/// @brief Metafunction used to determine if the first template +/// parameter is a specialization of the class template +/// given in the second template parameter.
+/// +/// @details is_specialization<Vec3<float>, Vec3>::value == true; +/// is_specialization::value == true; +/// is_specialization<std::vector<float>, std::vector>::value == true; +template class TemplateType> +struct is_specialization {static const bool value = false;}; +template class TemplateType> +struct is_specialization, TemplateType> +{ + static const bool value = true; +};// util::is_specialization + +// --------------------------> util::PtrDiff <------------------------------------ + +/// @brief Compute the distance, in bytes, between two pointers, dist = p - q +/// @param p first pointer, assumed to NOT be NULL +/// @param q second pointer, assumed to NOT be NULL +/// @return signed distance between pointers, p - q, addresses in units of bytes +__hostdev__ inline static int64_t PtrDiff(const void* p, const void* q) +{ + NANOVDB_ASSERT(p && q); + return reinterpret_cast(p) - reinterpret_cast(q); +}// util::PtrDiff + +// --------------------------> util::PtrAdd <------------------------------------ + +/// @brief Adds a byte offset to a non-const pointer to produce another non-const pointer +/// @tparam DstT Type of the return pointer (defaults to void) +/// @param p non-const input pointer, assumed to NOT be NULL +/// @param offset signed byte offset +/// @return a non-const pointer defined as the offset of an input pointer +template +__hostdev__ inline static DstT* PtrAdd(void* p, int64_t offset) +{ + NANOVDB_ASSERT(p); + return reinterpret_cast(reinterpret_cast(p) + offset); +}// util::PtrAdd + +/// @brief Adds a byte offset to a const pointer to produce another const pointer +/// @tparam DstT Type of the return pointer (defaults to void) +/// @param p const input pointer, assumed to NOT be NULL +/// @param offset signed byte offset +/// @return a const pointer defined as the offset of a const input pointer +template +__hostdev__ inline static const DstT* PtrAdd(const void* p, int64_t offset) +{ + NANOVDB_ASSERT(p); + return reinterpret_cast(reinterpret_cast(p) + offset); +}// util::PtrAdd + +// -------------------> findLowestOn <---------------------------- + +/// @brief Returns the index of the lowest, i.e. least significant, on bit in the specified 32 bit word +/// +/// @warning Assumes that at least one bit is set in the word, i.e. @a v != uint32_t(0)! +NANOVDB_HOSTDEV_DISABLE_WARNING +__hostdev__ inline uint32_t findLowestOn(uint32_t v) +{ + NANOVDB_ASSERT(v); +#if (defined(__CUDA_ARCH__) || defined(__HIP__)) && defined(NANOVDB_USE_INTRINSICS) + return __ffs(v) - 1; // one based indexing +#elif defined(_MSC_VER) && defined(NANOVDB_USE_INTRINSICS) + unsigned long index; + _BitScanForward(&index, v); + return static_cast(index); +#elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS) + return static_cast(__builtin_ctzl(v)); +#else + //NANO_WARNING("Using software implementation for findLowestOn(uint32_t v)") + static const unsigned char DeBruijn[32] = { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9}; +// disable unary minus on unsigned warning +#if defined(_MSC_VER) && !defined(__NVCC__) +#pragma warning(push) +#pragma warning(disable : 4146) +#endif + return DeBruijn[uint32_t((v & -v) * 0x077CB531U) >> 27]; +#if defined(_MSC_VER) && !defined(__NVCC__) +#pragma warning(pop) +#endif + +#endif +}// util::findLowestOn(uint32_t) + +/// @brief Returns the index of the lowest, i.e. least significant, on bit in the specified 64 bit word +/// +/// @warning Assumes that at least one bit is set in the word, i.e.
@a v != uint64_t(0)! +NANOVDB_HOSTDEV_DISABLE_WARNING +__hostdev__ inline uint32_t findLowestOn(uint64_t v) +{ + NANOVDB_ASSERT(v); +#if (defined(__CUDA_ARCH__) || defined(__HIP__)) && defined(NANOVDB_USE_INTRINSICS) + return __ffsll(static_cast(v)) - 1; // one based indexing +#elif defined(_MSC_VER) && defined(NANOVDB_USE_INTRINSICS) + unsigned long index; + _BitScanForward64(&index, v); + return static_cast(index); +#elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS) + return static_cast(__builtin_ctzll(v)); +#else + //NANO_WARNING("Using software implementation for util::findLowestOn(uint64_t)") + static const unsigned char DeBruijn[64] = { + 0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28, + 62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11, + 63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10, + 51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12, + }; +// disable unary minus on unsigned warning +#if defined(_MSC_VER) && !defined(__NVCC__) +#pragma warning(push) +#pragma warning(disable : 4146) +#endif + return DeBruijn[uint64_t((v & -v) * UINT64_C(0x022FDD63CC95386D)) >> 58]; +#if defined(_MSC_VER) && !defined(__NVCC__) +#pragma warning(pop) +#endif + +#endif +}// util::findLowestOn(uint64_t) + +// -------------------> findHighestOn <---------------------------- + +/// @brief Returns the index of the highest, i.e. most significant, on bit in the specified 32 bit word +/// +/// @warning Assumes that at least one bit is set in the word, i.e. @a v != uint32_t(0)! +NANOVDB_HOSTDEV_DISABLE_WARNING +__hostdev__ inline uint32_t findHighestOn(uint32_t v) +{ + NANOVDB_ASSERT(v); +#if (defined(__CUDA_ARCH__) || defined(__HIP__)) && defined(NANOVDB_USE_INTRINSICS) + return sizeof(uint32_t) * 8 - 1 - __clz(v); // Return the number of consecutive high-order zero bits in a 32-bit integer. +#elif defined(_MSC_VER) && defined(NANOVDB_USE_INTRINSICS) + unsigned long index; + _BitScanReverse(&index, v); + return static_cast(index); +#elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS) + return sizeof(unsigned long) * 8 - 1 - __builtin_clzl(v); +#else + //NANO_WARNING("Using software implementation for util::findHighestOn(uint32_t)") + static const unsigned char DeBruijn[32] = { + 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, + 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31}; + v |= v >> 1; // first round down to one less than a power of 2 + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return DeBruijn[uint32_t(v * 0x07C4ACDDU) >> 27]; +#endif +}// util::findHighestOn + +/// @brief Returns the index of the highest, i.e. most significant, on bit in the specified 64 bit word +/// +/// @warning Assumes that at least one bit is set in the word, i.e. @a v != uint64_t(0)! +NANOVDB_HOSTDEV_DISABLE_WARNING +__hostdev__ inline uint32_t findHighestOn(uint64_t v) +{ + NANOVDB_ASSERT(v); +#if (defined(__CUDA_ARCH__) || defined(__HIP__)) && defined(NANOVDB_USE_INTRINSICS) + return sizeof(unsigned long) * 8 - 1 - __clzll(static_cast(v)); +#elif defined(_MSC_VER) && defined(NANOVDB_USE_INTRINSICS) + unsigned long index; + _BitScanReverse64(&index, v); + return static_cast(index); +#elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS) + return sizeof(unsigned long) * 8 - 1 - __builtin_clzll(v); +#else + const uint32_t* p = reinterpret_cast(&v); + return p[1] ?
32u + findHighestOn(p[1]) : findHighestOn(p[0]); +#endif +}// util::findHighestOn + +// ----------------------------> util::countOn <-------------------------------------- + +/// @return Number of bits that are on in the specified 64-bit word +NANOVDB_HOSTDEV_DISABLE_WARNING +__hostdev__ inline uint32_t countOn(uint64_t v) +{ +#if (defined(__CUDA_ARCH__) || defined(__HIP__)) && defined(NANOVDB_USE_INTRINSICS) + //#warning Using popcll for util::countOn + return __popcll(v); +// __popcnt64 intrinsic support was added in VS 2019 16.8 +#elif defined(_MSC_VER) && defined(_M_X64) && (_MSC_VER >= 1928) && defined(NANOVDB_USE_INTRINSICS) + //#warning Using popcnt64 for util::countOn + return uint32_t(__popcnt64(v)); +#elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS) + //#warning Using builtin_popcountll for util::countOn + return __builtin_popcountll(v); +#else // use software implementation + //NANO_WARNING("Using software implementation for util::countOn") + v = v - ((v >> 1) & uint64_t(0x5555555555555555)); + v = (v & uint64_t(0x3333333333333333)) + ((v >> 2) & uint64_t(0x3333333333333333)); + return (((v + (v >> 4)) & uint64_t(0xF0F0F0F0F0F0F0F)) * uint64_t(0x101010101010101)) >> 56; +#endif +}// util::countOn(uint64_t) + +}// namespace util ================================================================== + +[[deprecated("Use nanovdb::util::findLowestOn instead")]] +__hostdev__ inline uint32_t FindLowestOn(uint32_t v){return util::findLowestOn(v);} +[[deprecated("Use nanovdb::util::findLowestOn instead")]] +__hostdev__ inline uint32_t FindLowestOn(uint64_t v){return util::findLowestOn(v);} +[[deprecated("Use nanovdb::util::findHighestOn instead")]] +__hostdev__ inline uint32_t FindHighestOn(uint32_t v){return util::findHighestOn(v);} +[[deprecated("Use nanovdb::util::findHighestOn instead")]] +__hostdev__ inline uint32_t FindHighestOn(uint64_t v){return util::findHighestOn(v);} +[[deprecated("Use nanovdb::util::countOn instead")]] +__hostdev__ inline uint32_t CountOn(uint64_t v){return util::countOn(v);} + +} // namespace nanovdb =================================================================== + +#endif // end of NANOVDB_UTIL_UTIL_H_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/util/cuda/CudaAddBlindData.cuh b/nanovdb/nanovdb/util/cuda/CudaAddBlindData.cuh index c750412458..d366bd9845 100644 --- a/nanovdb/nanovdb/util/cuda/CudaAddBlindData.cuh +++ b/nanovdb/nanovdb/util/cuda/CudaAddBlindData.cuh @@ -1,127 +1,6 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -/*! 
- \file CudaAddBlindData.cuh - - \author Ken Museth - - \date August 3, 2023 - - \brief Defines function that appends blind device data to an existing device NanoGrid - - \warning The header file contains cuda device code so be sure - to only include it in .cu files (or other .cuh files) -*/ - -#ifndef NVIDIA_CUDA_ADD_BLIND_DATA_CUH_HAS_BEEN_INCLUDED -#define NVIDIA_CUDA_ADD_BLIND_DATA_CUH_HAS_BEEN_INCLUDED - -#include -#include "CudaDeviceBuffer.h" -#include -#include -#include -#include - -#include // for std::strcpy - -namespace nanovdb { - -/// @brief This function appends blind data to an existing NanoGrid -/// @tparam BuildT Build type of the grid -/// @tparam BlindDataT Type of the blind data -/// @tparam BufferT Type of the buffer used for allocation -/// @param d_grid Pointer to device grid -/// @param d_blindData Pointer to device blind data -/// @param valueCount number of values in the blind data -/// @param blindClass class of the blind data -/// @param semantics semantics of the blind data -/// @param name optional name of the blind data -/// @param pool optional pool used for allocation -/// @param stream optional CUDA stream (defaults to CUDA stream 0) -/// @return GridHandle with blind data appended -template -GridHandle -cudaAddBlindData(const NanoGrid *d_grid, - const BlindDataT *d_blindData, - uint64_t valueCount, - GridBlindDataClass blindClass = GridBlindDataClass::Unknown, - GridBlindDataSemantic semantics = GridBlindDataSemantic::Unknown, - const char *name = "", - const BufferT &pool = BufferT(), - cudaStream_t stream = 0) -{ - // In: |-----------|--------- |-----------| - // old grid old meta old data - // Out: |-----------|----------|----------|-----------|------------| - // old grid old meta new meta old data new data - - static_assert(BufferTraits::hasDeviceDual, "Expected BufferT to support device allocation"); - - // extract byte sizes of the grid, blind meta data and blind data - enum {GRID=0, META=1, DATA=2, CHECKSUM=3}; - uint64_t tmp[4], *d_tmp; - cudaCheck(cudaMallocAsync((void**)&d_tmp, 4*sizeof(uint64_t), stream)); - cudaLambdaKernel<<<1, 1, 0, stream>>>(1, [=] __device__(size_t) { - if (auto count = d_grid->blindDataCount()) { - d_tmp[GRID] = PtrDiff(&d_grid->blindMetaData(0), d_grid); - d_tmp[META] = count*sizeof(GridBlindMetaData); - d_tmp[DATA] = d_grid->gridSize() - d_tmp[GRID] - d_tmp[META]; - } else { - d_tmp[GRID] = d_grid->gridSize(); - d_tmp[META] = d_tmp[DATA] = 0u; - } - d_tmp[CHECKSUM] = d_grid->checksum(); - }); cudaCheckError(); - cudaCheck(cudaMemcpyAsync(&tmp, d_tmp, 4*sizeof(uint64_t), cudaMemcpyDeviceToHost, stream)); - - GridBlindMetaData metaData{int64_t(sizeof(GridBlindMetaData) + tmp[DATA]), valueCount, - sizeof(BlindDataT), semantics, blindClass, mapToGridType()}; - if (!metaData.isValid()) throw std::runtime_error("cudaAddBlindData: invalid combination of blind meta data"); - std::strcpy(metaData.mName, name); - auto buffer = BufferT::create(tmp[GRID] + tmp[META] + sizeof(GridBlindMetaData) + tmp[DATA] + metaData.blindDataSize(), &pool, false); - auto d_data = buffer.deviceData(); - - // 1: |-----------|----------| - // old grid old meta - cudaCheck(cudaMemcpyAsync(d_data, d_grid, tmp[GRID] + tmp[META], cudaMemcpyDeviceToDevice, stream)); - - // 2: |-----------|----------|----------| - // old grid old meta new meta - cudaCheck(cudaMemcpyAsync(d_data + tmp[GRID] + tmp[META], &metaData, sizeof(GridBlindMetaData), cudaMemcpyHostToDevice, stream)); - - // 3: |-----------|----------|----------|-----------| - // old grid old meta
new meta old data - cudaCheck(cudaMemcpyAsync(d_data + tmp[GRID] + tmp[META] + sizeof(GridBlindMetaData), - (const char*)d_grid + tmp[GRID] + tmp[META], tmp[DATA], cudaMemcpyDeviceToDevice, stream)); - - // 4: |-----------|----------|----------|-----------|------------| - // old grid old meta new meta old data new data - const size_t dataSize = valueCount*sizeof(BlindDataT);// no padding - cudaCheck(cudaMemcpyAsync(d_data + tmp[GRID] + tmp[META] + sizeof(GridBlindMetaData) + tmp[DATA], - d_blindData, dataSize, cudaMemcpyDeviceToDevice, stream)); - if (auto padding = metaData.blindDataSize() - dataSize) {// zero out possible padding - cudaCheck(cudaMemsetAsync(d_data + tmp[GRID] + tmp[META] + sizeof(GridBlindMetaData) + tmp[DATA] + dataSize, 0, padding, stream)); - } - - // increment grid size and blind data counter in output grid - cudaLambdaKernel<<<1, 1, 0, stream>>>(1, [=] __device__(size_t) { - auto &grid = *reinterpret_cast*>(d_data); - grid.mBlindMetadataCount += 1; - grid.mBlindMetadataOffset = d_tmp[GRID]; - auto *meta = PtrAdd(d_data, grid.mBlindMetadataOffset);// points to first blind meta data - for (uint32_t i=0, n=grid.mBlindMetadataCount-1; imDataOffset += sizeof(GridBlindMetaData); - grid.mGridSize += sizeof(GridBlindMetaData) + meta->blindDataSize();// expansion with 32 byte alignment - }); cudaCheckError(); - cudaCheck(cudaFreeAsync(d_tmp, stream)); - - GridChecksum cs(tmp[CHECKSUM]); - cudaGridChecksum(reinterpret_cast(d_data), cs.mode()); - - return GridHandle(std::move(buffer)); -}// cudaAddBlindData - -}// nanovdb namespace - -#endif // NVIDIA_CUDA_ADD_BLIND_DATA_CUH_HAS_BEEN_INCLUDED \ No newline at end of file +#include // for NANOVDB_DEPRECATED_HEADER +#include +NANOVDB_DEPRECATED_HEADER("Include nanovdb/tools/cuda/AddBlindData.cuh instead.") diff --git a/nanovdb/nanovdb/util/cuda/CudaDeviceBuffer.h b/nanovdb/nanovdb/util/cuda/CudaDeviceBuffer.h index 4b9820771d..b05fbac802 100644 --- a/nanovdb/nanovdb/util/cuda/CudaDeviceBuffer.h +++ b/nanovdb/nanovdb/util/cuda/CudaDeviceBuffer.h @@ -1,194 +1,6 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -/*! - \file CudaDeviceBuffer.h - - \author Ken Museth - - \date January 8, 2020 - - \brief Implements a simple dual (host/device) CUDA buffer. - - \note This file has no device-only (kernel) function calls, - which explains why it's a .h and not .cuh file. -*/ - -#ifndef NANOVDB_CUDA_DEVICE_BUFFER_H_HAS_BEEN_INCLUDED -#define NANOVDB_CUDA_DEVICE_BUFFER_H_HAS_BEEN_INCLUDED - -#include "../HostBuffer.h" // for BufferTraits -#include "CudaUtils.h"// for cudaMalloc/cudaMallocManaged/cudaFree - -namespace nanovdb { - -// ----------------------------> CudaDeviceBuffer <-------------------------------------- - -/// @brief Simple memory buffer using un-managed pinned host memory when compiled with NVCC. -/// Obviously this class is making explicit use of CUDA so replace it with your own memory -/// allocator if you are not using CUDA. -/// @note While CUDA's pinned host memory allows for asynchronous memory copy between host and device -/// it is significantly slower than cached (un-pinned) memory on the host.
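// A typical round trip with this buffer, sketched under the assumption that
// the grid handle was created elsewhere with CudaDeviceBuffer as its
// allocator (createLevelSetSphere is illustrative and not part of this file):
//
// auto handle = nanovdb::createLevelSetSphere<float, nanovdb::CudaDeviceBuffer>();
// handle.deviceUpload();                     // CPU -> GPU; allocates the device buffer on demand
// auto* d_grid = handle.deviceGrid<float>(); // device pointer for CUDA kernels
// handle.deviceDownload();                   // GPU -> CPU once kernels have modified the grid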
-class CudaDeviceBuffer -{ - - uint64_t mSize; // total number of bytes managed by this buffer (assumed to be identical for host and device) - uint8_t *mCpuData, *mGpuData; // raw pointers to the host and device buffers - -public: - /// @brief Static factory method that returns an instance of this buffer - /// @param size byte size of buffer to be initialized - /// @param dummy this argument is currently ignored but required to match the API of the HostBuffer - /// @param host If true buffer is initialized only on the host/CPU, else on the device/GPU - /// @param stream optional stream argument (defaults to stream NULL) - /// @return An instance of this class using move semantics - static CudaDeviceBuffer create(uint64_t size, const CudaDeviceBuffer* dummy = nullptr, bool host = true, void* stream = nullptr); - - /// @brief Constructor - /// @param size byte size of buffer to be initialized - /// @param host If true buffer is initialized only on the host/CPU, else on the device/GPU - /// @param stream optional stream argument (defaults to stream NULL) - CudaDeviceBuffer(uint64_t size = 0, bool host = true, void* stream = nullptr) - : mSize(0) - , mCpuData(nullptr) - , mGpuData(nullptr) - { - if (size > 0) this->init(size, host, stream); - } - - /// @brief Disallow copy-construction - CudaDeviceBuffer(const CudaDeviceBuffer&) = delete; - - /// @brief Move copy-constructor - CudaDeviceBuffer(CudaDeviceBuffer&& other) noexcept - : mSize(other.mSize) - , mCpuData(other.mCpuData) - , mGpuData(other.mGpuData) - { - other.mSize = 0; - other.mCpuData = nullptr; - other.mGpuData = nullptr; - } - - /// @brief Disallow copy assignment operation - CudaDeviceBuffer& operator=(const CudaDeviceBuffer&) = delete; - - /// @brief Move copy assignment operation - CudaDeviceBuffer& operator=(CudaDeviceBuffer&& other) noexcept - { - this->clear(); - mSize = other.mSize; - mCpuData = other.mCpuData; - mGpuData = other.mGpuData; - other.mSize = 0; - other.mCpuData = nullptr; - other.mGpuData = nullptr; - return *this; - } - - /// @brief Destructor frees memory on both the host and device - ~CudaDeviceBuffer() { this->clear(); }; - - /// @brief Initialize buffer - /// @param size byte size of buffer to be initialized - /// @param host If true buffer is initialized only on the host/CPU, else on the device/GPU - /// @note All existing buffers are first cleared - /// @warning size is expected to be non-zero. Use clear() to clear the buffer! - void init(uint64_t size, bool host = true, void* stream = nullptr); - - /// @brief Returns a raw pointer to the host/CPU buffer managed by this allocator. - /// @warning Note that the pointer can be NULL! - uint8_t* data() const { return mCpuData; } - - /// @brief Returns a raw pointer to the device/GPU buffer managed by this allocator. - /// @warning Note that the pointer can be NULL! - uint8_t* deviceData() const { return mGpuData; } - - /// @brief Upload this buffer from the host to the device, i.e. CPU -> GPU. - /// @param stream optional CUDA stream (defaults to CUDA stream 0) - /// @param sync if false the memory copy is asynchronous - /// @note If the device/GPU buffer does not exist it is first allocated - /// @warning Assumes that the host/CPU buffer already exists - void deviceUpload(void* stream = nullptr, bool sync = true) const; - - /// @brief Download this buffer from the device to the host, i.e. GPU -> CPU.
-// --------------------------> Implementations below <------------------------------------
-
-inline CudaDeviceBuffer CudaDeviceBuffer::create(uint64_t size, const CudaDeviceBuffer*, bool host, void* stream)
-{
-    return CudaDeviceBuffer(size, host, stream);
-}
-
-inline void CudaDeviceBuffer::init(uint64_t size, bool host, void* stream)
-{
-    if (mSize>0) this->clear(stream);
-    NANOVDB_ASSERT(size > 0);
-    if (host) {
-        cudaCheck(cudaMallocHost((void**)&mCpuData, size)); // un-managed pinned memory on the host (can be slow to access!). Always 32B aligned
-        checkPtr(mCpuData, "CudaDeviceBuffer::init: failed to allocate host buffer");
-    } else {
-        cudaCheck(cudaMallocAsync((void**)&mGpuData, size, reinterpret_cast<cudaStream_t>(stream))); // un-managed memory on the device, always 32B aligned!
-        checkPtr(mGpuData, "CudaDeviceBuffer::init: failed to allocate device buffer");
-    }
-    mSize = size;
-} // CudaDeviceBuffer::init
-
-inline void CudaDeviceBuffer::deviceUpload(void* stream, bool sync) const
-{
-    checkPtr(mCpuData, "uninitialized cpu data");
-    if (mGpuData == nullptr) {
-        cudaCheck(cudaMallocAsync((void**)&mGpuData, mSize, reinterpret_cast<cudaStream_t>(stream))); // un-managed memory on the device, always 32B aligned!
-    }
-    checkPtr(mGpuData, "uninitialized gpu data");
-    cudaCheck(cudaMemcpyAsync(mGpuData, mCpuData, mSize, cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream)));
-    if (sync) cudaCheck(cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream)));
-} // CudaDeviceBuffer::deviceUpload
-
-inline void CudaDeviceBuffer::deviceDownload(void* stream, bool sync) const
-{
-    checkPtr(mGpuData, "uninitialized gpu data");
-    if (mCpuData == nullptr) {
-        cudaCheck(cudaMallocHost((void**)&mCpuData, mSize)); // un-managed pinned memory on the host (can be slow to access!). Always 32B aligned
-    }
-    checkPtr(mCpuData, "uninitialized cpu data");
-    cudaCheck(cudaMemcpyAsync(mCpuData, mGpuData, mSize, cudaMemcpyDeviceToHost, reinterpret_cast<cudaStream_t>(stream)));
-    if (sync) cudaCheck(cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream)));
-} // CudaDeviceBuffer::deviceDownload
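// Sketch of the asynchronous path through the two methods above: pass an
// explicit stream (a cudaStream_t, accepted here as void*) and sync=false,
// then synchronize once after all work has been queued. Names are illustrative:
//
//   cudaStream_t stream;
//   cudaStreamCreate(&stream);
//   buffer.deviceUpload(stream, false);    // async CPU -> GPU on 'stream'
//   // ... launch kernels on 'stream' that read buffer.deviceData() ...
//   buffer.deviceDownload(stream, false);  // async GPU -> CPU on 'stream'
//   cudaCheck(cudaStreamSynchronize(stream));
//   cudaStreamDestroy(stream);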
-inline void CudaDeviceBuffer::clear(void *stream)
-{
-    if (mGpuData) cudaCheck(cudaFreeAsync(mGpuData, reinterpret_cast<cudaStream_t>(stream)));
-    if (mCpuData) cudaCheck(cudaFreeHost(mCpuData));
-    mCpuData = mGpuData = nullptr;
-    mSize = 0;
-} // CudaDeviceBuffer::clear
-
-} // namespace nanovdb
-
-#endif // end of NANOVDB_CUDA_DEVICE_BUFFER_H_HAS_BEEN_INCLUDED
+#include <nanovdb/util/Util.h> // for NANOVDB_DEPRECATED_HEADER
+#include <nanovdb/cuda/DeviceBuffer.h>
+NANOVDB_DEPRECATED_HEADER("Include nanovdb/cuda/DeviceBuffer.h instead.")
\ No newline at end of file
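Each of these shims reduces a legacy header to two includes plus a compile-time warning, so migrating user code is a one-line change. A sketch of the intended migration for this file, assuming the relocated class lives in the nanovdb::cuda namespace as the new path suggests:

    // Before (still compiles, but now emits a deprecation warning):
    #include <nanovdb/util/cuda/CudaDeviceBuffer.h>
    using BufferT = nanovdb::CudaDeviceBuffer;

    // After (restructured layout):
    #include <nanovdb/cuda/DeviceBuffer.h>
    using BufferT = nanovdb::cuda::DeviceBuffer;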
diff --git a/nanovdb/nanovdb/util/cuda/CudaGridChecksum.cuh b/nanovdb/nanovdb/util/cuda/CudaGridChecksum.cuh
index e3ae9a941f..e52ee89ac4 100644
--- a/nanovdb/nanovdb/util/cuda/CudaGridChecksum.cuh
+++ b/nanovdb/nanovdb/util/cuda/CudaGridChecksum.cuh
@@ -1,244 +1,6 @@
 // Copyright Contributors to the OpenVDB Project
 // SPDX-License-Identifier: MPL-2.0
-/*!
-    \file CudaGridChecksum.cuh
-
-    \author Ken Museth
-
-    \date September 28, 2023
-
-    \brief Compute CRC32 checksum of NanoVDB grids
-
-*/
-
-#ifndef NANOVDB_CUDA_GRID_CHECKSUM_CUH_HAS_BEEN_INCLUDED
-#define NANOVDB_CUDA_GRID_CHECKSUM_CUH_HAS_BEEN_INCLUDED
-
-#include "CudaDeviceBuffer.h"// required for instantiation of move c-tor of GridHandle
-#include "CudaNodeManager.cuh"
-#include "../GridChecksum.h"// for ChecksumMode
-#include "../GridHandle.h"
-
-namespace nanovdb {
-
-namespace crc32 {
-
-/// @brief Cuda kernel to initialize the lookup table for CRC32 computation
-/// @tparam T Dummy template parameter used to avoid multiple instantiations. T should be uint32_t!
-/// @param d_lut Device pointer to lookup table of size 256
-template <typename T>
-__global__ void initLutKernel(T *d_lut)
-{
-    static_assert(is_same<T, uint32_t>::value, "Expected uint32_t");
-    const uint32_t tid = blockIdx.x * blockDim.x + threadIdx.x;
-    if (tid < 256u) crc32::initLut(d_lut, tid);
-}
-
-/// @brief Cuda kernel that computes CRC32 checksums of blocks of data using a look-up-table
-/// @param d_data device pointer to raw data from which to compute the CRC32 checksums
-/// @param d_blockCRC device pointer to array of @c blockCount checksums for each block
-/// @param blockCount number of blocks and checksums
-/// @param blockSize size of each block in bytes
-/// @param d_lut device pointer to CRC32 Lookup Table
-template <typename T>
-__global__ void checksumKernel(const T *d_data, uint32_t* d_blockCRC, uint32_t blockCount, uint32_t blockSize, const uint32_t *d_lut)
-{
-    const uint32_t tid = blockIdx.x * blockDim.x + threadIdx.x;
-    if (tid < blockCount) d_blockCRC[tid] = crc32::checksum((const uint8_t*)d_data + tid * blockSize, blockSize, d_lut);
-}
-
-/// @brief Cuda kernel that computes CRC32 checksums of blocks of data (without using a look-up-table)
-/// @param d_data device pointer to raw data from which to compute the CRC32 checksums
-/// @param d_blockCRC device pointer to array of @c blockCount checksums for each block
-/// @param blockCount number of blocks and checksums
-/// @param blockSize size of each block in bytes
-template <typename T>
-__global__ void checksumKernel(const T *d_data, uint32_t* d_blockCRC, uint32_t blockCount, uint32_t blockSize)
-{
-    const uint32_t tid = blockIdx.x * blockDim.x + threadIdx.x;
-    if (tid < blockCount) d_blockCRC[tid] = crc32::checksum((const uint8_t*)d_data + tid * blockSize, blockSize);
-}
-
-/// @brief Host function to allocate and initialize a Look-Up-Table of size 256 for subsequent CRC32 computation on the device
-/// @param stream optional cuda stream (defaults to zero)
-/// @return returns a device pointer to a lookup-table for CRC32 computation
-/// @warning It is the responsibility of the caller to delete the returned array
-inline uint32_t* cudaCreateLut(cudaStream_t stream = 0)
-{
-    uint32_t *d_lut;
-    cudaCheck(cudaMallocAsync((void**)&d_lut, 256*sizeof(uint32_t), stream));
-    initLutKernel<<<1, 256, 0, stream>>>(d_lut);
-    cudaCheckError();
-    return d_lut;
-}
-
-}// namespace crc32
-
-#ifdef NANOVDB_CRC32_LOG2_BLOCK_SIZE// new approach computes CRC32 checksums for each 4 KB block
-
-/// @brief Update the checksum of a device grid
-/// @param d_gridData device pointer to GridData
-/// @param mode Mode of computation for the checksum.
-/// @param stream optional cuda stream (defaults to zero)
-/// @return The actual mode used for checksum computation. E.g. if @c d_gridData is NULL (or @c mode = ChecksumMode::Disable)
-///         then ChecksumMode::Disable is always returned. Else, if the grid has no nodes or blind data, ChecksumMode::Partial
-///         is always returned (even if @c mode = ChecksumMode::Full).
-inline ChecksumMode cudaGridChecksum(GridData *d_gridData, ChecksumMode mode = ChecksumMode::Partial, cudaStream_t stream = 0)
-{
-    if (d_gridData == nullptr || mode == ChecksumMode::Disable) return ChecksumMode::Disable;
-
-    static constexpr unsigned int mNumThreads = 128;// seems faster than the old value of 256!
- auto numBlocks = [&](unsigned int n)->unsigned int{return (n + mNumThreads - 1) / mNumThreads;}; - uint8_t *d_begin = reinterpret_cast(d_gridData); - uint32_t *d_lut = crc32::cudaCreateLut(stream);// allocate and generate device LUT for CRC32 - uint64_t size[2], *d_size;// {total size of grid, partial size for first checksum} - cudaCheck(cudaMallocAsync((void**)&d_size, 2*sizeof(uint64_t), stream)); - - // Compute CRC32 checksum of GridData, TreeData, RootData (+tiles), but exclude GridData::mMagic and GridData::mChecksum - cudaLambdaKernel<<<1, 1, 0, stream>>>(1, [=] __device__(size_t) { - d_size[0] = d_gridData->mGridSize; - uint8_t *d_mid = d_gridData->template nodePtr<2>(); - if (d_mid == nullptr) {// no upper nodes - if (d_gridData->mBlindMetadataCount) { - d_mid = d_begin + d_gridData->mBlindMetadataOffset;// exclude blind data from partial checksum - } else { - d_mid = d_begin + d_gridData->mGridSize;// no nodes or blind data, so partial checksum is computed on the entire grid buffer - } - } - d_size[1] = d_mid - d_begin; - uint32_t *p = reinterpret_cast(&(d_gridData->mChecksum)); - p[0] = crc32::checksum(d_begin + 16u, d_mid, d_lut);// exclude GridData::mMagic and GridData::mChecksum - }); - cudaCheckError(); - cudaCheck(cudaMemcpyAsync(size, d_size, 2*sizeof(uint64_t), cudaMemcpyDeviceToHost, stream)); - cudaCheck(cudaFreeAsync(d_size, stream)); - - if (mode != ChecksumMode::Full || size[0] == size[1]) return ChecksumMode::Partial; - - // Compute CRC32 checksum of 4K block of everything remaining in the buffer, i.e. nodes and blind data - const uint8_t *d_mid = d_begin + size[1], *d_end = d_begin + size[0]; - uint32_t *d_checksums;// 4096 byte chunks - const uint64_t checksumCount = (d_end - d_mid) >> NANOVDB_CRC32_LOG2_BLOCK_SIZE;// 4 KB (4096 byte) - cudaCheck(cudaMallocAsync((void**)&d_checksums, checksumCount*sizeof(uint32_t), stream)); - cudaLambdaKernel<<>>(checksumCount, [=] __device__(size_t tid) { - uint32_t size = 1<>>(1, [=] __device__(size_t) { - uint32_t *p = reinterpret_cast(&(d_gridData->mChecksum)); - p[1] = crc32::checksum((const uint8_t*)d_checksums, checksumCount*sizeof(uint32_t), d_lut); - }); - cudaCheckError(); - cudaCheck(cudaFreeAsync(d_checksums, stream)); - cudaCheck(cudaFreeAsync(d_lut, stream)); - - return ChecksumMode::Full; -}// cudaGridChecksum - -template -inline ChecksumMode cudaGridChecksum(NanoGrid *d_grid, ChecksumMode mode = ChecksumMode::Partial, cudaStream_t stream = 0) -{ - return cudaGridChecksum(reinterpret_cast(d_grid), mode, stream); -} - -inline GridChecksum cudaGetGridChecksum(GridData *d_gridData, cudaStream_t stream = 0) -{ - uint64_t checksum, *d_checksum; - cudaCheck(cudaMallocAsync((void**)&d_checksum, sizeof(uint64_t), stream)); - cudaLambdaKernel<<<1, 1, 0, stream>>>(1, [=] __device__(size_t) {*d_checksum = d_gridData->mChecksum;}); - cudaCheckError(); - cudaCheck(cudaMemcpyAsync(&checksum, d_checksum, sizeof(uint64_t), cudaMemcpyDeviceToHost, stream)); - cudaCheck(cudaFreeAsync(d_checksum, stream)); - return GridChecksum(checksum);; -} - -inline ChecksumMode cudaUpdateGridChecksum(GridData *d_gridData, cudaStream_t stream = 0) -{ - return cudaGridChecksum(d_gridData, cudaGetGridChecksum(d_gridData, stream).mode(), stream); -} - -#else - -template -void cudaGridChecksum(NanoGrid *d_grid, ChecksumMode mode = ChecksumMode::Partial, cudaStream_t stream = 0) -{ - if (d_grid == nullptr || mode == ChecksumMode::Disable) return; - - static constexpr unsigned int mNumThreads = 128;// seems faster than the old value of 256! 
- auto numBlocks = [&](unsigned int n)->unsigned int{return (n + mNumThreads - 1) / mNumThreads;}; - - uint32_t *d_lut = crc32::cudaCreateLut(stream);// allocate and generate device LUT for CRC32 - uint64_t size[2], *d_size; - cudaCheck(cudaMallocAsync((void**)&d_size, 2*sizeof(uint64_t), stream)); - cudaLambdaKernel<<<1, 1, 0, stream>>>(1, [=] __device__(size_t) { - d_size[0] = d_grid->gridSize(); - d_size[1] = d_grid->memUsage() + d_grid->tree().memUsage() + d_grid->tree().root().memUsage(); - const uint8_t *begin = reinterpret_cast(d_grid); - uint32_t *p = reinterpret_cast(&(d_grid->mChecksum)); - p[0] = crc32::checksum(begin + 16u, begin + d_size[1], d_lut);// exclude mMagic and mChecksum - }); - cudaCheckError(); - cudaCheck(cudaMemcpyAsync(size, d_size, 2*sizeof(uint64_t), cudaMemcpyDeviceToHost, stream)); - cudaCheckError(); - - if (mode != ChecksumMode::Full) return; - - // Get node counts - uint32_t nodeCount[3], *d_nodeCount, *d_checksums, *d_ptr; - cudaCheck(cudaMallocAsync((void**)&d_nodeCount, 3*sizeof(uint32_t), stream)); - cudaLambdaKernel<<<1, 1, 0, stream>>>(1, [=] __device__(size_t) { - auto &tree = d_grid->tree(); - for (int i = 0; i < 3; ++i) d_nodeCount[i] = tree.nodeCount(i); - }); - cudaCheckError(); - cudaCheck(cudaMemcpyAsync(nodeCount, d_nodeCount, 3*sizeof(uint32_t), cudaMemcpyDeviceToHost, stream)); - cudaCheck(cudaFreeAsync(d_nodeCount, stream)); - cudaCheck(cudaMallocAsync((void**)&d_checksums, (nodeCount[0]+nodeCount[1]+nodeCount[2])*sizeof(uint32_t), stream)); - - auto nodeMgrHandle = cudaCreateNodeManager(d_grid, CudaDeviceBuffer(), stream); - auto *d_nodeMgr = nodeMgrHandle.template deviceMgr(); - NANOVDB_ASSERT(isValid(d_nodeMgr)); - d_ptr = d_checksums; - - // very slow due to large nodes - cudaLambdaKernel<<>>(nodeCount[2], [=] __device__(size_t tid) { - auto &node = d_nodeMgr->upper(uint32_t(tid)); - d_ptr[tid] = crc32::checksum((const uint8_t*)&node, node.memUsage(), d_lut); - }); - cudaCheckError(); - - d_ptr += nodeCount[2]; - cudaLambdaKernel<<>>(nodeCount[1], [=] __device__(size_t tid) { - auto &node = d_nodeMgr->lower(uint32_t(tid)); - d_ptr[tid] = crc32::checksum((const uint8_t*)&node, node.memUsage(), d_lut); - }); - cudaCheckError(); - - d_ptr += nodeCount[1]; - cudaLambdaKernel<<>>(nodeCount[0], [=] __device__(size_t tid) { - auto &node = d_nodeMgr->leaf(uint32_t(tid)); - d_ptr[tid] = crc32::checksum((const uint8_t*)&node, node.memUsage(), d_lut); - }); - cudaCheckError(); - - // to-do: process blind data - cudaLambdaKernel<<<1, 1, 0, stream>>>(1, [=] __device__(size_t) { - uint32_t *p = reinterpret_cast(&(d_grid->mChecksum)); - const uint8_t *begin = reinterpret_cast(d_checksums); - p[1] = crc32::checksum(begin, d_nodeMgr->tree().totalNodeCount()*sizeof(uint32_t), d_lut); - }); - cudaCheckError(); - - cudaCheck(cudaFreeAsync(d_size, stream)); - cudaCheck(cudaFreeAsync(d_checksums, stream)); - cudaCheck(cudaFreeAsync(d_lut, stream)); -}// cudaGridChecksum - -#endif - -}// namespace nanovdb - -#endif // NANOVDB_CUDA_GRID_CHECKSUM_CUH_HAS_BEEN_INCLUDED +#include // for NANOVDB_DEPRECATED_HEADER +#include +NANOVDB_DEPRECATED_HEADER("Include nanovdb/tools/cuda/GridChecksum.cuh instead.") diff --git a/nanovdb/nanovdb/util/cuda/CudaGridHandle.cuh b/nanovdb/nanovdb/util/cuda/CudaGridHandle.cuh index 5446c56231..9e0c0faeb4 100644 --- a/nanovdb/nanovdb/util/cuda/CudaGridHandle.cuh +++ b/nanovdb/nanovdb/util/cuda/CudaGridHandle.cuh @@ -1,134 +1,6 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -/*! 
- \file CudaGridHandle.cuh - - \author Ken Museth, Doyub Kim - - \date August 3, 2023 - - \brief Contains cuda kernels for GridHandle - - \warning The header file contains cuda device code so be sure - to only include it in .cu files (or other .cuh files) -*/ - -#ifndef NANOVDB_CUDA_GRID_HANDLE_CUH_HAS_BEEN_INCLUDED -#define NANOVDB_CUDA_GRID_HANDLE_CUH_HAS_BEEN_INCLUDED - -#include "CudaDeviceBuffer.h"// required for instantiation of move c-tor of GridHandle -#include "CudaGridChecksum.cuh"// for cudaUpdateChecksum -#include "../GridHandle.h" - -namespace nanovdb { - -namespace {// anonymous namespace -__global__ void cudaCpyMetaData(const GridData *data, GridHandleMetaData *meta){cpyMetaData(data, meta);} -__global__ void cudaUpdateGridCount(GridData *data, uint32_t gridIndex, uint32_t gridCount, bool *d_dirty){ - NANOVDB_ASSERT(gridIndex < gridCount); - if (*d_dirty = data->mGridIndex != gridIndex || data->mGridCount != gridCount) { - data->mGridIndex = gridIndex; - data->mGridCount = gridCount; - if (data->mChecksum == GridChecksum::EMPTY) *d_dirty = false;// no need to update checksum if it didn't already exist - } -} -}// anonymous namespace - -template -template::hasDeviceDual, int>::type> -GridHandle::GridHandle(T&& buffer) -{ - static_assert(is_same::value, "Expected U==BufferT"); - mBuffer = std::move(buffer); - if (auto *data = reinterpret_cast(mBuffer.data())) { - if (!data->isValid()) throw std::runtime_error("GridHandle was constructed with an invalid host buffer"); - mMetaData.resize(data->mGridCount); - cpyMetaData(data, mMetaData.data()); - } else { - if (auto *d_data = reinterpret_cast(mBuffer.deviceData())) { - GridData tmp; - cudaCheck(cudaMemcpy(&tmp, d_data, sizeof(GridData), cudaMemcpyDeviceToHost)); - if (!tmp.isValid()) throw std::runtime_error("GridHandle was constructed with an invalid device buffer"); - GridHandleMetaData *d_metaData; - cudaMalloc((void**)&d_metaData, tmp.mGridCount*sizeof(GridHandleMetaData)); - cudaCpyMetaData<<<1,1>>>(d_data, d_metaData); - mMetaData.resize(tmp.mGridCount); - cudaCheck(cudaMemcpy(mMetaData.data(), d_metaData,tmp.mGridCount*sizeof(GridHandleMetaData), cudaMemcpyDeviceToHost)); - cudaCheck(cudaFree(d_metaData)); - } - } -}// GridHandle(T&& buffer) - -// Dummy function that ensures instantiation of the move-constructor above when BufferT=CudaDeviceBuffer -namespace {auto __dummy(){return GridHandle(std::move(CudaDeviceBuffer()));}} - -template class VectorT = std::vector> -inline typename enable_if::hasDeviceDual, VectorT>>::type -cudaSplitGridHandles(const GridHandle &handle, const BufferT* other = nullptr, cudaStream_t stream = 0) -{ - const uint8_t *ptr = handle.deviceData(); - if (ptr == nullptr) return VectorT>(); - VectorT> handles(handle.gridCount()); - bool dirty, *d_dirty;// use this to check if the checksum needs to be recomputed - cudaCheck(cudaMallocAsync((void**)&d_dirty, sizeof(bool), stream)); - for (uint32_t n=0; n(buffer.deviceData()); - const GridData *src = reinterpret_cast(ptr); - cudaCheck(cudaMemcpyAsync(dst, src, handle.gridSize(n), cudaMemcpyDeviceToDevice, stream)); - cudaUpdateGridCount<<<1, 1, 0, stream>>>(dst, 0u, 1u, d_dirty); - cudaCheckError(); - cudaCheck(cudaMemcpyAsync(&dirty, d_dirty, sizeof(bool), cudaMemcpyDeviceToHost, stream)); - if (dirty) cudaGridChecksum(dst, ChecksumMode::Partial); - handles[n] = GridHandle(std::move(buffer)); - ptr += handle.gridSize(n); - } - cudaCheck(cudaFreeAsync(d_dirty, stream)); - return std::move(handles); -}// cudaSplitGridHandles - -template class VectorT = 
std::vector> -inline typename enable_if::hasDeviceDual, VectorT>>::type -splitDeviceGrids(const GridHandle &handle, const BufferT* other = nullptr, cudaStream_t stream = 0) -{ return cudaSplitGridHandles(handle, other, stream); } - -template class VectorT> -inline typename enable_if::hasDeviceDual, GridHandle>::type -cudaMergeGridHandles(const VectorT> &handles, const BufferT* other = nullptr, cudaStream_t stream = 0) -{ - uint64_t size = 0u; - uint32_t counter = 0u, gridCount = 0u; - for (auto &h : handles) { - gridCount += h.gridCount(); - for (uint32_t n=0; n(dst); - cudaUpdateGridCount<<<1, 1, 0, stream>>>(data, counter++, gridCount, d_dirty); - cudaCheckError(); - cudaCheck(cudaMemcpyAsync(&dirty, d_dirty, sizeof(bool), cudaMemcpyDeviceToHost, stream)); - if (dirty) cudaGridChecksum(data, ChecksumMode::Partial); - dst += h.gridSize(n); - src += h.gridSize(n); - } - } - cudaCheck(cudaFreeAsync(d_dirty, stream)); - return GridHandle(std::move(buffer)); -}// cudaMergeGridHandles - -template class VectorT> -inline typename enable_if::hasDeviceDual, GridHandle>::type -mergeDeviceGrids(const VectorT> &handles, const BufferT* other = nullptr, cudaStream_t stream = 0) -{ return cudaMergeGridHandles(handles, other, stream); } - -} // namespace nanovdb - -#endif // NANOVDB_CUDA_GRID_HANDLE_CUH_HAS_BEEN_INCLUDED +#include // for NANOVDB_DEPRECATED_HEADER +#include +NANOVDB_DEPRECATED_HEADER("Include nanovdb/cuda/GridHandle.cuh instead.") \ No newline at end of file diff --git a/nanovdb/nanovdb/util/cuda/CudaGridStats.cuh b/nanovdb/nanovdb/util/cuda/CudaGridStats.cuh index dcf5bfc850..64c6490768 100644 --- a/nanovdb/nanovdb/util/cuda/CudaGridStats.cuh +++ b/nanovdb/nanovdb/util/cuda/CudaGridStats.cuh @@ -1,250 +1,6 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -/*! - \file CudaGridStats.cuh - - \author Ken Museth - - \date October 9, 2023 - - \brief Re-computes min/max/avg/var/bbox information for each node in a - pre-existing NanoVDB grid on the device. -*/ - -#ifndef NANOVDB_CUDAGRIDSTATS_CUH_HAS_BEEN_INCLUDED -#define NANOVDB_CUDAGRIDSTATS_CUH_HAS_BEEN_INCLUDED - -#include -#include - -namespace nanovdb { - -/// @brief Re-computes the min/max, stats and bbox information for an existing NanoVDB Grid -/// -/// @param grid Grid whose stats to update -/// @param mode Mode of computation for the statistics. 
-/// @param stream Optional cuda stream (defaults to zero) -template -void cudaGridStats(NanoGrid *d_grid, StatsMode mode = StatsMode::Default, cudaStream_t stream = 0); - -//================================================================================================ - -/// @brief Allows for the construction of NanoVDB grids without any dependecy -template::ValueType>> -class CudaGridStats -{ - using GridT = NanoGrid; - using TreeT = typename GridT::TreeType; - using ValueT = typename TreeT::ValueType; - using Node0 = typename TreeT::Node0; // leaf - using Node1 = typename TreeT::Node1; // lower - using Node2 = typename TreeT::Node2; // upper - using RootT = typename TreeT::Node3; // root - static_assert(is_same::value, "Mismatching type"); - - ValueT mDelta; // skip rendering of node if: node.max < -mDelta || node.min > mDelta - -public: - CudaGridStats(ValueT delta = ValueT(0)) : mDelta(delta) {} - - void operator()(GridT *d_grid, cudaStream_t stream = 0); - -}; // CudaGridStats - -//================================================================================================ - -namespace {// define cuda kernels in an unnamed namespace - -template -__global__ void processLeaf(NodeManager *d_nodeMgr, StatsT *d_stats) -{ - const uint32_t tid = blockIdx.x * blockDim.x + threadIdx.x; - if (tid >= d_nodeMgr->leafCount()) return; - auto &d_leaf = d_nodeMgr->leaf(tid); - - if (d_leaf.updateBBox()) {// updates active bounding box (also updates data->mFlags) and return true if non-empty - if constexpr(StatsT::hasStats()) { - StatsT stats; - for (auto it = d_leaf.cbeginValueOn(); it; ++it) stats.add(*it); - if constexpr(StatsT::hasAverage()) { - d_stats[tid] = stats; - *reinterpret_cast(&d_leaf.mMinimum) = tid; - } else { - stats.setStats(d_leaf); - } - } - } - d_leaf.mFlags &= ~uint8_t(1u);// enable rendering -}// processLeaf - -template -__global__ void processInternal(NodeManager *d_nodeMgr, StatsT *d_stats) -{ - using ChildT = typename NanoNode::type; - const uint32_t tid = blockIdx.x * blockDim.x + threadIdx.x; - if (tid >= d_nodeMgr->nodeCount(LEVEL)) return; - auto &d_node = d_nodeMgr->template node(tid); - auto &bbox = d_node.mBBox; - bbox = CoordBBox();// empty bbox - StatsT stats; - uint32_t childID = 0u; - - for (auto it = d_node.beginChild(); it; ++it) { - auto &child = *it; - bbox.expand( child.bbox() ); - if constexpr(StatsT::hasAverage()) { - childID = *reinterpret_cast(&child.mMinimum); - StatsT &s = d_stats[childID]; - s.setStats(child); - stats.add(s); - } else if constexpr(StatsT::hasMinMax()) { - stats.add(child.minimum()); - stats.add(child.maximum()); - } - } - for (auto it = d_node.cbeginValueOn(); it; ++it) { - const Coord ijk = it.getCoord(); - bbox[0].minComponent(ijk); - bbox[1].maxComponent(ijk + Coord(ChildT::DIM - 1)); - if constexpr(StatsT::hasStats()) stats.add(*it, ChildT::NUM_VALUES); - } - if constexpr(StatsT::hasAverage()) { - d_stats[childID] = stats; - *reinterpret_cast(&d_node.mMinimum) = childID; - } else if constexpr(StatsT::hasMinMax()) { - stats.setStats(d_node); - } - d_node.mFlags &= ~uint64_t(1u);// enable rendering -}// processInternal - -template -__global__ void processRootAndGrid(NodeManager *d_nodeMgr, StatsT *d_stats) -{ - using ChildT = NanoUpper; - using ValueT = typename ChildT::ValueType; - - // process root - auto &root = d_nodeMgr->root(); - root.mBBox = CoordBBox(); - if (root.isEmpty()) { - root.mMinimum = root.mMaximum = root.mBackground; - root.mAverage = root.mStdDevi = 0; - } else { - ValueT v; - StatsT s; - for (auto it = 
root.beginDense(); it; ++it) { - if (auto *child = it.probeChild(v)) { - root.mBBox.expand( child->bbox() ); - if constexpr(StatsT::hasAverage()) { - StatsT &stats = d_stats[*reinterpret_cast(&child->mMinimum)]; - stats.setStats(*child); - s.add(stats); - } else if constexpr(StatsT::hasMinMax()){ - s.add(child->minimum()); - s.add(child->maximum()); - } - } else if (it.isValueOn()) { - const Coord ijk = it.getCoord(); - root.mBBox[0].minComponent(ijk); - root.mBBox[1].maxComponent(ijk + Coord(ChildT::DIM - 1)); - if constexpr(StatsT::hasStats()) s.add(v, ChildT::NUM_VALUES); - } - } - s.setStats(root); - } - - // process Grid - auto& grid = d_nodeMgr->grid(); - const auto& indexBBox = root.bbox(); - if (indexBBox.empty()) { - grid.mWorldBBox = BBox(); - grid.setBBoxOn(false); - } else { - // Note that below max is offset by one since CoordBBox.max is inclusive - // while bbox.max is exclusive. However, min is inclusive in both - // CoordBBox and BBox. This also guarantees that a grid with a single - // active voxel, does not have an empty world bbox! E.g. if a grid with a - // unit index-to-world transformation only contains the active voxel (0,0,0) - // then indeBBox = (0,0,0) -> (0,0,0) and then worldBBox = (0.0, 0.0, 0.0) - // -> (1.0, 1.0, 1.0). This is a consequence of the different definitions - // of index and world bounding boxes inherited from OpenVDB! - const Coord min = indexBBox[0]; - const Coord max = indexBBox[1] + Coord(1); - - auto& wBBox = grid.mWorldBBox; - const auto& map = grid.map(); - wBBox[0] = wBBox[1] = map.applyMap(Vec3d(min[0], min[1], min[2])); - wBBox.expand(map.applyMap(Vec3d(min[0], min[1], max[2]))); - wBBox.expand(map.applyMap(Vec3d(min[0], max[1], min[2]))); - wBBox.expand(map.applyMap(Vec3d(max[0], min[1], min[2]))); - wBBox.expand(map.applyMap(Vec3d(max[0], max[1], min[2]))); - wBBox.expand(map.applyMap(Vec3d(max[0], min[1], max[2]))); - wBBox.expand(map.applyMap(Vec3d(min[0], max[1], max[2]))); - wBBox.expand(map.applyMap(Vec3d(max[0], max[1], max[2]))); - grid.setBBoxOn(true); - } - - // set bit flags - grid.setMinMaxOn(StatsT::hasMinMax()); - grid.setAverageOn(StatsT::hasAverage()); - grid.setStdDeviationOn(StatsT::hasStdDeviation()); -}// processRootAndGrid - -}// cuda kernels are defined in an unnamed namespace - -//================================================================================================ - -template -void CudaGridStats::operator()(NanoGrid *d_grid, cudaStream_t stream) -{ - static const uint32_t threadsPerBlock = 128; - auto blocksPerGrid = [&](uint32_t count)->uint32_t{return (count + (threadsPerBlock - 1)) / threadsPerBlock;}; - - auto nodeMgrHandle = cudaCreateNodeManager(d_grid, CudaDeviceBuffer(), stream); - auto *d_nodeMgr = nodeMgrHandle.template deviceMgr(); - - uint32_t nodeCount[3];// {leaf, lower, upper} - cudaCheck(cudaMemcpyAsync(nodeCount, (char*)d_grid + sizeof(GridData) + 4*sizeof(uint64_t), 3*sizeof(uint32_t), cudaMemcpyDeviceToHost, stream)); - cudaStreamSynchronize(stream);// finish all device tasks in stream - - StatsT *d_stats = nullptr; - - if constexpr(StatsT::hasAverage()) cudaCheck(cudaMallocAsync((void**)&d_stats, nodeCount[0]*sizeof(StatsT), stream)); - - processLeaf<<>>(d_nodeMgr, d_stats); - - processInternal<<>>(d_nodeMgr, d_stats); - - processInternal<<>>(d_nodeMgr, d_stats); - - processRootAndGrid<<<1, 1, 0, stream>>>(d_nodeMgr, d_stats); - - if constexpr(StatsT::hasAverage()) cudaCheck(cudaFreeAsync(d_stats, stream)); - -} // CudaGridStats::operator()( Grid ) - 
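// A minimal usage sketch of the cudaGridStats() entry point declared at the
// top of this file (d_grid is a hypothetical device pointer):
//
//   void refreshStats(nanovdb::NanoGrid<float>* d_grid, cudaStream_t stream)
//   {
//       // recompute min/max/average/std-dev and bboxes in-place on the device
//       nanovdb::cudaGridStats(d_grid, nanovdb::StatsMode::All, stream);
//   }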
-//================================================================================================
-
-template<typename BuildT>
-void cudaGridStats(NanoGrid<BuildT> *d_grid, StatsMode mode, cudaStream_t stream)
-{
-    if (d_grid == nullptr || mode == StatsMode::Disable) {
-        return;
-    } else if (mode == StatsMode::BBox || is_same<BuildT, ValueMask>::value) {
-        CudaGridStats<BuildT, NoopStats<typename NanoGrid<BuildT>::ValueType>> stats;
-        stats(d_grid, stream);
-    } else if (mode == StatsMode::MinMax) {
-        CudaGridStats<BuildT, Extrema<typename NanoGrid<BuildT>::ValueType>> stats;
-        stats(d_grid, stream);
-    } else if (mode == StatsMode::All) {
-        CudaGridStats<BuildT, Stats<typename NanoGrid<BuildT>::ValueType>> stats;
-        stats(d_grid, stream);
-    } else {
-        throw std::runtime_error("cudaGridStats: Unsupported statistics mode.");
-    }
-}// cudaGridStats
-
-} // namespace nanovdb
-
-#endif // NANOVDB_CUDAGRIDSTATS_CUH_HAS_BEEN_INCLUDED
+#include <nanovdb/util/Util.h> // for NANOVDB_DEPRECATED_HEADER
+#include <nanovdb/tools/cuda/GridStats.cuh>
+NANOVDB_DEPRECATED_HEADER("Include nanovdb/tools/cuda/GridStats.cuh instead.")
diff --git a/nanovdb/nanovdb/util/cuda/CudaGridValidator.cuh b/nanovdb/nanovdb/util/cuda/CudaGridValidator.cuh
new file mode 100644
index 0000000000..ca535d4013
--- /dev/null
+++ b/nanovdb/nanovdb/util/cuda/CudaGridValidator.cuh
@@ -0,0 +1,6 @@
+// Copyright Contributors to the OpenVDB Project
+// SPDX-License-Identifier: MPL-2.0
+
+#include <nanovdb/util/Util.h> // for NANOVDB_DEPRECATED_HEADER
+#include <nanovdb/tools/cuda/GridValidator.cuh>
+NANOVDB_DEPRECATED_HEADER("Include nanovdb/tools/cuda/GridValidator.cuh instead.")
diff --git a/nanovdb/nanovdb/util/cuda/CudaIndexToGrid.cuh b/nanovdb/nanovdb/util/cuda/CudaIndexToGrid.cuh
index 8394ecefe1..ed5e67da33 100644
--- a/nanovdb/nanovdb/util/cuda/CudaIndexToGrid.cuh
+++ b/nanovdb/nanovdb/util/cuda/CudaIndexToGrid.cuh
@@ -1,386 +1,6 @@
 // Copyright Contributors to the OpenVDB Project
 // SPDX-License-Identifier: MPL-2.0
-/*!
-    \file CudaIndexToGrid.cuh
-
-    \author Ken Museth
-
-    \date April 17, 2023
-
-    \brief Combines an IndexGrid and values into a regular Grid on the device
-
-    \warning The header file contains cuda device code so be sure
-             to only include it in .cu files (or other .cuh files)
-*/
-
-#ifndef NVIDIA_CUDA_INDEX_TO_GRID_CUH_HAS_BEEN_INCLUDED
-#define NVIDIA_CUDA_INDEX_TO_GRID_CUH_HAS_BEEN_INCLUDED
-
-#include
-#include "CudaDeviceBuffer.h"
-#include
-#include
-#include
-
-namespace nanovdb {
-
-/// @brief Freestanding function that combines an IndexGrid and values into a regular Grid
-/// @tparam DstBuildT Build type of the destination/output Grid
-/// @tparam SrcBuildT Build type of the source/input IndexGrid
-/// @tparam BufferT Type of the buffer used for allocation of the destination Grid
-/// @param d_srcGrid Device pointer to source/input IndexGrid, i.e. SrcBuildT={ValueIndex,ValueOnIndex,ValueIndexMask,ValueOnIndexMask}
-/// @param d_srcValues Device pointer to an array of values
-/// @param pool Memory pool used to create a buffer for the destination/output Grid
-/// @param stream optional CUDA stream (defaults to CUDA stream 0)
-/// @note If d_srcGrid has stats (min,max,avg,std-dev), then d_srcValues is also assumed
-///       to have the same information, all of which is then copied to the destination/output grid.
-///       An exception to this rule is if the type of d_srcValues is different from the stats type
-///       NanoRoot<DstBuildT>::FloatType, e.g. if DstBuildT=Vec3f then NanoRoot<Vec3f>::FloatType=float,
-///       in which case average and standard-deviation are undefined in the output grid.
-/// @return -template -typename enable_if::is_index, GridHandle>::type -cudaIndexToGrid(const NanoGrid *d_srcGrid, const typename BuildToValueMap::type *d_srcValues, const BufferT &pool = BufferT(), cudaStream_t stream = 0); - - -template -typename enable_if::is_index, GridHandle>::type -cudaCreateNanoGrid(const NanoGrid *d_srcGrid, const typename BuildToValueMap::type *d_srcValues, const BufferT &pool = BufferT(), cudaStream_t stream = 0) -{ - return cudaIndexToGrid(d_srcGrid, d_srcValues, pool, stream); -} - -namespace {// anonymous namespace - -template -class CudaIndexToGrid -{ - using SrcGridT = NanoGrid; -public: - struct NodeAccessor; - - /// @brief Constructor from a source IndeGrid - /// @param srcGrid Device pointer to IndexGrid used as the source - CudaIndexToGrid(const SrcGridT *d_srcGrid, cudaStream_t stream = 0); - - ~CudaIndexToGrid() {cudaCheck(cudaFreeAsync(mDevNodeAcc, mStream));} - - /// @brief Toggle on and off verbose mode - /// @param on if true verbose is turned on - void setVerbose(bool on = true) {mVerbose = on; } - - /// @brief Set the name of the destination/output grid - /// @param name Name used for the destination grid - void setGridName(const std::string &name) {mGridName = name;} - - /// @brief Combines the IndexGrid with values to produce a regular Grid - /// @tparam DstBuildT Template parameter of the destination grid and value type - /// @tparam BufferT Template parameter of the memory allocator - /// @param srcValues pointer to values that will be inserted into the output grid - /// @param buffer optional buffer used for memory allocation - /// @return A new GridHandle with the grid of type @c DstBuildT - template - GridHandle getHandle(const typename BuildToValueMap::type *srcValues, const BufferT &buffer = BufferT()); - -private: - cudaStream_t mStream{0}; - GpuTimer mTimer; - std::string mGridName; - bool mVerbose{false}; - NodeAccessor mNodeAcc, *mDevNodeAcc; - - template - BufferT getBuffer(const BufferT &pool); -};// CudaIndexToGrid - -//================================================================================================ - -template -struct CudaIndexToGrid::NodeAccessor -{ - uint64_t grid, tree, root, node[3], meta, blind, size;// byte offsets, node: 0=leaf,1=lower, 2=upper - const SrcGridT *d_srcGrid;// device point to source IndexGrid - void *d_dstPtr;// device pointer to buffer with destination Grid - char *d_gridName; - uint32_t nodeCount[4];// 0=leaf, 1=lower, 2=upper, 3=root tiles - - __device__ const NanoGrid& srcGrid() const {return *d_srcGrid;} - __device__ const NanoTree& srcTree() const {return d_srcGrid->tree();} - __device__ const NanoRoot& srcRoot() const {return d_srcGrid->tree().root();} - template - __device__ const typename NanoNode::type& srcNode(int i) const { - return *(this->srcTree().template getFirstNode() + i); - } - - template - __device__ NanoGrid& dstGrid() const {return *PtrAdd>(d_dstPtr, grid);} - template - __device__ NanoTree& dstTree() const {return *PtrAdd>(d_dstPtr, tree);} - template - __device__ NanoRoot& dstRoot() const {return *PtrAdd>(d_dstPtr, root);} - template - __device__ typename NanoNode::type& dstNode(int i) const { - return *(PtrAdd::type>(d_dstPtr, node[LEVEL])+i); - } -};// CudaIndexToGrid::NodeAccessor - -//================================================================================================ - -template -__global__ void cudaProcessGridTreeRoot(typename CudaIndexToGrid::NodeAccessor *nodeAcc, - const typename BuildToValueMap::type *srcValues) -{ - using SrcValueT = typename 
BuildToValueMap::type; - using DstStatsT = typename NanoRoot::FloatType; - - auto &srcGrid = nodeAcc->srcGrid(); - auto &dstGrid = nodeAcc->template dstGrid(); - auto &srcTree = srcGrid.tree(); - auto &dstTree = nodeAcc->template dstTree(); - auto &srcRoot = srcTree.root(); - auto &dstRoot = nodeAcc->template dstRoot(); - - // process Grid - *dstGrid.data() = *srcGrid.data(); - dstGrid.mGridType = mapToGridType(); - dstGrid.mData1 = 0u; - // we will recompute GridData::mChecksum later - - // process Tree - *dstTree.data() = *srcTree.data(); - dstTree.setRoot(&dstRoot); - dstTree.setFirstNode(&nodeAcc->template dstNode(0)); - dstTree.setFirstNode(&nodeAcc->template dstNode(0)); - dstTree.setFirstNode(&nodeAcc->template dstNode(0)); - - // process Root - dstRoot.mBBox = srcRoot.mBBox; - dstRoot.mTableSize = srcRoot.mTableSize; - dstRoot.mBackground = srcValues[srcRoot.mBackground]; - if (srcGrid.hasMinMax()) { - dstRoot.mMinimum = srcValues[srcRoot.mMinimum]; - dstRoot.mMaximum = srcValues[srcRoot.mMaximum]; - } - if constexpr(is_same::value) {// e.g. {float,float} or {Vec3f,float} - if (srcGrid.hasAverage()) dstRoot.mAverage = srcValues[srcRoot.mAverage]; - if (srcGrid.hasStdDeviation()) dstRoot.mStdDevi = srcValues[srcRoot.mStdDevi]; - } -}// cudaProcessGridTreeRoot - -//================================================================================================ - -template -__global__ void cudaProcessRootTiles(typename CudaIndexToGrid::NodeAccessor *nodeAcc, - const typename BuildToValueMap::type *srcValues) -{ - const auto tid = blockIdx.x; - - // Process children and tiles - const auto &srcTile = *nodeAcc->srcRoot().tile(tid); - auto &dstTile = *nodeAcc->template dstRoot().tile(tid); - dstTile.key = srcTile.key; - if (srcTile.child) { - dstTile.child = sizeof(NanoRoot) + sizeof(NanoRoot::Tile)*((srcTile.child - sizeof(NanoRoot))/sizeof(NanoRoot::Tile)); - dstTile.value = srcValues[0];// set to background - dstTile.state = false; - } else { - dstTile.child = 0;// i.e. no child node - dstTile.value = srcValues[srcTile.value]; - dstTile.state = srcTile.state; - } -}// cudaProcessRootTiles - -//================================================================================================ - -template -__global__ void cudaProcessInternalNodes(typename CudaIndexToGrid::NodeAccessor *nodeAcc, - const typename BuildToValueMap::type *srcValues) -{ - using SrcNodeT = typename NanoNode::type; - using DstNodeT = typename NanoNode::type; - using SrcChildT = typename SrcNodeT::ChildNodeType; - using DstChildT = typename DstNodeT::ChildNodeType; - using SrcValueT = typename BuildToValueMap::type; - using DstStatsT = typename NanoRoot::FloatType; - - auto &srcNode = nodeAcc->template srcNode(blockIdx.x); - auto &dstNode = nodeAcc->template dstNode(blockIdx.x); - - if (threadIdx.x == 0 && threadIdx.y == 0) { - dstNode.mBBox = srcNode.mBBox; - dstNode.mFlags = srcNode.mFlags; - dstNode.mValueMask = srcNode.mValueMask; - dstNode.mChildMask = srcNode.mChildMask; - auto &srcGrid = nodeAcc->srcGrid(); - if (srcGrid.hasMinMax()) { - dstNode.mMinimum = srcValues[srcNode.mMinimum]; - dstNode.mMaximum = srcValues[srcNode.mMaximum]; - } - if constexpr(is_same::value) {// e.g. 
{float,float} or {Vec3f,float} - if (srcGrid.hasAverage()) dstNode.mAverage = srcValues[srcNode.mAverage]; - if (srcGrid.hasStdDeviation()) dstNode.mStdDevi = srcValues[srcNode.mStdDevi]; - } - } - const uint64_t nodeSkip = nodeAcc->nodeCount[LEVEL] - blockIdx.x, srcOff = sizeof(SrcNodeT)*nodeSkip, dstOff = sizeof(DstNodeT)*nodeSkip;// offset to first node of child type - const int off = blockDim.x*blockDim.y*threadIdx.x + blockDim.x*threadIdx.y; - for (int threadIdx_z=0; threadIdx_z -__global__ void cudaProcessLeafNodes(typename CudaIndexToGrid::NodeAccessor *nodeAcc, - const typename BuildToValueMap::type *srcValues) -{ - using SrcValueT = typename BuildToValueMap::type; - using DstStatsT = typename NanoRoot::FloatType; - static_assert(!BuildTraits::is_special, "Invalid destination type!"); - auto &srcLeaf = nodeAcc->template srcNode<0>(blockIdx.x); - auto &dstLeaf = nodeAcc->template dstNode(blockIdx.x); - if (threadIdx.x == 0 && threadIdx.y == 0) { - dstLeaf.mBBoxMin = srcLeaf.mBBoxMin; - for (int i=0; i<3; ++i) dstLeaf.mBBoxDif[i] = srcLeaf.mBBoxDif[i]; - dstLeaf.mFlags = srcLeaf.mFlags; - dstLeaf.mValueMask = srcLeaf.mValueMask; - /// - auto &srcGrid = nodeAcc->srcGrid(); - if (srcGrid.hasMinMax()) { - dstLeaf.mMinimum = srcValues[srcLeaf.getMin()]; - dstLeaf.mMaximum = srcValues[srcLeaf.getMax()]; - } - if constexpr(is_same::value) {// e.g. {float,float} or {Vec3f,float} - if (srcGrid.hasAverage()) dstLeaf.mAverage = srcValues[srcLeaf.getAvg()]; - if (srcGrid.hasStdDeviation()) dstLeaf.mStdDevi = srcValues[srcLeaf.getDev()]; - } - } - const int off = blockDim.x*blockDim.y*threadIdx.x + blockDim.x*threadIdx.y; - auto *dst = dstLeaf.mValues + off; - for (int threadIdx_z=0; threadIdx_z -__global__ void cudaCpyNodeCount(const NanoGrid *srcGrid, - typename CudaIndexToGrid::NodeAccessor *nodeAcc) -{ - assert(srcGrid->isSequential()); - nodeAcc->d_srcGrid = srcGrid; - for (int i=0; i<3; ++i) nodeAcc->nodeCount[i] = srcGrid->tree().nodeCount(i); - nodeAcc->nodeCount[3] = srcGrid->tree().root().tileCount(); -} - -}// anonymous namespace - -//================================================================================================ - -template -CudaIndexToGrid::CudaIndexToGrid(const SrcGridT *d_srcGrid, cudaStream_t stream) - : mStream(stream), mTimer(stream) -{ - NANOVDB_ASSERT(d_srcGrid); - cudaCheck(cudaMallocAsync((void**)&mDevNodeAcc, sizeof(NodeAccessor), mStream)); - cudaCpyNodeCount<<<1, 1, 0, mStream>>>(d_srcGrid, mDevNodeAcc); - cudaCheckError(); - cudaCheck(cudaMemcpyAsync(&mNodeAcc, mDevNodeAcc, sizeof(NodeAccessor), cudaMemcpyDeviceToHost, mStream));// mNodeAcc = *mDevNodeAcc -} - -//================================================================================================ - -template -template -GridHandle CudaIndexToGrid::getHandle(const typename BuildToValueMap::type *srcValues, - const BufferT &pool) -{ - if (mVerbose) mTimer.start("Initiate buffer"); - auto buffer = this->template getBuffer(pool); - - if (mVerbose) mTimer.restart("Process grid,tree,root"); - cudaProcessGridTreeRoot<<<1, 1, 0, mStream>>>(mDevNodeAcc, srcValues); - cudaCheckError(); - - if (mVerbose) mTimer.restart("Process root children and tiles"); - cudaProcessRootTiles<<>>(mDevNodeAcc, srcValues); - cudaCheckError(); - - cudaCheck(cudaFreeAsync(mNodeAcc.d_gridName, mStream)); - - if (mVerbose) mTimer.restart("Process upper internal nodes"); - cudaProcessInternalNodes<<>>(mDevNodeAcc, srcValues); - cudaCheckError(); - - if (mVerbose) mTimer.restart("Process lower internal nodes"); - 
cudaProcessInternalNodes<<>>(mDevNodeAcc, srcValues); - cudaCheckError(); - - if (mVerbose) mTimer.restart("Process leaf nodes"); - cudaProcessLeafNodes<<>>(mDevNodeAcc, srcValues); - if (mVerbose) mTimer.stop(); - cudaCheckError(); - - if (mVerbose) mTimer.restart("Compute checksums"); - cudaUpdateGridChecksum((GridData*)mNodeAcc.d_dstPtr, mStream); - if (mVerbose) mTimer.stop(); - - cudaStreamSynchronize(mStream);// finish all device tasks in mStream - return GridHandle(std::move(buffer)); -}// CudaIndexToGrid::getHandle - -//================================================================================================ - -template -template -inline BufferT CudaIndexToGrid::getBuffer(const BufferT &pool) -{ - mNodeAcc.grid = 0;// grid is always stored at the start of the buffer! - mNodeAcc.tree = NanoGrid::memUsage(); // grid ends and tree begins - mNodeAcc.root = mNodeAcc.tree + NanoTree::memUsage(); // tree ends and root node begins - mNodeAcc.node[2] = mNodeAcc.root + NanoRoot::memUsage(mNodeAcc.nodeCount[3]); // root node ends and upper internal nodes begin - mNodeAcc.node[1] = mNodeAcc.node[2] + NanoUpper::memUsage()*mNodeAcc.nodeCount[2]; // upper internal nodes ends and lower internal nodes begin - mNodeAcc.node[0] = mNodeAcc.node[1] + NanoLower::memUsage()*mNodeAcc.nodeCount[1]; // lower internal nodes ends and leaf nodes begin - mNodeAcc.meta = mNodeAcc.node[0] + NanoLeaf::DataType::memUsage()*mNodeAcc.nodeCount[0];// leaf nodes end and blind meta data begins - mNodeAcc.blind = mNodeAcc.meta + 0*sizeof(GridBlindMetaData); // meta data ends and blind data begins - mNodeAcc.size = mNodeAcc.blind;// end of buffer - auto buffer = BufferT::create(mNodeAcc.size, &pool, false, mStream); - mNodeAcc.d_dstPtr = buffer.deviceData(); - if (mNodeAcc.d_dstPtr == nullptr) throw std::runtime_error("Failed memory allocation on the device"); - - if (size_t size = mGridName.size()) { - cudaCheck(cudaMallocAsync((void**)&mNodeAcc.d_gridName, size, mStream)); - cudaCheck(cudaMemcpyAsync(mNodeAcc.d_gridName, mGridName.data(), size, cudaMemcpyHostToDevice, mStream)); - } else { - mNodeAcc.d_gridName = nullptr; - } - cudaCheck(cudaMemcpyAsync(mDevNodeAcc, &mNodeAcc, sizeof(NodeAccessor), cudaMemcpyHostToDevice, mStream));// copy NodeAccessor CPU -> GPU - return buffer; -} - -//================================================================================================ - -template -typename enable_if::is_index, GridHandle>::type -cudaIndexToGrid(const NanoGrid *d_srcGrid, const typename BuildToValueMap::type *d_srcValues, const BufferT &pool, cudaStream_t stream) -{ - CudaIndexToGrid converter(d_srcGrid, stream); - return converter.template getHandle(d_srcValues, pool); -} - -}// nanovdb namespace - -#endif // NVIDIA_CUDA_INDEX_TO_GRID_CUH_HAS_BEEN_INCLUDED +#include // for NANOVDB_DEPRECATED_HEADER +#include +NANOVDB_DEPRECATED_HEADER("Include nanovdb/tools/cuda/IndexToGrid.cuh instead.") diff --git a/nanovdb/nanovdb/util/cuda/CudaNodeManager.cuh b/nanovdb/nanovdb/util/cuda/CudaNodeManager.cuh index 3d35a4b902..5aa5b84965 100644 --- a/nanovdb/nanovdb/util/cuda/CudaNodeManager.cuh +++ b/nanovdb/nanovdb/util/cuda/CudaNodeManager.cuh @@ -1,90 +1,6 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -/*! 
-    \file CudaNodeManager.cuh
-
-    \author Ken Museth
-
-    \date October 3, 2023
-
-    \brief Contains cuda kernels for NodeManager
-
-    \warning The header file contains cuda device code so be sure
-             to only include it in .cu files (or other .cuh files)
-*/
-
-#ifndef NANOVDB_CUDA_NODE_MANAGER_CUH_HAS_BEEN_INCLUDED
-#define NANOVDB_CUDA_NODE_MANAGER_CUH_HAS_BEEN_INCLUDED
-
-#include "CudaUtils.h"// for cudaLambdaKernel
-#include "CudaDeviceBuffer.h"
-#include "../NodeManager.h"
-
-namespace nanovdb {
-
-/// @brief Construct a NodeManager from a device grid pointer
-///
-/// @param d_grid device grid pointer whose nodes will be accessed sequentially
-/// @param buffer buffer from which to allocate the output handle
-/// @param stream cuda stream
-/// @return Handle that contains a device NodeManager
-template <typename BuildT, typename BufferT = CudaDeviceBuffer>
-inline typename enable_if<BufferTraits<BufferT>::hasDeviceDual, NodeManagerHandle<BufferT>>::type
-cudaCreateNodeManager(const NanoGrid<BuildT> *d_grid,
-                      const BufferT& pool = BufferT(),
-                      cudaStream_t stream = 0)
-{
-    auto buffer = BufferT::create(sizeof(NodeManagerData), &pool, false, stream);
-    auto *d_data = (NodeManagerData*)buffer.deviceData();
-    size_t size = 0u, *d_size;
-    cudaCheck(cudaMallocAsync((void**)&d_size, sizeof(size_t), stream));
-    cudaLambdaKernel<<<1, 1, 0, stream>>>(1, [=] __device__(size_t) {
-#ifdef NANOVDB_USE_NEW_MAGIC_NUMBERS
-        *d_data = NodeManagerData{NANOVDB_MAGIC_NODE, 0u, (void*)d_grid, {0u,0u,0u}};
-#else
-        *d_data = NodeManagerData{NANOVDB_MAGIC_NUMBER, 0u, (void*)d_grid, {0u,0u,0u}};
-#endif
-        *d_size = sizeof(NodeManagerData);
-        auto &tree = d_grid->tree();
-        if (NodeManager<BuildT>::FIXED_SIZE && d_grid->isBreadthFirst()) {
-            d_data->mLinear = uint8_t(1u);
-            d_data->mOff[0] = PtrDiff(tree.template getFirstNode<0>(), d_grid);
-            d_data->mOff[1] = PtrDiff(tree.template getFirstNode<1>(), d_grid);
-            d_data->mOff[2] = PtrDiff(tree.template getFirstNode<2>(), d_grid);
-        } else {
-            *d_size += sizeof(uint64_t)*tree.totalNodeCount();
-        }
-    });
-    cudaCheckError();
-    cudaCheck(cudaMemcpyAsync(&size, d_size, sizeof(size_t), cudaMemcpyDeviceToHost, stream));
-    cudaCheck(cudaFreeAsync(d_size, stream));
-    if (size > sizeof(NodeManagerData)) {
-        auto tmp = BufferT::create(size, &pool, false, stream);// only allocate buffer on the device
-        cudaCheck(cudaMemcpyAsync(tmp.deviceData(), buffer.deviceData(), sizeof(NodeManagerData), cudaMemcpyDeviceToDevice, stream));
-        buffer = std::move(tmp);
-        d_data = reinterpret_cast<NodeManagerData*>(buffer.deviceData());
-        cudaLambdaKernel<<<1, 1, 0, stream>>>(1, [=] __device__ (size_t) {
-            auto &tree = d_grid->tree();
-            int64_t *ptr0 = d_data->mPtr[0] = reinterpret_cast<int64_t*>(d_data + 1);
-            int64_t *ptr1 = d_data->mPtr[1] = d_data->mPtr[0] + tree.nodeCount(0);
-            int64_t *ptr2 = d_data->mPtr[2] = d_data->mPtr[1] + tree.nodeCount(1);
-            // Performs depth first traversal but breadth first insertion
-            for (auto it2 = tree.root().cbeginChild(); it2; ++it2) {
-                *ptr2++ = PtrDiff(&*it2, d_grid);
-                for (auto it1 = it2->cbeginChild(); it1; ++it1) {
-                    *ptr1++ = PtrDiff(&*it1, d_grid);
-                    for (auto it0 = it1->cbeginChild(); it0; ++it0) {
-                        *ptr0++ = PtrDiff(&*it0, d_grid);
-                    }// loop over child nodes of the lower internal node
-                }// loop over child nodes of the upper internal node
-            }// loop over child nodes of the root node
-        });
-    }
-
-    return NodeManagerHandle<BufferT>(mapToGridType<BuildT>(), std::move(buffer));
-}// cudaCreateNodeManager
-
-} // namespace nanovdb
-
-#endif // NANOVDB_CUDA_NODE_MANAGER_CUH_HAS_BEEN_INCLUDED
+#include <nanovdb/util/Util.h> // for NANOVDB_DEPRECATED_HEADER
+#include <nanovdb/cuda/NodeManager.cuh>
+NANOVDB_DEPRECATED_HEADER("Include
nanovdb/cuda/NodeManager.cuh instead.")
\ No newline at end of file
diff --git a/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh b/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh
index 733dc35cb9..91e7ad0b5c 100644
--- a/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh
+++ b/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh
@@ -1,1174 +1,6 @@
 // Copyright Contributors to the OpenVDB Project
 // SPDX-License-Identifier: MPL-2.0
-/*!
-    \file CudaPointsToGrid.cuh
-
-    \authors Greg Klar (initial version) and Ken Museth (final version)
-
-    \brief Generates NanoVDB grids from a list of voxels or points on the device
-
-    \warning The header file contains cuda device code so be sure
-             to only include it in .cu files (or other .cuh files)
-*/
-
-#ifndef NVIDIA_CUDA_POINTS_TO_GRID_CUH_HAS_BEEN_INCLUDED
-#define NVIDIA_CUDA_POINTS_TO_GRID_CUH_HAS_BEEN_INCLUDED
-
-#include
-#include
-#include
-#include
-
-#include
-#include "CudaDeviceBuffer.h"
-#include
-#include
-#include
-#include
-
-/*
-    Note: 4.29 billion (=2^32) coordinates of type Vec3f have a memory footprint of 48 GB!
-*/
-
-namespace nanovdb {
-
-// Define the type used when the points are encoded as blind data in the output grid
-enum class PointType : uint32_t { Disable = 0,// no point information e.g. when BuildT != Point
-                                  PointID = 1,// linear index of type uint32_t to points
-                                  World64 = 2,// Vec3d in world space
-                                  World32 = 3,// Vec3f in world space
-                                  Grid64  = 4,// Vec3d in grid space
-                                  Grid32  = 5,// Vec3f in grid space
-                                  Voxel32 = 6,// Vec3f in voxel space
-                                  Voxel16 = 7,// Vec3u16 in voxel space
-                                  Voxel8  = 8,// Vec3u8 in voxel space
-                                  Default = 9,// output matches input, i.e. Vec3d or Vec3f in world space
-                                  End     =10 };
-
-//================================================================================================
-
-/// @brief Example class of a fancy pointer that can optionally be used as a template for writing
-///        a custom fancy pointer that allows particle coordinates to be arranged non-linearly
-///        in memory. For instance, coordinates might be interleaved with other data, i.e. an array
-///        of structs, in which case a custom implementation of fancy_ptr::operator[](size_t i) can
-///        account for strides that skip the other interleaved data.
-/// @tparam T Template type that specifies the type used for the coordinates of the points
-template <typename T>
-class fancy_ptr
-{
-    const T* mPtr;
-public:
-    /// @brief Default constructor.
-    /// @note  This method is actually not required by CudaPointsToGrid
-    /// @param ptr Pointer to array of elements
-    __hostdev__ explicit fancy_ptr(const T* ptr = nullptr) : mPtr(ptr) {}
-    /// @brief Index access into the array pointed to by the stored pointer.
-    /// @note  This method is required by CudaPointsToGrid!
-    /// @param i Unsigned index of the element to be returned
-    /// @return Const reference to the element at the i'th position
-    __hostdev__ inline const T& operator[](size_t i) const {return mPtr[i];}
-    /// @brief Dummy implementation required by pointer_traits.
-    /// @note  Note that only the return type matters!
-    /// @details Unlike operator[], it is safe to assume that all pointer types have operator*,
-    ///          which is why pointer_traits makes use of it to determine the element_type that
-    ///          a pointer class is pointing to. E.g. operator[] is not always defined for std::shared_ptr!
- __hostdev__ inline const T& operator*() const {return *mPtr;} -};// fancy_ptr - -/// @brief Simple stand-alone function that can be used to conveniently construct a fancy_ptr -/// @tparam T Template type that specifies the type use for the coordinates of the points -/// @param ptr Raw pointer to data -/// @return a new instance of a fancy_ptr -template -fancy_ptr make_fancy(const T* ptr = nullptr) {return fancy_ptr(ptr);} - -/// @brief Trait of points, like type of pointer and size of the pointer type -template -struct pointer_traits; - -template -struct pointer_traits { - using element_type = T; - static constexpr size_t element_size = sizeof(T); -}; - -template -struct pointer_traits { - using element_type = typename remove_reference())>::type;// assumes T::operator*() exists! - static constexpr size_t element_size = sizeof(element_type); -}; - -//================================================================================================ - -/// @brief Generates a NanoGrid from a list of point coordinates on the device. This method is -/// mainly used as a means to build a BVH acceleration structure for points, e.g. for efficient rendering. -/// @tparam PtrT Template type to a raw or fancy-pointer of point coordinates in world space. Dereferencing should return Vec3f or Vec3d. -/// @tparam BufferT Template type of buffer used for memory allocation on the device -/// @tparam AllocT Template type of optional device allocator for internal temporary memory -/// @param dWorldPoints Raw or fancy pointer to list of point coordinates in world space on the device -/// @param pointCount number of point in the list @c d_world -/// @param voxelSize Size of a voxel in world units used for the output grid -/// @param type Defined the way point information is represented in the output grid (see PointType enum above) -/// Should not be PointType::Disable! -/// @param buffer Instance of the device buffer used for memory allocation -/// @param stream optional CUDA stream (defaults to CUDA stream 0) -/// @return Returns a handle with a grid of type NanoGrid where point information, e.g. coordinates, -/// are represented as blind data defined by @c type. -template -GridHandle -cudaPointsToGrid(const PtrT dWorldPoints, - int pointCount, - double voxelSize = 1.0, - PointType type = PointType::Default, - const BufferT &buffer = BufferT(), - cudaStream_t stream = 0); - -//================================================================================================ - -template -GridHandle -cudaPointsToGrid(std::vector> pointSet, - const BufferT &buffer = BufferT(), - cudaStream_t stream = 0); - -//================================================================================================ - -/// @brief Generates a NanoGrid of any type from a list of voxel coordinates on the device. Unlike @c cudaPointsToGrid -/// this method only builds the grid but does not encode the coordinates as blind data. It is mainly useful as a -/// means to generate a grid that is know to contain the voxels given in the list. -/// @tparam BuildT Template type of the return grid -/// @tparam PtrT Template type to a raw or fancy-pointer of point coordinates in world space. Dereferencing should return Vec3f or Vec3d. 
-/// @tparam BufferT Template type of buffer used for memory allocation on the device -/// @tparam AllocT Template type of optional device allocator for internal temporary memory -/// @param dGridVoxels Raw or fancy pointer to list of voxel coordinates in grid (or index) space on the device -/// @param pointCount number of voxel in the list @c dGridVoxels -/// @param voxelSize Size of a voxel in world units used for the output grid -/// @param buffer Instance of the device buffer used for memory allocation -/// @return Returns a handle with the grid of type NanoGrid -template -GridHandle -cudaVoxelsToGrid(const PtrT dGridVoxels, - size_t voxelCount, - double voxelSize = 1.0, - const BufferT &buffer = BufferT(), - cudaStream_t stream = 0); - -//================================================================================================ - -template -GridHandle -cudaVoxelsToGrid(std::vector> pointSet, - const BufferT &buffer = BufferT(), - cudaStream_t stream = 0); - -//================================================================================================ - -template -__hostdev__ inline static void worldToVoxel(Vec3u8 &voxel, const Vec3T &world, const Map &map) -{ - const Vec3d ijk = map.applyInverseMap(world);// world -> index - static constexpr double encode = double((1<<8) - 1); - voxel[0] = uint8_t( encode*(ijk[0] - Floor(ijk[0] + 0.5) + 0.5) ); - voxel[1] = uint8_t( encode*(ijk[1] - Floor(ijk[1] + 0.5) + 0.5) ); - voxel[2] = uint8_t( encode*(ijk[2] - Floor(ijk[2] + 0.5) + 0.5) ); -} - -template -__hostdev__ inline static void worldToVoxel(Vec3u16 &voxel, const Vec3T &world, const Map &map) -{ - const Vec3d ijk = map.applyInverseMap(world);// world -> index - static constexpr double encode = double((1<<16) - 1); - voxel[0] = uint16_t( encode*(ijk[0] - Floor(ijk[0] + 0.5) + 0.5) ); - voxel[1] = uint16_t( encode*(ijk[1] - Floor(ijk[1] + 0.5) + 0.5) ); - voxel[2] = uint16_t( encode*(ijk[2] - Floor(ijk[2] + 0.5) + 0.5) ); -} - -template -__hostdev__ inline static void worldToVoxel(Vec3f &voxel, const Vec3T &world, const Map &map) -{ - const Vec3d ijk = map.applyInverseMap(world);// world -> index - voxel[0] = float( ijk[0] - Floor(ijk[0] + 0.5) ); - voxel[1] = float( ijk[1] - Floor(ijk[1] + 0.5) ); - voxel[2] = float( ijk[2] - Floor(ijk[2] + 0.5) ); -} - -//================================================================================================ - -template -__hostdev__ inline static Vec3T voxelToWorld(const Vec3u8 &voxel, const Coord &ijk, const Map &map) -{ - static constexpr double decode = 1.0/double((1<<8) - 1); - if constexpr(is_same::value) { - return map.applyMap( Vec3d(ijk[0] + decode*voxel[0] - 0.5, ijk[1] + decode*voxel[1] - 0.5, ijk[2] + decode*voxel[2] - 0.5)); - } else { - return map.applyMapF(Vec3f(ijk[0] + decode*voxel[0] - 0.5f, ijk[1] + decode*voxel[1] - 0.5f, ijk[2] + decode*voxel[2] - 0.5f)); - } -} - -template -__hostdev__ inline static Vec3T voxelToWorld(const Vec3u16 &voxel, const Coord &ijk, const Map &map) -{ - static constexpr double decode = 1.0/double((1<<16) - 1); - if constexpr(is_same::value) { - return map.applyMap( Vec3d(ijk[0] + decode*voxel[0] - 0.5, ijk[1] + decode*voxel[1] - 0.5, ijk[2] + decode*voxel[2] - 0.5)); - } else { - return map.applyMapF(Vec3f(ijk[0] + decode*voxel[0] - 0.5f, ijk[1] + decode*voxel[1] - 0.5f, ijk[2] + decode*voxel[2] - 0.5f)); - } -} - -template -__hostdev__ inline static Vec3T voxelToWorld(const Vec3f &voxel, const Coord &ijk, const Map &map) -{ - if constexpr(is_same::value) { - return map.applyMap( 
-template <typename Vec3T>
-__hostdev__ inline static Vec3T voxelToWorld(const Vec3f &voxel, const Coord &ijk, const Map &map)
-{
-    if constexpr(is_same<Vec3T, Vec3d>::value) {
-        return map.applyMap( Vec3d(ijk[0] + voxel[0], ijk[1] + voxel[1], ijk[2] + voxel[2]));
-    } else {
-        return map.applyMapF(Vec3f(ijk[0] + voxel[0], ijk[1] + voxel[1], ijk[2] + voxel[2]));
-    }
-}
-
-//================================================================================================
-
-namespace {// anonymous namespace
-
-template <typename BuildT, typename AllocT = cub::CachingDeviceAllocator>
-class CudaPointsToGrid
-{
-public:
-
-    struct Data {
-        Map map;
-        void *d_bufferPtr;
-        uint64_t *d_keys, *d_tile_keys, *d_lower_keys, *d_leaf_keys;// device pointer to 64 bit keys
-        uint64_t grid, tree, root, upper, lower, leaf, meta, blind, size;// byte offsets to nodes in buffer
-        uint32_t *d_indx;// device pointer to point indices (or IDs)
-        uint32_t nodeCount[3], *pointsPerLeafPrefix, *pointsPerLeaf;// 0=leaf, 1=lower, 2=upper
-        uint32_t voxelCount, *pointsPerVoxelPrefix, *pointsPerVoxel;
-        BitFlags<16> flags;
-        __hostdev__ NanoGrid<BuildT>& getGrid() const {return *PtrAdd<NanoGrid<BuildT>>(d_bufferPtr, grid);}
-        __hostdev__ NanoTree<BuildT>& getTree() const {return *PtrAdd<NanoTree<BuildT>>(d_bufferPtr, tree);}
-        __hostdev__ NanoRoot<BuildT>& getRoot() const {return *PtrAdd<NanoRoot<BuildT>>(d_bufferPtr, root);}
-        __hostdev__ NanoUpper<BuildT>& getUpper(int i) const {return *(PtrAdd<NanoUpper<BuildT>>(d_bufferPtr, upper)+i);}
-        __hostdev__ NanoLower<BuildT>& getLower(int i) const {return *(PtrAdd<NanoLower<BuildT>>(d_bufferPtr, lower)+i);}
-        __hostdev__ NanoLeaf<BuildT>& getLeaf(int i) const {return *(PtrAdd<NanoLeaf<BuildT>>(d_bufferPtr, leaf)+i);}
-        __hostdev__ GridBlindMetaData& getMeta() const {return *PtrAdd<GridBlindMetaData>(d_bufferPtr, meta);}
-        template <typename Vec3T>
-        __hostdev__ Vec3T& getPoint(int i) const {return *(PtrAdd<Vec3T>(d_bufferPtr, blind)+i);}
-    };// Data
-
-    /// @brief Constructor from a Map
-    /// @param map Map to be used for the output device grid
-    /// @param stream optional CUDA stream (defaults to CUDA stream 0)
-    CudaPointsToGrid(const Map &map, cudaStream_t stream = 0)
-        : mStream(stream)
-        , mPointType(is_same<BuildT, Point>::value ?
PointType::Default : PointType::Disable)
-    {
-        mData.map = map;
-        mData.flags.initMask({GridFlags::HasBBox, GridFlags::IsBreadthFirst});
-        cudaCheck(cudaMallocAsync((void**)&mDeviceData, sizeof(Data), mStream));
-    }
-
-    /// @brief Default constructor
-    /// @param scale Voxel size in world units
-    /// @param trans Translation of origin in world units
-    /// @param stream optional CUDA stream (defaults to CUDA stream 0)
-    CudaPointsToGrid(const double scale = 1.0, const Vec3d &trans = Vec3d(0.0), cudaStream_t stream = 0)
-        : CudaPointsToGrid(Map(scale, trans), stream) {}
-
-    /// @brief Destructor
-    ~CudaPointsToGrid() {cudaCheck(cudaFreeAsync(mDeviceData, mStream));}
-
-    /// @brief Toggle on and off verbose mode
-    /// @param level Verbose level: 0=quiet, 1=timing, 2=benchmarking
-    void setVerbose(int level = 1) {mVerbose = level; mData.flags.setBit(7u, level); }
-
-    /// @brief Set the mode for checksum computation, which is disabled by default
-    /// @param mode Mode of checksum computation
-    void setChecksum(ChecksumMode mode = ChecksumMode::Disable){mChecksum = mode;}
-
-    /// @brief Toggle on and off the computation of a bounding-box
-    /// @param on If true bbox will be computed
-    void includeBBox(bool on = true) { mData.flags.setMask(GridFlags::HasBBox, on); }
-
-    /// @brief Set the name of the output grid
-    /// @param name name of the output grid
-    void setGridName(const std::string &name) {mGridName = name;}
-
-    // only available when BuildT == Point
-    template <typename T = BuildT> typename enable_if<is_same<T, Point>::value>::type
-    setPointType(PointType type) { mPointType = type; }
-
-    /// @brief Creates a handle to a grid with the specified build type from a list of points in index or world space
-    /// @tparam BuildT Build type of the output grid, i.e. NanoGrid<BuildT>
-    /// @tparam PtrT Template type to a raw or fancy-pointer of point coordinates in world or index space.
-    /// @tparam BufferT Buffer type used for allocation of the grid handle
-    /// @param points device pointer to an array of points in world space
-    /// @param pointCount number of input points or voxels
-    /// @param buffer optional buffer (currently ignored)
-    /// @return returns a handle with a grid of type NanoGrid<BuildT>
-    template <typename PtrT, typename BufferT = CudaDeviceBuffer>
-    GridHandle<BufferT> getHandle(const PtrT points,
-                                  size_t pointCount,
-                                  const BufferT &buffer = BufferT());
-
-    template <typename PtrT>
-    void countNodes(const PtrT points, size_t pointCount);
-
-    template <typename PtrT>
-    void processGridTreeRoot(const PtrT points, size_t pointCount);
-
-    void processUpperNodes();
-
-    void processLowerNodes();
-
-    template <typename PtrT>
-    void processLeafNodes(const PtrT points);
-
-    template <typename PtrT>
-    void processPoints(const PtrT points, size_t pointCount);
-
-    void processBBox();
-
-    // the following methods are only defined when BuildT == Point
-    template <typename T = BuildT> typename enable_if<is_same<T, Point>::value, uint32_t>::type
-    maxPointsPerVoxel() const {return mMaxPointsPerVoxel;}
-    template <typename T = BuildT> typename enable_if<is_same<T, Point>::value, uint32_t>::type
-    maxPointsPerLeaf() const {return mMaxPointsPerLeaf;}
-
-private:
-    static constexpr unsigned int mNumThreads = 128;// seems faster than the old value of 256!
-    static unsigned int numBlocks(unsigned int n) {return (n + mNumThreads - 1) / mNumThreads;}
-
-    cudaStream_t mStream{0};
-    GpuTimer mTimer;
-    PointType mPointType;
-    std::string mGridName;
-    int mVerbose{0};
-    Data mData, *mDeviceData;
-    uint32_t mMaxPointsPerVoxel{0u}, mMaxPointsPerLeaf{0u};
-    ChecksumMode mChecksum{ChecksumMode::Disable};
-
-    // wrapper of cub::CachingDeviceAllocator with a shared scratch space
-    struct Allocator {
-        AllocT mAllocator;
-        void* d_scratch;
-        size_t scratchSize, actualScratchSize;
-        Allocator() : d_scratch(nullptr), scratchSize(0), actualScratchSize(0) {}
-        ~Allocator() {
-            if (scratchSize > 0) this->free(d_scratch);// a bug in cub makes this necessary
-            mAllocator.FreeAllCached();
-        }
-        template <typename T>
-        T* alloc(size_t count, cudaStream_t stream) {
-            T* d_ptr = nullptr;
-            cudaCheck(mAllocator.DeviceAllocate((void**)&d_ptr, sizeof(T)*count, stream));
-            return d_ptr;
-        }
-        void free(void *d_ptr) {if (d_ptr) cudaCheck(mAllocator.DeviceFree(d_ptr));}
-        template <typename... T>
-        void free(void *d_ptr, T... other) {
-            if (d_ptr) cudaCheck(mAllocator.DeviceFree(d_ptr));
-            this->free(other...);
-        }
-        void adjustScratch(cudaStream_t stream){
-            if (scratchSize > actualScratchSize) {
-                if (actualScratchSize>0) cudaCheck(mAllocator.DeviceFree(d_scratch));
-                cudaCheck(mAllocator.DeviceAllocate((void**)&d_scratch, scratchSize, stream));
-                actualScratchSize = scratchSize;
-            }
-        }
-    } mMemPool;
-
-    template <typename PtrT, typename BufferT>
-    BufferT getBuffer(const PtrT points, size_t pointCount, const BufferT &buffer);
-};// CudaPointsToGrid
-
-
-namespace kernels {
-/// @details Used by CudaPointsToGrid<BuildT>::processLeafNodes before the computation
-/// of prefix-sum for index grid.
-/// Moving this away from an implementation using the cudaLambdaKernel wrapper
-/// (defined in CudaUtils.h) fixes the following error on the Windows platform:
-/// error : For this host platform/dialect, an extended lambda cannot be defined inside the 'if'
-/// or 'else' block of a constexpr if statement.
-template <typename BuildT>
-__global__ void fillValueIndexKernel(const size_t numItems, uint64_t* devValueIndex, typename CudaPointsToGrid<BuildT>::Data* d_data) {
-    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
-    if (tid >= numItems)
-        return;
-
-    devValueIndex[tid] = static_cast<uint64_t>(d_data->getLeaf(tid).mValueMask.countOn());
-}
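To make the workaround concrete, here is the shape of the pattern being avoided versus the one used above
(editorial sketch; launch parameters follow the surrounding code):

// Rejected by the MSVC host compiler: an extended __device__ lambda defined
// inside the 'if' (or 'else') block of an "if constexpr" statement:
//     if constexpr(BuildTraits<BuildT>::is_onindex) {
//         cudaLambdaKernel<<<numBlocks(n), mNumThreads, 0, mStream>>>(n,
//             [=] __device__(size_t tid, Data *d_data) { /* body */ }, mDeviceData);
//     }
// Accepted: the body lives in a named __global__ kernel (fillValueIndexKernel
// above), and only the kernel launch remains inside the "if constexpr" block.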
-
-/// @details Used by CudaPointsToGrid<BuildT>::processLeafNodes for the computation
-/// of prefix-sum for index grid.
-/// Moving this away from an implementation using the cudaLambdaKernel wrapper
-/// (defined in CudaUtils.h) fixes the following error on the Windows platform:
-/// error : For this host platform/dialect, an extended lambda cannot be defined inside the 'if'
-/// or 'else' block of a constexpr if statement.
-template <typename BuildT>
-__global__ void leafPrefixSumKernel(const size_t numItems, uint64_t* devValueIndexPrefix, typename CudaPointsToGrid<BuildT>::Data* d_data) {
-    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
-    if (tid >= numItems)
-        return;
-
-    auto &leaf = d_data->getLeaf(tid);
-    leaf.mOffset = 1u;// will be re-set below
-    const uint64_t *w = leaf.mValueMask.words();
-    uint64_t &prefixSum = leaf.mPrefixSum, sum = CountOn(*w++);
-    prefixSum = sum;
-    for (int n = 9; n < 55; n += 9) {// n = i*9, where i = 1,2,...,6
-        sum += CountOn(*w++);
-        prefixSum |= sum << n;// each prefix sum is encoded in 9 bits
-    }
-    if (tid==0) {
-        d_data->getGrid().mData1 = 1u + devValueIndexPrefix[d_data->nodeCount[0]-1];// set total count
-        d_data->getTree().mVoxelCount = devValueIndexPrefix[d_data->nodeCount[0]-1];
-    } else {
-        leaf.mOffset = 1u + devValueIndexPrefix[tid-1];// background is index 0
-    }
-}
-
-/// @details Used by CudaPointsToGrid<BuildT>::processLeafNodes to make sure leaf.mMask = leaf.mValueMask.
-/// Moving this away from an implementation using the cudaLambdaKernel wrapper
-/// (defined in CudaUtils.h) fixes the following error on the Windows platform:
-/// error : For this host platform/dialect, an extended lambda cannot be defined inside the 'if'
-/// or 'else' block of a constexpr if statement.
-template <typename BuildT>
-__global__ void setMaskEqValMaskKernel(const size_t numItems, typename CudaPointsToGrid<BuildT>::Data* d_data) {
-    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
-    if (tid >= numItems)
-        return;
-
-    auto &leaf = d_data->getLeaf(tid);
-    leaf.mMask = leaf.mValueMask;
-}
-} // namespace kernels
-
-
-//================================================================================================
-
-// Define utility macro used to call cub functions that use dynamic temporary storage
-#ifndef CALL_CUBS
-#ifdef _WIN32
-#define CALL_CUBS(func, ...) \
-    cudaCheck(cub::func(nullptr, mMemPool.scratchSize, __VA_ARGS__, mStream)); \
-    mMemPool.adjustScratch(mStream); \
-    cudaCheck(cub::func(mMemPool.d_scratch, mMemPool.scratchSize, __VA_ARGS__, mStream));
-#else// ifdef _WIN32
-#define CALL_CUBS(func, args...)
\ - cudaCheck(cub::func(nullptr, mMemPool.scratchSize, args, mStream)); \ - mMemPool.adjustScratch(mStream); \ - cudaCheck(cub::func(mMemPool.d_scratch, mMemPool.scratchSize, args, mStream)); -#endif// ifdef _WIN32 -#endif// ifndef CALL_CUBS - -}// anonymous namespace - -//================================================================================================ - -template -template -inline GridHandle -CudaPointsToGrid::getHandle(const PtrT points, - size_t pointCount, - const BufferT &pool) -{ - if (mVerbose==1) mTimer.start("\nCounting nodes"); - this->countNodes(points, pointCount); - - if (mVerbose==1) mTimer.restart("Initiate buffer"); - auto buffer = this->getBuffer(points, pointCount, pool); - - if (mVerbose==1) mTimer.restart("Process grid,tree,root"); - this->processGridTreeRoot(points, pointCount); - - if (mVerbose==1) mTimer.restart("Process upper nodes"); - this->processUpperNodes(); - - if (mVerbose==1) mTimer.restart("Process lower nodes"); - this->processLowerNodes(); - - if (mVerbose==1) mTimer.restart("Process leaf nodes"); - this->processLeafNodes(points); - - if (mVerbose==1) mTimer.restart("Process points"); - this->processPoints(points, pointCount); - - if (mVerbose==1) mTimer.restart("Process bbox"); - this->processBBox(); - if (mVerbose==1) mTimer.stop(); - - if (mChecksum != ChecksumMode::Disable) { - if (mVerbose==1) mTimer.restart("Computation of checksum"); - cudaGridChecksum((GridData*)buffer.deviceData(), mChecksum); - if (mVerbose==1) mTimer.stop(); - } - - cudaStreamSynchronize(mStream);// finish all device tasks in mStream - - return GridHandle(std::move(buffer)); -}// CudaPointsToGrid::getHandle - -//================================================================================================ - -// --- CUB helpers --- -template -struct ShiftRight -{ - __hostdev__ inline OutT operator()(const InT& v) const {return static_cast(v >> BitCount);} -}; - -template -struct ShiftRightIterator : public cub::TransformInputIterator, InT*> -{ - using BASE = cub::TransformInputIterator, InT*>; - __hostdev__ inline ShiftRightIterator(uint64_t* input_itr) : BASE(input_itr, ShiftRight()) {} -}; - -//================================================================================================ - -template -template -void CudaPointsToGrid::countNodes(const PtrT points, size_t pointCount) -{ - using Vec3T = typename remove_const::element_type>::type; - if constexpr(is_same::value) { - static_assert(is_same::value, "Point (vs voxels) coordinates should be represented as Vec3f or Vec3d"); - } else { - static_assert(is_same::value, "Voxel coordinates should be represented as Coord, Vec3f or Vec3d"); - } - - mData.d_keys = mMemPool.template alloc(pointCount, mStream); - mData.d_indx = mMemPool.template alloc(pointCount, mStream);// uint32_t can index 4.29 billion Coords, corresponding to 48 GB - cudaCheck(cudaMemcpyAsync(mDeviceData, &mData, sizeof(Data), cudaMemcpyHostToDevice, mStream));// copy mData from CPU -> GPU - - if (mVerbose==2) mTimer.start("\nAllocating arrays for keys and indices"); - auto *d_keys = mMemPool.template alloc(pointCount, mStream); - auto *d_indx = mMemPool.template alloc(pointCount, mStream); - - if (mVerbose==2) mTimer.restart("Generate tile keys"); - cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, const Data *d_data, const PtrT points) { - auto coordToKey = [](const Coord &ijk)->uint64_t{ - // Note: int32_t has a range of -2^31 to 2^31 - 1 whereas uint32_t has a range of 0 to 2^32 - 1 - static constexpr int64_t offset = 1 << 31; 
- return (uint64_t(uint32_t(int64_t(ijk[2]) + offset) >> 12) ) | // z is the lower 21 bits - (uint64_t(uint32_t(int64_t(ijk[1]) + offset) >> 12) << 21) | // y is the middle 21 bits - (uint64_t(uint32_t(int64_t(ijk[0]) + offset) >> 12) << 42); // x is the upper 21 bits - };// coordToKey lambda functor - d_indx[tid] = uint32_t(tid); - uint64_t &key = d_keys[tid]; - if constexpr(is_same::value) {// points are in world space - if constexpr(is_same::value) { - key = coordToKey(d_data->map.applyInverseMapF(points[tid]).round()); - } else {// points are Vec3d - key = coordToKey(d_data->map.applyInverseMap(points[tid]).round()); - } - } else if constexpr(is_same::value) {// points Coord are in index space - key = coordToKey(points[tid]); - } else {// points are Vec3f or Vec3d in index space - key = coordToKey(points[tid].round()); - } - }, mDeviceData, points); - cudaCheckError(); - if (mVerbose==2) mTimer.restart("DeviceRadixSort of "+std::to_string(pointCount)+" tile keys"); - CALL_CUBS(DeviceRadixSort::SortPairs, d_keys, mData.d_keys, d_indx, mData.d_indx, pointCount, 0, 62);// 21 bits per coord - std::swap(d_indx, mData.d_indx);// sorted indices are now in d_indx - - if (mVerbose==2) mTimer.restart("Allocate runs"); - auto *d_points_per_tile = mMemPool.template alloc(pointCount, mStream); - uint32_t *d_node_count = mMemPool.template alloc(3, mStream); - - if (mVerbose==2) mTimer.restart("DeviceRunLengthEncode tile keys"); - CALL_CUBS(DeviceRunLengthEncode::Encode, mData.d_keys, d_keys, d_points_per_tile, d_node_count+2, pointCount); - cudaCheck(cudaMemcpyAsync(mData.nodeCount+2, d_node_count+2, sizeof(uint32_t), cudaMemcpyDeviceToHost, mStream)); - mData.d_tile_keys = mMemPool.template alloc(mData.nodeCount[2], mStream); - cudaCheck(cudaMemcpyAsync(mData.d_tile_keys, d_keys, mData.nodeCount[2]*sizeof(uint64_t), cudaMemcpyDeviceToDevice, mStream)); - - if (mVerbose) mTimer.restart("DeviceRadixSort of " + std::to_string(pointCount) + " voxel keys in " + std::to_string(mData.nodeCount[2]) + " tiles"); - uint32_t *points_per_tile = new uint32_t[mData.nodeCount[2]]; - cudaCheck(cudaMemcpyAsync(points_per_tile, d_points_per_tile, mData.nodeCount[2]*sizeof(uint32_t), cudaMemcpyDeviceToHost, mStream)); - mMemPool.free(d_points_per_tile); - - for (uint32_t id = 0, offset = 0; id < mData.nodeCount[2]; ++id) { - const uint32_t count = points_per_tile[id]; - cudaLambdaKernel<<>>(count, [=] __device__(size_t tid, const Data *d_data) { - auto voxelKey = [] __device__ (uint64_t tileID, const Coord &ijk){ - return tileID << 36 | // upper offset: 64-15-12-9=28, i.e. last 28 bits - uint64_t(NanoUpper::CoordToOffset(ijk)) << 21 | // lower offset: 32^3 = 2^15, i.e. next 15 bits - uint64_t(NanoLower::CoordToOffset(ijk)) << 9 | // leaf offset: 16^3 = 2^12, i.e. next 12 bits - uint64_t(NanoLeaf< BuildT>::CoordToOffset(ijk)); // voxel offset: 8^3 = 2^9, i.e. first 9 bits - };// voxelKey lambda functor - tid += offset; - Vec3T p = points[d_indx[tid]]; - if constexpr(is_same::value) p = is_same::value ? 
d_data->map.applyInverseMapF(p) : d_data->map.applyInverseMap(p); - d_keys[tid] = voxelKey(id, p.round()); - }, mDeviceData); cudaCheckError(); - CALL_CUBS(DeviceRadixSort::SortPairs, d_keys + offset, mData.d_keys + offset, d_indx + offset, mData.d_indx + offset, count, 0, 36);// 9+12+15=36 - offset += count; - } - mMemPool.free(d_indx); - delete [] points_per_tile; - - if (mVerbose==2) mTimer.restart("Count points per voxel"); - - mData.pointsPerVoxel = mMemPool.template alloc(pointCount, mStream); - uint32_t *d_voxel_count = mMemPool.template alloc(1, mStream); - CALL_CUBS(DeviceRunLengthEncode::Encode, mData.d_keys, d_keys, mData.pointsPerVoxel, d_voxel_count, pointCount); - cudaCheck(cudaMemcpyAsync(&mData.voxelCount, d_voxel_count, sizeof(uint32_t), cudaMemcpyDeviceToHost, mStream)); - mMemPool.free(d_voxel_count); - - if constexpr(is_same::value) { - if (mVerbose==2) mTimer.restart("Count max points per voxel"); - uint32_t *d_maxPointsPerVoxel = mMemPool.template alloc(1, mStream); - CALL_CUBS(DeviceReduce::Max, mData.pointsPerVoxel, d_maxPointsPerVoxel, mData.voxelCount); - cudaCheck(cudaMemcpyAsync(&mMaxPointsPerVoxel, d_maxPointsPerVoxel, sizeof(uint32_t), cudaMemcpyDeviceToHost, mStream)); - mMemPool.free(d_maxPointsPerVoxel); - } - - //printf("\n Active voxel count = %u, max points per voxel = %u\n", mData.voxelCount, mMaxPointsPerVoxel); - if (mVerbose==2) mTimer.restart("Compute prefix sum of points per voxel"); - mData.pointsPerVoxelPrefix = mMemPool.template alloc(mData.voxelCount, mStream); - CALL_CUBS(DeviceScan::ExclusiveSum, mData.pointsPerVoxel, mData.pointsPerVoxelPrefix, mData.voxelCount); - - mData.pointsPerLeaf = mMemPool.template alloc(pointCount, mStream); - CALL_CUBS(DeviceRunLengthEncode::Encode, ShiftRightIterator<9>(mData.d_keys), d_keys, mData.pointsPerLeaf, d_node_count, pointCount); - cudaCheck(cudaMemcpyAsync(mData.nodeCount, d_node_count, sizeof(uint32_t), cudaMemcpyDeviceToHost, mStream)); - - if constexpr(is_same::value) { - uint32_t *d_maxPointsPerLeaf = mMemPool.template alloc(1, mStream); - CALL_CUBS(DeviceReduce::Max, mData.pointsPerLeaf, d_maxPointsPerLeaf, mData.nodeCount[0]); - cudaCheck(cudaMemcpyAsync(&mMaxPointsPerLeaf, d_maxPointsPerLeaf, sizeof(uint32_t), cudaMemcpyDeviceToHost, mStream)); - //printf("\n Leaf count = %u, max points per leaf = %u\n", mData.nodeCount[0], mMaxPointsPerLeaf); - if (mMaxPointsPerLeaf > std::numeric_limits::max()) { - throw std::runtime_error("Too many points per leaf: "+std::to_string(mMaxPointsPerLeaf)); - } - mMemPool.free(d_maxPointsPerLeaf); - } - - mData.pointsPerLeafPrefix = mMemPool.template alloc(mData.nodeCount[0], mStream); - CALL_CUBS(DeviceScan::ExclusiveSum, mData.pointsPerLeaf, mData.pointsPerLeafPrefix, mData.nodeCount[0]); - - mData.d_leaf_keys = mMemPool.template alloc(mData.nodeCount[0], mStream); - cudaCheck(cudaMemcpyAsync(mData.d_leaf_keys, d_keys, mData.nodeCount[0]*sizeof(uint64_t), cudaMemcpyDeviceToDevice, mStream)); - - CALL_CUBS(DeviceSelect::Unique, ShiftRightIterator<12>(mData.d_leaf_keys), d_keys, d_node_count+1, mData.nodeCount[0]);// count lower nodes - cudaCheck(cudaMemcpyAsync(mData.nodeCount+1, d_node_count+1, sizeof(uint32_t), cudaMemcpyDeviceToHost, mStream)); - mData.d_lower_keys = mMemPool.template alloc(mData.nodeCount[1], mStream); - cudaCheck(cudaMemcpyAsync(mData.d_lower_keys, d_keys, mData.nodeCount[1]*sizeof(uint64_t), cudaMemcpyDeviceToDevice, mStream)); - - mMemPool.free(d_keys, d_node_count); - if (mVerbose==2) mTimer.stop(); - - //printf("Leaf count = %u, lower 
count = %u, upper count = %u\n", mData.nodeCount[0], mData.nodeCount[1], mData.nodeCount[2]); -}// CudaPointsToGrid::countNodes - -//================================================================================================ - -template -template -inline BufferT CudaPointsToGrid::getBuffer(const PtrT, size_t pointCount, const BufferT &pool) -{ - auto sizeofPoint = [&]()->size_t{ - switch (mPointType){ - case PointType::PointID: return sizeof(uint32_t); - case PointType::World64: return sizeof(Vec3d); - case PointType::World32: return sizeof(Vec3f); - case PointType::Grid64: return sizeof(Vec3d); - case PointType::Grid32: return sizeof(Vec3f); - case PointType::Voxel32: return sizeof(Vec3f); - case PointType::Voxel16: return sizeof(Vec3u16); - case PointType::Voxel8: return sizeof(Vec3u8); - case PointType::Default: return pointer_traits::element_size; - default: return size_t(0);// PointType::Disable - } - }; - - mData.grid = 0;// grid is always stored at the start of the buffer! - mData.tree = NanoGrid::memUsage(); // grid ends and tree begins - mData.root = mData.tree + NanoTree::memUsage(); // tree ends and root node begins - mData.upper = mData.root + NanoRoot::memUsage(mData.nodeCount[2]); // root node ends and upper internal nodes begin - mData.lower = mData.upper + NanoUpper::memUsage()*mData.nodeCount[2]; // upper internal nodes ends and lower internal nodes begin - mData.leaf = mData.lower + NanoLower::memUsage()*mData.nodeCount[1]; // lower internal nodes ends and leaf nodes begin - mData.meta = mData.leaf + NanoLeaf::DataType::memUsage()*mData.nodeCount[0];// leaf nodes end and blind meta data begins - mData.blind = mData.meta + sizeof(GridBlindMetaData)*int( mPointType!=PointType::Disable ); // meta data ends and blind data begins - mData.size = mData.blind + pointCount*sizeofPoint();// end of buffer - - auto buffer = BufferT::create(mData.size, &pool, false);// only allocate buffer on the device - mData.d_bufferPtr = buffer.deviceData(); - if (mData.d_bufferPtr == nullptr) throw std::runtime_error("Failed to allocate grid buffer on the device"); - cudaCheck(cudaMemcpyAsync(mDeviceData, &mData, sizeof(Data), cudaMemcpyHostToDevice, mStream));// copy Data CPU -> GPU - return buffer; -}// CudaPointsToGrid::getBuffer - -//================================================================================================ - -template -template -inline void CudaPointsToGrid::processGridTreeRoot(const PtrT points, size_t pointCount) -{ - using Vec3T = typename remove_const::element_type>::type; - cudaLambdaKernel<<<1, 1, 0, mStream>>>(1, [=] __device__(size_t, Data *d_data, PointType pointType) { - // process Root - auto &root = d_data->getRoot(); - root.mBBox = CoordBBox(); // init to empty - root.mTableSize = d_data->nodeCount[2]; - root.mBackground = NanoRoot::ValueType(0);// background_value - root.mMinimum = root.mMaximum = NanoRoot::ValueType(0); - root.mAverage = root.mStdDevi = NanoRoot::FloatType(0); - - // process Tree - auto &tree = d_data->getTree(); - tree.setRoot(&root); - tree.setFirstNode(&d_data->getUpper(0)); - tree.setFirstNode(&d_data->getLower(0)); - tree.setFirstNode(&d_data->getLeaf(0)); - tree.mNodeCount[2] = tree.mTileCount[2] = d_data->nodeCount[2]; - tree.mNodeCount[1] = tree.mTileCount[1] = d_data->nodeCount[1]; - tree.mNodeCount[0] = tree.mTileCount[0] = d_data->nodeCount[0]; - tree.mVoxelCount = d_data->voxelCount; - - // process Grid - auto &grid = d_data->getGrid(); - grid.init({GridFlags::HasBBox, GridFlags::IsBreadthFirst}, d_data->size, 
d_data->map, mapToGridType()); - grid.mChecksum = ~uint64_t(0);// set all bits on which means it's disabled - grid.mBlindMetadataCount = is_same::value;// ? 1u : 0u; - grid.mBlindMetadataOffset = d_data->meta; - if (pointType != PointType::Disable) { - const auto lastLeaf = tree.mNodeCount[0] - 1; - grid.mData1 = d_data->pointsPerLeafPrefix[lastLeaf] + d_data->pointsPerLeaf[lastLeaf]; - auto &meta = d_data->getMeta(); - meta.mDataOffset = sizeof(GridBlindMetaData);// blind data is placed right after this meta data - meta.mValueCount = pointCount; - // Blind meta data - switch (pointType){ - case PointType::PointID: - grid.mGridClass = GridClass::PointIndex; - meta.mSemantic = GridBlindDataSemantic::PointId; - meta.mDataClass = GridBlindDataClass::IndexArray; - meta.mDataType = mapToGridType(); - meta.mValueSize = sizeof(uint32_t); - cudaStrcpy(meta.mName, "PointID: uint32_t indices to points"); - break; - case PointType::World64: - grid.mGridClass = GridClass::PointData; - meta.mSemantic = GridBlindDataSemantic::WorldCoords; - meta.mDataClass = GridBlindDataClass::AttributeArray; - meta.mDataType = mapToGridType(); - meta.mValueSize = sizeof(Vec3d); - cudaStrcpy(meta.mName, "World64: Vec3 point coordinates in world space"); - break; - case PointType::World32: - grid.mGridClass = GridClass::PointData; - meta.mSemantic = GridBlindDataSemantic::WorldCoords; - meta.mDataClass = GridBlindDataClass::AttributeArray; - meta.mDataType = mapToGridType(); - meta.mValueSize = sizeof(Vec3f); - cudaStrcpy(meta.mName, "World32: Vec3 point coordinates in world space"); - break; - case PointType::Grid64: - grid.mGridClass = GridClass::PointData; - meta.mSemantic = GridBlindDataSemantic::GridCoords; - meta.mDataClass = GridBlindDataClass::AttributeArray; - meta.mDataType = mapToGridType(); - meta.mValueSize = sizeof(Vec3d); - cudaStrcpy(meta.mName, "Grid64: Vec3 point coordinates in grid space"); - break; - case PointType::Grid32: - grid.mGridClass = GridClass::PointData; - meta.mSemantic = GridBlindDataSemantic::GridCoords; - meta.mDataClass = GridBlindDataClass::AttributeArray; - meta.mDataType = mapToGridType(); - meta.mValueSize = sizeof(Vec3f); - cudaStrcpy(meta.mName, "Grid32: Vec3 point coordinates in grid space"); - break; - case PointType::Voxel32: - grid.mGridClass = GridClass::PointData; - meta.mSemantic = GridBlindDataSemantic::VoxelCoords; - meta.mDataClass = GridBlindDataClass::AttributeArray; - meta.mDataType = mapToGridType(); - meta.mValueSize = sizeof(Vec3f); - cudaStrcpy(meta.mName, "Voxel32: Vec3 point coordinates in voxel space"); - break; - case PointType::Voxel16: - grid.mGridClass = GridClass::PointData; - meta.mSemantic = GridBlindDataSemantic::VoxelCoords; - meta.mDataClass = GridBlindDataClass::AttributeArray; - meta.mDataType = mapToGridType(); - meta.mValueSize = sizeof(Vec3u16); - cudaStrcpy(meta.mName, "Voxel16: Vec3 point coordinates in voxel space"); - break; - case PointType::Voxel8: - grid.mGridClass = GridClass::PointData; - meta.mSemantic = GridBlindDataSemantic::VoxelCoords; - meta.mDataClass = GridBlindDataClass::AttributeArray; - meta.mDataType = mapToGridType(); - meta.mValueSize = sizeof(Vec3u8); - cudaStrcpy(meta.mName, "Voxel8: Vec3 point coordinates in voxel space"); - break; - case PointType::Default: - grid.mGridClass = GridClass::PointData; - meta.mSemantic = GridBlindDataSemantic::WorldCoords; - meta.mDataClass = GridBlindDataClass::AttributeArray; - meta.mDataType = mapToGridType(); - meta.mValueSize = sizeof(Vec3T); - if constexpr(is_same::value) { - 
cudaStrcpy(meta.mName, "World32: Vec3 point coordinates in world space"); - } else if constexpr(is_same::value){ - cudaStrcpy(meta.mName, "World64: Vec3 point coordinates in world space"); - } else { - printf("Error in CudaPointsToGrid::processGridTreeRoot: expected Vec3T = Vec3f or Vec3d\n"); - } - break; - default: - printf("Error in CudaPointsToGrid::processGridTreeRoot: invalid pointType\n"); - } - } else if constexpr(BuildTraits::is_offindex) { - grid.mData1 = 1u + 512u*d_data->nodeCount[0]; - grid.mGridClass = GridClass::IndexGrid; - } - }, mDeviceData, mPointType);// cudaLambdaKernel - cudaCheckError(); - - char *dst = mData.getGrid().mGridName; - if (const char *src = mGridName.data()) { - cudaCheck(cudaMemcpyAsync(dst, src, GridData::MaxNameSize, cudaMemcpyHostToDevice, mStream)); - } else { - cudaCheck(cudaMemsetAsync(dst, 0, GridData::MaxNameSize, mStream)); - } -}// CudaPointsToGrid::processGridTreeRoot - -//================================================================================================ - -template -inline void CudaPointsToGrid::processUpperNodes() -{ - cudaLambdaKernel<<>>(mData.nodeCount[2], [=] __device__(size_t tid, Data *d_data) { - auto &root = d_data->getRoot(); - auto &upper = d_data->getUpper(tid); -#if 1 - auto keyToCoord = [](uint64_t key)->nanovdb::Coord{ - static constexpr int64_t offset = 1 << 31;// max values of uint32_t is 2^31 - 1 - static constexpr uint64_t MASK = (1u << 21) - 1; // used to mask out 21 lower bits - return nanovdb::Coord(int(int64_t(((key >> 42) & MASK) << 12) - offset), // x are the upper 21 bits - int(int64_t(((key >> 21) & MASK) << 12) - offset), // y are the middle 21 bits - int(int64_t(( key & MASK) << 12) - offset)); // z are the lower 21 bits - }; - const Coord ijk = keyToCoord(d_data->d_tile_keys[tid]); -#else - const Coord ijk = NanoRoot::KeyToCoord(d_data->d_tile_keys[tid]); -#endif - root.tile(tid)->setChild(ijk, &upper, &root); - upper.mBBox[0] = ijk; - upper.mFlags = 0; - upper.mValueMask.setOff(); - upper.mChildMask.setOff(); - upper.mMinimum = upper.mMaximum = NanoLower::ValueType(0); - upper.mAverage = upper.mStdDevi = NanoLower::FloatType(0); - }, mDeviceData); - cudaCheckError(); - - mMemPool.free(mData.d_tile_keys); - - const uint64_t valueCount = mData.nodeCount[2] << 15; - cudaLambdaKernel<<>>(valueCount, [=] __device__(size_t tid, Data *d_data) { - auto &upper = d_data->getUpper(tid >> 15); - upper.mTable[tid & 32767u].value = NanoUpper::ValueType(0);// background - }, mDeviceData); - cudaCheckError(); -}// CudaPointsToGrid::processUpperNodes - -//================================================================================================ - -template -inline void CudaPointsToGrid::processLowerNodes() -{ - cudaLambdaKernel<<>>(mData.nodeCount[1], [=] __device__(size_t tid, Data *d_data) { - auto &root = d_data->getRoot(); - const uint64_t lowerKey = d_data->d_lower_keys[tid]; - auto &upper = d_data->getUpper(lowerKey >> 15); - const uint32_t upperOffset = lowerKey & 32767u;// (1 << 15) - 1 = 32767 - upper.mChildMask.setOnAtomic(upperOffset); - auto &lower = d_data->getLower(tid); - upper.setChild(upperOffset, &lower); - lower.mBBox[0] = upper.offsetToGlobalCoord(upperOffset); - lower.mFlags = 0; - lower.mValueMask.setOff(); - lower.mChildMask.setOff(); - lower.mMinimum = lower.mMaximum = NanoLower::ValueType(0);// background; - lower.mAverage = lower.mStdDevi = NanoLower::FloatType(0); - }, mDeviceData); - cudaCheckError(); - - const uint64_t valueCount = mData.nodeCount[1] << 12; - 
cudaLambdaKernel<<>>(valueCount, [=] __device__(size_t tid, Data *d_data) { - auto &lower = d_data->getLower(tid >> 12); - lower.mTable[tid & 4095u].value = NanoLower::ValueType(0);// background - }, mDeviceData); - cudaCheckError(); -}// CudaPointsToGrid::processLowerNodes - -//================================================================================================ - -template -template -inline void CudaPointsToGrid::processLeafNodes(const PtrT points) -{ - const uint8_t flags = static_cast(mData.flags.data());// mIncludeStats ? 16u : 0u;// 4th bit indicates stats - - if (mVerbose==2) mTimer.start("process leaf meta data"); - // loop over leaf nodes and add it to its parent node - cudaLambdaKernel<<>>(mData.nodeCount[0], [=] __device__(size_t tid, Data *d_data) { - const uint64_t leafKey = d_data->d_leaf_keys[tid], tile_id = leafKey >> 27; - auto &upper = d_data->getUpper(tile_id); - const uint32_t lowerOffset = leafKey & 4095u, upperOffset = (leafKey >> 12) & 32767u; - auto &lower = *upper.getChild(upperOffset); - lower.mChildMask.setOnAtomic(lowerOffset); - auto &leaf = d_data->getLeaf(tid); - lower.setChild(lowerOffset, &leaf); - leaf.mBBoxMin = lower.offsetToGlobalCoord(lowerOffset); - leaf.mFlags = flags; - auto &valueMask = leaf.mValueMask; - valueMask.setOff();// initiate all bits to off - - if constexpr(is_same::value) { - leaf.mOffset = d_data->pointsPerLeafPrefix[tid]; - leaf.mPointCount = d_data->pointsPerLeaf[tid]; - } else if constexpr(BuildTraits::is_offindex) { - leaf.mOffset = tid*512u + 1u;// background is index 0 - leaf.mPrefixSum = 0u; - } else if constexpr(!BuildTraits::is_special) { - leaf.mAverage = leaf.mStdDevi = NanoLeaf::FloatType(0); - leaf.mMinimum = leaf.mMaximum = NanoLeaf::ValueType(0); - } - }, mDeviceData); cudaCheckError(); - - if (mVerbose==2) mTimer.restart("set active voxel state and values"); - // loop over all active voxels and set LeafNode::mValueMask and LeafNode::mValues - cudaLambdaKernel<<>>(mData.voxelCount, [=] __device__(size_t tid, Data *d_data) { - const uint32_t pointID = d_data->pointsPerVoxelPrefix[tid]; - const uint64_t voxelKey = d_data->d_keys[pointID]; - auto &upper = d_data->getUpper(voxelKey >> 36); - auto &lower = *upper.getChild((voxelKey >> 21) & 32767u); - auto &leaf = *lower.getChild((voxelKey >> 9) & 4095u); - const uint32_t n = voxelKey & 511u; - leaf.mValueMask.setOnAtomic(n);// <--- slow! - if constexpr(is_same::value) { - leaf.mValues[n] = uint16_t(pointID + d_data->pointsPerVoxel[tid] - leaf.offset()); - } else if constexpr(!BuildTraits::is_special) { - leaf.mValues[n] = NanoLeaf::ValueType(1);// set value of active voxels that are not points (or index) - } - }, mDeviceData); cudaCheckError(); - - mMemPool.free(mData.d_keys, mData.pointsPerVoxel, mData.pointsPerVoxelPrefix, mData.pointsPerLeafPrefix, mData.pointsPerLeaf); - - if (mVerbose==2) mTimer.restart("set inactive voxel values"); - const uint64_t denseVoxelCount = mData.nodeCount[0] << 9; - cudaLambdaKernel<<>>(denseVoxelCount, [=] __device__(size_t tid, Data *d_data) { - auto &leaf = d_data->getLeaf(tid >> 9u); - const uint32_t n = tid & 511u; - if (leaf.mValueMask.isOn(n)) return; - if constexpr(is_same::value) { - const uint32_t m = leaf.mValueMask.findPrev(n - 1); - leaf.mValues[n] = m < 512u ? 
leaf.mValues[m] : 0u; - } else if constexpr(!BuildTraits::is_special) { - leaf.mValues[n] = NanoLeaf::ValueType(0);// value of inactive voxels - } - }, mDeviceData); cudaCheckError(); - - if constexpr(BuildTraits::is_onindex) { - if (mVerbose==2) mTimer.restart("prefix-sum for index grid"); - uint64_t *devValueIndex = mMemPool.template alloc(mData.nodeCount[0], mStream); - auto devValueIndexPrefix = mMemPool.template alloc(mData.nodeCount[0], mStream); - kernels::fillValueIndexKernel<<>>(mData.nodeCount[0], devValueIndex, mDeviceData); - cudaCheckError(); - CALL_CUBS(DeviceScan::InclusiveSum, devValueIndex, devValueIndexPrefix, mData.nodeCount[0]); - mMemPool.free(devValueIndex); - kernels::leafPrefixSumKernel<<>>(mData.nodeCount[0], devValueIndexPrefix, mDeviceData); - cudaCheckError(); - mMemPool.free(devValueIndexPrefix); - } - - if constexpr(BuildTraits::is_indexmask) { - if (mVerbose==2) mTimer.restart("leaf.mMask = leaf.mValueMask"); - kernels::setMaskEqValMaskKernel<<>>(mData.nodeCount[0], mDeviceData); - cudaCheckError(); - } - if (mVerbose==2) mTimer.stop(); -}// CudaPointsToGrid::processLeafNodes - -//================================================================================================ - -template -template -inline void CudaPointsToGrid::processPoints(const PtrT, size_t) -{ - mMemPool.free(mData.d_indx, mStream); -} - -//================================================================================================ - -// Template specialization with BuildT = Point -template <> -template -inline void CudaPointsToGrid::processPoints(const PtrT points, size_t pointCount) -{ - switch (mPointType){ - case PointType::Disable: - throw std::runtime_error("CudaPointsToGrid::processPoints: mPointType == PointType::Disable\n"); - case PointType::PointID: - cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { - d_data->template getPoint(tid) = d_data->d_indx[tid]; - }, mDeviceData); cudaCheckError(); - break; - case PointType::World64: - cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { - d_data->template getPoint(tid) = points[d_data->d_indx[tid]]; - }, mDeviceData); cudaCheckError(); - break; - case PointType::World32: - cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { - d_data->template getPoint(tid) = points[d_data->d_indx[tid]]; - }, mDeviceData); cudaCheckError(); - break; - case PointType::Grid64: - cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { - d_data->template getPoint(tid) = d_data->map.applyInverseMap(points[d_data->d_indx[tid]]); - }, mDeviceData); cudaCheckError(); - break; - case PointType::Grid32: - cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { - d_data->template getPoint(tid) = d_data->map.applyInverseMapF(points[d_data->d_indx[tid]]); - }, mDeviceData); cudaCheckError(); - break; - case PointType::Voxel32: - cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { - worldToVoxel(d_data->template getPoint(tid), points[d_data->d_indx[tid]], d_data->map); - }, mDeviceData); cudaCheckError(); - break; - case PointType::Voxel16: - cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { - worldToVoxel(d_data->template getPoint(tid), points[d_data->d_indx[tid]], d_data->map); - }, mDeviceData); cudaCheckError(); - break; - case PointType::Voxel8: - cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { - worldToVoxel(d_data->template getPoint(tid), points[d_data->d_indx[tid]], 
d_data->map); - }, mDeviceData); cudaCheckError(); - break; - case PointType::Default: - cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { - d_data->template getPoint::element_type>(tid) = points[d_data->d_indx[tid]]; - }, mDeviceData); cudaCheckError(); - break; - default: - printf("Internal error in CudaPointsToGrid::processPoints\n"); - } - mMemPool.free(mData.d_indx); -}// CudaPointsToGrid::processPoints - -//================================================================================================ - -template -inline void CudaPointsToGrid::processBBox() -{ - if (mData.flags.isMaskOff(GridFlags::HasBBox)) { - mMemPool.free(mData.d_leaf_keys, mData.d_lower_keys); - return; - } - - // reset bbox in lower nodes - cudaLambdaKernel<<>>(mData.nodeCount[1], [=] __device__(size_t tid, Data *d_data) { - d_data->getLower(tid).mBBox = CoordBBox(); - }, mDeviceData); - cudaCheckError(); - - // update and propagate bbox from leaf -> lower/parent nodes - cudaLambdaKernel<<>>(mData.nodeCount[0], [=] __device__(size_t tid, Data *d_data) { - const uint64_t leafKey = d_data->d_leaf_keys[tid]; - auto &upper = d_data->getUpper(leafKey >> 27); - auto &lower = *upper.getChild((leafKey >> 12) & 32767u); - auto &leaf = d_data->getLeaf(tid); - leaf.updateBBox(); - lower.mBBox.expandAtomic(leaf.bbox()); - }, mDeviceData); - mMemPool.free(mData.d_leaf_keys); - cudaCheckError(); - - // reset bbox in upper nodes - cudaLambdaKernel<<>>(mData.nodeCount[2], [=] __device__(size_t tid, Data *d_data) { - d_data->getUpper(tid).mBBox = CoordBBox(); - }, mDeviceData); - cudaCheckError(); - - // propagate bbox from lower -> upper/parent node - cudaLambdaKernel<<>>(mData.nodeCount[1], [=] __device__(size_t tid, Data *d_data) { - const uint64_t lowerKey = d_data->d_lower_keys[tid]; - auto &upper = d_data->getUpper(lowerKey >> 15); - auto &lower = d_data->getLower(tid); - upper.mBBox.expandAtomic(lower.bbox()); - }, mDeviceData); - mMemPool.free(mData.d_lower_keys); - cudaCheckError() - - // propagate bbox from upper -> root/parent node - cudaLambdaKernel<<>>(mData.nodeCount[2], [=] __device__(size_t tid, Data *d_data) { - d_data->getRoot().mBBox.expandAtomic(d_data->getUpper(tid).bbox()); - }, mDeviceData); - cudaCheckError(); - - // update the world-bbox in the root node - cudaLambdaKernel<<<1, 1, 0, mStream>>>(1, [=] __device__(size_t, Data *d_data) { - d_data->getGrid().mWorldBBox = d_data->getRoot().mBBox.transform(d_data->map); - }, mDeviceData); - cudaCheckError(); -}// CudaPointsToGrid::processBBox - -//================================================================================================ - -template -GridHandle// Grid with PointType coordinates as blind data -cudaPointsToGrid(const PtrT d_xyz, int pointCount, double voxelSize, PointType type, const BufferT &buffer, cudaStream_t stream) -{ - CudaPointsToGrid converter(voxelSize, Vec3d(0.0), stream); - converter.setPointType(type); - return converter.getHandle(d_xyz, pointCount, buffer); -} - -//================================================================================================ - -template -GridHandle// Grid -cudaVoxelsToGrid(const PtrT d_ijk, size_t voxelCount, double voxelSize, const BufferT &buffer, cudaStream_t stream) -{ - CudaPointsToGrid converter(voxelSize, Vec3d(0.0), stream); - return converter.getHandle(d_ijk, voxelCount, buffer); -} - -//================================================================================================ - -template -GridHandle -cudaPointsToGrid(std::vector> vec, const 
BufferT &buffer, cudaStream_t stream)
-{
-    std::vector<GridHandle<BufferT>> handles;
-    for (auto &p : vec) handles.push_back(cudaPointsToGrid(std::get<0>(p), std::get<1>(p), std::get<2>(p), std::get<3>(p), buffer, stream));
-    return mergeDeviceGrids(handles, stream);
-}
-
-//================================================================================================
-
-template<typename BuildT, typename PtrT, typename BufferT, typename AllocT>
-GridHandle<BufferT>
-cudaVoxelsToGrid(std::vector<std::tuple<const PtrT, size_t, double>> vec, const BufferT &buffer, cudaStream_t stream)
-{
-    std::vector<GridHandle<BufferT>> handles;
-    for (auto &p : vec) handles.push_back(cudaVoxelsToGrid<BuildT>(std::get<0>(p), std::get<1>(p), std::get<2>(p), buffer, stream));
-    return mergeDeviceGrids(handles, stream);
-}
-
-}// nanovdb namespace
-
-#endif // NVIDIA_CUDA_POINTS_TO_GRID_CUH_HAS_BEEN_INCLUDED
+#include <nanovdb/util/Util.h> // for NANOVDB_DEPRECATED_HEADER
+#include <nanovdb/tools/cuda/PointsToGrid.cuh>
+NANOVDB_DEPRECATED_HEADER("Include nanovdb/tools/cuda/PointsToGrid.cuh instead.")
diff --git a/nanovdb/nanovdb/util/cuda/CudaSignedFloodFill.cuh b/nanovdb/nanovdb/util/cuda/CudaSignedFloodFill.cuh
index 2f4bf203d6..f9ba99b8fc 100644
--- a/nanovdb/nanovdb/util/cuda/CudaSignedFloodFill.cuh
+++ b/nanovdb/nanovdb/util/cuda/CudaSignedFloodFill.cuh
@@ -1,201 +1,6 @@
 // Copyright Contributors to the OpenVDB Project
 // SPDX-License-Identifier: MPL-2.0
-/*!
-    \file CudaSignedFloodFill.cuh
-
-    \author Ken Museth
-
-    \date May 3, 2023
-
-    \brief Performs signed flood-fill operation on the hierarchical tree structure on the device
-
-    \todo This tool needs to handle the (extremely) rare case when the root node
-          needs to be modified during the signed flood fill operation. This happens
-          when the root-table needs to be expanded with tile values (of size 4096^3)
-          that are completely inside the implicit surface.
-
-    \warning The header file contains cuda device code so be sure
-             to only include it in .cu files (or other .cuh files)
-*/
-
-#ifndef NANOVDB_CUDA_SIGNED_FLOOD_FILL_CUH_HAS_BEEN_INCLUDED
-#define NANOVDB_CUDA_SIGNED_FLOOD_FILL_CUH_HAS_BEEN_INCLUDED
-
-#include
-#include
-#include
-#include
-#include
-
-namespace nanovdb {
-
-/// @brief Performs signed flood-fill operation on the hierarchical tree structure on the device
-/// @tparam BuildT Build type of the grid to be flood-filled
-/// @param d_grid Non-const device pointer to the grid that will be flood-filled
-/// @param verbose If true timing information will be printed to the terminal
-/// @param stream optional cuda stream
-template <typename BuildT>
-typename enable_if<BuildTraits<BuildT>::is_float, void>::type
-cudaSignedFloodFill(NanoGrid<BuildT> *d_grid, bool verbose = false, cudaStream_t stream = 0);
-
-namespace {// anonymous namespace
-
-template <typename BuildT>
-class CudaSignedFloodFill
-{
-public:
-    CudaSignedFloodFill(bool verbose = false, cudaStream_t stream = 0)
-        : mStream(stream), mVerbose(verbose) {}
-
-    /// @brief Toggle on and off verbose mode
-    /// @param on if true verbose is turned on
-    void setVerbose(bool on = true) {mVerbose = on;}
-
-    void operator()(NanoGrid<BuildT> *d_grid);
-
-private:
-    cudaStream_t mStream{0};
-    GpuTimer mTimer;
-    bool mVerbose{false};
-
-};// CudaSignedFloodFill
-
-//================================================================================================
-
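As a usage sketch of the entry point declared above (editorial; the function name floodFillLevelSet is
hypothetical and the float grid is assumed to already reside on the device):

void floodFillLevelSet(nanovdb::NanoGrid<float>* d_grid, cudaStream_t stream)
{
    // propagates the sign of the active narrow-band values to all
    // inactive interior/exterior values of the device grid
    nanovdb::cudaSignedFloodFill(d_grid, /*verbose=*/false, stream);
}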
-template <typename BuildT>
-__global__ void cudaProcessRootNode(NanoTree<BuildT> *tree)
-{
-    // auto &root = tree->root();
-    /*
-    using ChildT = typename RootT::ChildNodeType;
-    // Insert the child nodes into a map sorted according to their origin
-    std::map<Coord, ChildT*> nodeKeys;
-    typename RootT::ChildOnIter it = root.beginChildOn();
-    for (; it; ++it) nodeKeys.insert(std::pair<Coord, ChildT*>(it.getCoord(), &(*it)));
-    static const Index DIM = RootT::ChildNodeType::DIM;
-
-    // We employ a simple z-scanline algorithm that inserts inactive tiles with
-    // the inside value if they are sandwiched between inside child nodes only!
-    typename std::map<Coord, ChildT*>::const_iterator b = nodeKeys.begin(), e = nodeKeys.end();
-    if ( b == e ) return;
-    for (typename std::map<Coord, ChildT*>::const_iterator a = b++; b != e; ++a, ++b) {
-        Coord d = b->first - a->first; // delta of neighboring coordinates
-        if (d[0]!=0 || d[1]!=0 || d[2]==Int32(DIM)) continue;// not same z-scanline or neighbors
-        const ValueT fill[] = { a->second->getLastValue(), b->second->getFirstValue() };
-        if (!(fill[0] < 0) || !(fill[1] < 0)) continue; // scanline isn't inside
-        Coord c = a->first + Coord(0u, 0u, DIM);
-        for (; c[2] != b->first[2]; c[2] += DIM) root.addTile(c, mInside, false);
-    }
-    */
-    //root.setBackground(mOutside, /*updateChildNodes=*/false);
-}// cudaProcessRootNode
-
-//================================================================================================
-
-template <typename BuildT, int LEVEL>
-__global__ void cudaProcessInternalNodes(NanoTree<BuildT> *tree, size_t count)
-{
-    using NodeT = typename NanoNode<BuildT, LEVEL>::type;
-    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
-    if (tid >= count) return;
-    const uint32_t nValue = tid & (NodeT::SIZE - 1u);
-    auto &node = *(tree->template getFirstNode<LEVEL>() + (tid >> (3*NodeT::LOG2DIM)));
-    const auto &mask = node.childMask();
-    if (mask.isOn(nValue)) return;// ignore if child
-    auto value = tree->background();// initialize to outside value
-    auto n = mask.template findNext<true>(nValue);
-    if (n < NodeT::SIZE) {
-        if (node.getChild(n)->getFirstValue() < 0) value = -value;
-    } else if ((n = mask.template findPrev<true>(nValue)) < NodeT::SIZE) {
-        if (node.getChild(n)->getLastValue() < 0) value = -value;
-    } else if (node.getValue(0)<0) {
-        value = -value;
-    }
-    node.setValue(nValue, value);
-}// cudaProcessInternalNodes
-
-//================================================================================================
-
-template <typename BuildT>
-__global__ void cudaProcessLeafNodes(NanoTree<BuildT> *tree, size_t count)
-{
-    using LeafT = NanoLeaf<BuildT>;
-    const size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
-    if (tid >= count) return;
-    const uint32_t nVoxel = tid & (LeafT::SIZE - 1u);
-    auto *leaf = tree->getFirstLeaf() + (tid >> (3*LeafT::LOG2DIM));
-    const auto &mask = leaf->valueMask();
-    if (mask.isOn(nVoxel)) return;
-    auto *buffer = leaf->mValues;
-    auto n = mask.template findNext<true>(nVoxel);
-    if (n == LeafT::SIZE && (n = mask.template findPrev<true>(nVoxel)) == LeafT::SIZE) n = 0u;
-    buffer[nVoxel] = buffer[n]<0 ?
-tree->background() : tree->background(); -}// cudaProcessLeafNodes - -//================================================================================================ - -template -__global__ void cudaCpyNodeCount(NanoGrid *d_grid, uint64_t *d_count) -{ - NANOVDB_ASSERT(d_grid->isSequential()); - for (int i=0; i<3; ++i) *d_count++ = d_grid->tree().nodeCount(i); - *d_count = d_grid->tree().root().tileCount(); -} - -}// anonymous namespace - -//================================================================================================ - -template -void CudaSignedFloodFill::operator()(NanoGrid *d_grid) -{ - static_assert(BuildTraits::is_float, "CudaSignedFloodFill only works on float grids"); - NANOVDB_ASSERT(d_grid); - uint64_t count[4], *d_count = nullptr; - cudaCheck(cudaMallocAsync((void**)&d_count, 4*sizeof(uint64_t), mStream)); - cudaCpyNodeCount<<<1, 1, 0, mStream>>>(d_grid, d_count); - cudaCheckError(); - cudaCheck(cudaMemcpyAsync(&count, d_count, 4*sizeof(uint64_t), cudaMemcpyDeviceToHost, mStream)); - cudaCheck(cudaFreeAsync(d_count, mStream)); - - static const int threadsPerBlock = 128; - auto blocksPerGrid = [&](size_t count)->uint32_t{return (count + (threadsPerBlock - 1)) / threadsPerBlock;}; - auto *tree = reinterpret_cast*>(d_grid + 1); - - if (mVerbose) mTimer.start("\nProcess leaf nodes"); - cudaProcessLeafNodes<<>>(tree, count[0]<<9); - cudaCheckError(); - - if (mVerbose) mTimer.restart("Process lower internal nodes"); - cudaProcessInternalNodes<<>>(tree, count[1]<<12); - cudaCheckError(); - - if (mVerbose) mTimer.restart("Process upper internal nodes"); - cudaProcessInternalNodes<<>>(tree, count[2]<<15); - cudaCheckError(); - - //if (mVerbose) mTimer.restart("Process root node"); - //cudaProcessRootNode<<<1, 1, 0, mStream>>>(tree); - if (mVerbose) mTimer.stop(); - cudaCheckError(); -}// CudaSignedFloodFill::operator() - -//================================================================================================ - -template -typename enable_if::is_float, void>::type -cudaSignedFloodFill(NanoGrid *d_grid, bool verbose, cudaStream_t stream) -{ - CudaSignedFloodFill sff(verbose, stream); - sff(d_grid); - auto *d_gridData = d_grid->data(); - GridChecksum cs = cudaGetGridChecksum(d_gridData, stream); - if (cs.mode() == ChecksumMode::Full) {// ChecksumMode::Partial checksum is unaffected - cudaGridChecksum(d_gridData, ChecksumMode::Full, stream); - } -} - -}// nanovdb namespace - -#endif // NANOVDB_CUDA_SIGNED_FLOOD_FILL_CUH_HAS_BEEN_INCLUDED +#include // for NANOVDB_DEPRECATED_HEADER +#include +NANOVDB_DEPRECATED_HEADER("Include nanovdb/tools/cuda/SignedFloodFill.cuh instead.") diff --git a/nanovdb/nanovdb/util/cuda/CudaUtils.h b/nanovdb/nanovdb/util/cuda/CudaUtils.h index 40001748ee..e154ff9b4e 100644 --- a/nanovdb/nanovdb/util/cuda/CudaUtils.h +++ b/nanovdb/nanovdb/util/cuda/CudaUtils.h @@ -1,136 +1,6 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -#ifndef NANOVDB_CUDA_UTILS_H_HAS_BEEN_INCLUDED -#define NANOVDB_CUDA_UTILS_H_HAS_BEEN_INCLUDED - -#include -#include - -//#if defined(DEBUG) || defined(_DEBUG) - static inline void gpuAssert(cudaError_t code, const char* file, int line, bool abort = true) - { - if (code != cudaSuccess) { - fprintf(stderr, "CUDA error %u: %s (%s:%d)\n", unsigned(code), cudaGetErrorString(code), file, line); - //fprintf(stderr, "CUDA Runtime Error: %s %s %d\n", cudaGetErrorString(code), file, line); - if (abort) exit(code); - } - } - static inline void ptrAssert(const void* ptr, const char* msg, 
const char* file, int line, bool abort = true)
-    {
-        if (ptr == nullptr) {
-            fprintf(stderr, "NULL pointer error: %s %s %d\n", msg, file, line);
-            if (abort) exit(1);
-        } else if (uint64_t(ptr) % NANOVDB_DATA_ALIGNMENT) {
-            fprintf(stderr, "Pointer misalignment error: %s %s %d\n", msg, file, line);
-            if (abort) exit(1);
-        }
-    }
-//#else
-//    static inline void gpuAssert(cudaError_t, const char*, int, bool = true){}
-//    static inline void ptrAssert(void*, const char*, const char*, int, bool = true){}
-//#endif
-
-// Convenience function for checking CUDA runtime API results
-// can be wrapped around any runtime API call. No-op in release builds.
-#define cudaCheck(ans) \
-    { \
-        gpuAssert((ans), __FILE__, __LINE__); \
-    }
-
-#define checkPtr(ptr, msg) \
-    { \
-        ptrAssert((ptr), (msg), __FILE__, __LINE__); \
-    }
-
-#define cudaSync() \
-    { \
-        cudaCheck(cudaDeviceSynchronize()); \
-    }
-
-#define cudaCheckError() \
-    { \
-        cudaCheck(cudaGetLastError()); \
-    }
-
-#if CUDART_VERSION < 11020 // 11.2 introduced cudaMallocAsync and cudaFreeAsync
-
-/// @brief Dummy implementation of cudaMallocAsync that calls cudaMalloc
-/// @param d_ptr Device pointer to allocated device memory
-/// @param size Number of bytes to allocate
-/// @param dummy The stream establishing the stream ordering contract and the memory pool to allocate from (ignored)
-/// @return Cuda error code
-inline cudaError_t cudaMallocAsync(void** d_ptr, size_t size, cudaStream_t){return cudaMalloc(d_ptr, size);}
-
-/// @brief Dummy implementation of cudaFreeAsync that calls cudaFree
-/// @param d_ptr Device pointer that will be freed
-/// @param dummy The stream establishing the stream ordering promise (ignored)
-/// @return Cuda error code
-inline cudaError_t cudaFreeAsync(void* d_ptr, cudaStream_t){return cudaFree(d_ptr);}
-
-#endif
-
-#if defined(__CUDACC__)// the following functions only run on the GPU!
-
-// --- Wrapper for launching lambda kernels
-template<typename Func, typename... Args>
-__global__ void cudaLambdaKernel(const size_t numItems, Func func, Args... args)
-{
-    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
-    if (tid >= numItems) return;
-    func(tid, args...);
-}
-
-/// @brief Copy characters from @c src to @c dst on the device.
-/// @param dst pointer to the character array to write to.
-/// @param src pointer to the null-terminated character string to copy from.
-/// @return pointer to the character array being written to.
-/// @note Emulates the behaviour of std::strcpy.
-__device__ inline char* cudaStrcpy(char *dst, const char *src)
-{
-    char *p = dst;
-    do {*p++ = *src;} while(*src++);
-    return dst;
-}
-
-/// @brief Appends a copy of the character string pointed to by @c src to
-///        the end of the character string pointed to by @c dst on the device.
-/// @param dst pointer to the null-terminated byte string to append to.
-/// @param src pointer to the null-terminated byte string to copy from.
-/// @return pointer to the character array being appended to.
-/// @note Emulates the behaviour of std::strcat.
-__device__ inline char* cudaStrcat(char *dst, const char *src)
-{
-    char *p = dst;
-    while (*p) ++p;
-    cudaStrcpy(p, src);
-    return dst;
-}
-
-/// @brief Compares two null-terminated byte strings lexicographically on the device.
-/// @param lhs pointer to the null-terminated byte strings to compare
-/// @param rhs pointer to the null-terminated byte strings to compare
-/// @return Negative value if @c lhs appears before @c rhs in lexicographical order.
-///         Zero if @c lhs and @c rhs compare equal. Positive value if @c lhs appears
-///         after @c rhs in lexicographical order.
-__device__ inline int cudaStrcmp(const char *lhs, const char *rhs)
-{
-    while(*lhs && (*lhs == *rhs)){
-        lhs++;
-        rhs++;
-    }
-    return *(const unsigned char*)lhs - *(const unsigned char*)rhs;// zero if lhs == rhs
-}
-
-/// @brief Test if two null-terminated byte strings are the same
-/// @param lhs pointer to the null-terminated byte strings to compare
-/// @param rhs pointer to the null-terminated byte strings to compare
-/// @return true if the two c-strings are identical
-__device__ inline bool cudaStrEq(const char *lhs, const char *rhs)
-{
-    return cudaStrcmp(lhs, rhs) == 0;
-}
-
-#endif// __CUDACC__
-
-#endif// NANOVDB_CUDA_UTILS_H_HAS_BEEN_INCLUDED
\ No newline at end of file
+#include <nanovdb/util/Util.h> // for NANOVDB_DEPRECATED_HEADER
+#include <nanovdb/util/cuda/Util.h>
+NANOVDB_DEPRECATED_HEADER("Include nanovdb/util/cuda/Util.h instead.")
\ No newline at end of file
diff --git a/nanovdb/nanovdb/util/cuda/GpuTimer.h b/nanovdb/nanovdb/util/cuda/GpuTimer.h
index 6c6e217403..be7f81b227 100644
--- a/nanovdb/nanovdb/util/cuda/GpuTimer.h
+++ b/nanovdb/nanovdb/util/cuda/GpuTimer.h
@@ -1,110 +1,6 @@
 // Copyright Contributors to the OpenVDB Project
 // SPDX-License-Identifier: MPL-2.0
 
-/// @file GpuTimer.h
-///
-/// @author Ken Museth
-///
-/// @brief A simple GPU timing class
-
-#ifndef NANOVDB_GPU_TIMER_H_HAS_BEEN_INCLUDED
-#define NANOVDB_GPU_TIMER_H_HAS_BEEN_INCLUDED
-
-#include <iostream> // for std::cerr
-#include <cuda.h>
-#include <cuda_runtime_api.h>
-
-namespace nanovdb {
-
-class GpuTimer
-{
-    cudaStream_t mStream{0};
-    cudaEvent_t mStart, mStop;
-
-public:
-    /// @brief Default constructor
-    /// @param stream CUDA stream to be timed (defaults to stream 0)
-    /// @note Starts the timer
-    GpuTimer(cudaStream_t stream = 0) : mStream(stream)
-    {
-        cudaEventCreate(&mStart);
-        cudaEventCreate(&mStop);
-        cudaEventRecord(mStart, mStream);
-    }
-
-    /// @brief Construct and start the timer
-    /// @param msg string message to be printed when timer is started
-    /// @param stream CUDA stream to be timed (defaults to stream 0)
-    /// @param os output stream for the message above
-    GpuTimer(const std::string &msg, cudaStream_t stream = 0, std::ostream& os = std::cerr)
-        : mStream(stream)
-    {
-        os << msg << " ... " << std::flush;
-        cudaEventCreate(&mStart);
-        cudaEventCreate(&mStop);
-        cudaEventRecord(mStart, mStream);
-    }
-
-    /// @brief Destructor
-    ~GpuTimer()
-    {
-        cudaEventDestroy(mStart);
-        cudaEventDestroy(mStop);
-    }
-
-    /// @brief Start the timer
-    /// @param stream CUDA stream to be timed (defaults to stream 0)
-    /// @param os output stream for the message above
-    void start() {cudaEventRecord(mStart, mStream);}
-
-    /// @brief Start the timer
-    /// @param msg string message to be printed when timer is started
-    /// @param os output stream for the message above
-    void start(const std::string &msg, std::ostream& os = std::cerr)
-    {
-        os << msg << " ... " << std::flush;
-        this->start();
-    }
-
-    /// @brief Start the timer
-    /// @param msg string message to be printed when timer is started
-    /// @param os output stream for the message above
-    void start(const char* msg, std::ostream& os = std::cerr)
-    {
-        os << msg << " ... " << std::flush;
-        this->start();
-    }
-
-    /// @brief elapsed time (since start) in miliseconds
-    /// @return elapsed time (since start) in miliseconds
-    float elapsed()
-    {
-        cudaEventRecord(mStop, mStream);
-        cudaEventSynchronize(mStop);
-        float diff = 0.0f;
-        cudaEventElapsedTime(&diff, mStart, mStop);
-        return diff;
-    }
-
-    /// @brief stop the timer
-    /// @param os output stream for the message above
-    void stop(std::ostream& os = std::cerr)
-    {
-        float diff = this->elapsed();
-        os << "completed in " << diff << " milliseconds" << std::endl;
-    }
-
-    /// @brief stop and start the timer
-    /// @param msg string message to be printed when timer is started
-    /// @warning Remember to call start before restart
-    void restart(const std::string &msg, std::ostream& os = std::cerr)
-    {
-        this->stop();
-        this->start(msg, os);
-    }
-};// GpuTimer
-
-} // namespace nanovdb
-
-#endif // NANOVDB_GPU_TIMER_H_HAS_BEEN_INCLUDED
+#include <nanovdb/util/Util.h> // for NANOVDB_DEPRECATED_HEADER
+#include <nanovdb/util/cuda/Timer.h>
+NANOVDB_DEPRECATED_HEADER("Include nanovdb/util/cuda/Timer.h instead.")
\ No newline at end of file
diff --git a/nanovdb/nanovdb/util/cuda/Timer.h b/nanovdb/nanovdb/util/cuda/Timer.h
new file mode 100644
index 0000000000..1bb7224461
--- /dev/null
+++ b/nanovdb/nanovdb/util/cuda/Timer.h
@@ -0,0 +1,116 @@
+// Copyright Contributors to the OpenVDB Project
+// SPDX-License-Identifier: MPL-2.0
+
+/// @file nanovdb/util/cuda/Timer.h
+///
+/// @author Ken Museth
+///
+/// @brief A simple GPU timing class
+
+#ifndef NANOVDB_UTIL_CUDA_TIMER_H_HAS_BEEN_INCLUDED
+#define NANOVDB_UTIL_CUDA_TIMER_H_HAS_BEEN_INCLUDED
+
+#include <iostream> // for std::cerr
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+
+namespace nanovdb {
+
+namespace util::cuda {
+
+class Timer
+{
+    cudaStream_t mStream{0};
+    cudaEvent_t mStart, mStop;
+
+public:
+    /// @brief Default constructor
+    /// @param stream CUDA stream to be timed (defaults to stream 0)
+    /// @note Starts the timer
+    Timer(cudaStream_t stream = 0) : mStream(stream)
+    {
+        cudaEventCreate(&mStart);
+        cudaEventCreate(&mStop);
+        cudaEventRecord(mStart, mStream);
+    }
+
+    /// @brief Construct and start the timer
+    /// @param msg string message to be printed when timer is started
+    /// @param stream CUDA stream to be timed (defaults to stream 0)
+    /// @param os output stream for the message above
+    Timer(const std::string &msg, cudaStream_t stream = 0, std::ostream& os = std::cerr)
+        : mStream(stream)
+    {
+        os << msg << " ... " << std::flush;
+        cudaEventCreate(&mStart);
+        cudaEventCreate(&mStop);
+        cudaEventRecord(mStart, mStream);
+    }
+
+    /// @brief Destructor
+    ~Timer()
+    {
+        cudaEventDestroy(mStart);
+        cudaEventDestroy(mStop);
+    }
+
+    /// @brief Start the timer
+    /// @note Records the start event in the stream specified at construction
+    void start() {cudaEventRecord(mStart, mStream);}
+
+    /// @brief Start the timer
+    /// @param msg string message to be printed when timer is started
+    /// @param os output stream for the message above
+    void start(const std::string &msg, std::ostream& os = std::cerr)
+    {
+        os << msg << " ... " << std::flush;
+        this->start();
+    }
+
+    /// @brief Start the timer
+    /// @param msg string message to be printed when timer is started
+    /// @param os output stream for the message above
+    void start(const char* msg, std::ostream& os = std::cerr)
+    {
+        os << msg << " ... " << std::flush;
+        this->start();
+    }
+
+    /// @brief elapsed time (since start) in milliseconds
+    /// @return elapsed time (since start) in milliseconds
+    float elapsed()
+    {
+        cudaEventRecord(mStop, mStream);
+        cudaEventSynchronize(mStop);
+        float diff = 0.0f;
+        cudaEventElapsedTime(&diff, mStart, mStop);
+        return diff;
+    }
+
+    /// @brief stop the timer
+    /// @param os output stream for the message above
+    void stop(std::ostream& os = std::cerr)
+    {
+        float diff = this->elapsed();
+        os << "completed in " << diff << " milliseconds" << std::endl;
+    }
+
+    /// @brief stop and start the timer
+    /// @param msg string message to be printed when timer is started
+    /// @param os output stream for the message above
+    /// @warning Remember to call start before restart
+    void restart(const std::string &msg, std::ostream& os = std::cerr)
+    {
+        this->stop();
+        this->start(msg, os);
+    }
+};// Timer
+
+}// namespace util::cuda
+
+using GpuTimer [[deprecated("Use nanovdb::util::cuda::Timer instead")]] = util::cuda::Timer;
+
+} // namespace nanovdb
+
+#endif // NANOVDB_UTIL_CUDA_TIMER_H_HAS_BEEN_INCLUDED
diff --git a/nanovdb/nanovdb/util/cuda/Util.h b/nanovdb/nanovdb/util/cuda/Util.h
new file mode 100644
index 0000000000..8ebfde61e2
--- /dev/null
+++ b/nanovdb/nanovdb/util/cuda/Util.h
@@ -0,0 +1,193 @@
+// Copyright Contributors to the OpenVDB Project
+// SPDX-License-Identifier: MPL-2.0
+
+/*!
+    \file nanovdb/util/cuda/Util.h
+
+    \author Ken Museth
+
+    \date December 20, 2023
+
+    \brief Cuda specific utility functions
+*/
+
+#ifndef NANOVDB_UTIL_CUDA_UTIL_H_HAS_BEEN_INCLUDED
+#define NANOVDB_UTIL_CUDA_UTIL_H_HAS_BEEN_INCLUDED
+
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include <nanovdb/util/Util.h> // for stderr and NANOVDB_ASSERT
+
+// change 1 -> 0 to only perform asserts during debug builds
+#if 1 || defined(DEBUG) || defined(_DEBUG)
+    static inline void gpuAssert(cudaError_t code, const char* file, int line, bool abort = true)
+    {
+        if (code != cudaSuccess) {
+            fprintf(stderr, "CUDA error %u: %s (%s:%d)\n", unsigned(code), cudaGetErrorString(code), file, line);
+            //fprintf(stderr, "CUDA Runtime Error: %s %s %d\n", cudaGetErrorString(code), file, line);
+            if (abort) exit(code);
+        }
+    }
+    static inline void ptrAssert(const void* ptr, const char* msg, const char* file, int line, bool abort = true)
+    {
+        if (ptr == nullptr) {
+            fprintf(stderr, "NULL pointer error: %s %s %d\n", msg, file, line);
+            if (abort) exit(1);
+        } else if (uint64_t(ptr) % 32) {
+            fprintf(stderr, "Pointer misalignment error: %s %s %d\n", msg, file, line);
+            if (abort) exit(1);
+        }
+    }
+#else
+    static inline void gpuAssert(cudaError_t, const char*, int, bool = true){}
+    static inline void ptrAssert(void*, const char*, const char*, int, bool = true){}
+#endif
+
+// Convenience macro for checking CUDA runtime API results; it
+// can be wrapped around any runtime API call. No-op when the
+// asserts above are compiled out.
+#define cudaCheck(ans) \
+    { \
+        gpuAssert((ans), __FILE__, __LINE__); \
+    }
+
+#define checkPtr(ptr, msg) \
+    { \
+        ptrAssert((ptr), (msg), __FILE__, __LINE__); \
+    }
+
+#define cudaSync() \
+    { \
+        cudaCheck(cudaDeviceSynchronize()); \
+    }
+
+#define cudaCheckError() \
+    { \
+        cudaCheck(cudaGetLastError()); \
+    }
+
+namespace nanovdb {// =========================================================
+
+namespace util::cuda {// ======================================================
+
+//#define NANOVDB_USE_SYNC_CUDA_MALLOC
+// cudaMallocAsync and cudaFreeAsync were introduced in CUDA 11.2, so for older
+// toolkits we provide custom implementations below that map to cudaMalloc and
+// cudaFree. If NANOVDB_USE_SYNC_CUDA_MALLOC is defined, these fallback
+// implementations are used as well, which is useful in virtualized environments
+// that slice up the GPU and share it between instances as vGPUs. In such
+// environments GPU unified memory is usually disabled for security reasons, and
+// since asynchronous CUDA malloc/free depends on GPU unified memory, it is not
+// possible to use cudaMallocAsync and cudaFreeAsync there.
+
+#if (CUDART_VERSION < 11020) || defined(NANOVDB_USE_SYNC_CUDA_MALLOC) // 11.2 introduced cudaMallocAsync and cudaFreeAsync
+
+/// @brief Simple wrapper that calls cudaMalloc
+/// @param d_ptr Device pointer to allocated device memory
+/// @param size Number of bytes to allocate
+/// @param dummy The stream establishing the stream ordering contract and the memory pool to allocate from (ignored)
+/// @return Cuda error code
+inline cudaError_t mallocAsync(void** d_ptr, size_t size, cudaStream_t){return cudaMalloc(d_ptr, size);}
+
+/// @brief Simple wrapper that calls cudaFree
+/// @param d_ptr Device pointer that will be freed
+/// @param dummy The stream establishing the stream ordering promise (ignored)
+/// @return Cuda error code
+inline cudaError_t freeAsync(void* d_ptr, cudaStream_t){return cudaFree(d_ptr);}
+
+#else
+
+/// @brief Simple wrapper that calls cudaMallocAsync
+/// @param d_ptr Device pointer to allocated device memory
+/// @param size Number of bytes to allocate
+/// @param stream The stream establishing the stream ordering contract and the memory pool to allocate from
+/// @return Cuda error code
+inline cudaError_t mallocAsync(void** d_ptr, size_t size, cudaStream_t stream){return cudaMallocAsync(d_ptr, size, stream);}
+
+/// @brief Simple wrapper that calls cudaFreeAsync
+/// @param d_ptr Device pointer that will be freed
+/// @param stream The stream establishing the stream ordering promise
+/// @return Cuda error code
+inline cudaError_t freeAsync(void* d_ptr, cudaStream_t stream){return cudaFreeAsync(d_ptr, stream);}
+
+#endif
+
+/// @brief Simple (naive) implementation of a unique device pointer
+///        using stream ordered memory allocation and deallocation.
+/// @tparam T Type of the device pointer
+template <typename T>
+class unique_ptr
+{
+    T *mPtr;// pointer to stream ordered memory allocation
+    cudaStream_t mStream;
+public:
+    unique_ptr(size_t count = 0, cudaStream_t stream = 0) : mPtr(nullptr), mStream(stream)
+    {
+        if (count>0) cudaCheck(mallocAsync((void**)&mPtr, count*sizeof(T), stream));
+    }
+    unique_ptr(const unique_ptr&) = delete;
+    unique_ptr(unique_ptr&& other) : mPtr(other.mPtr), mStream(other.mStream)
+    {
+        other.mPtr = nullptr;
+    }
+    ~unique_ptr()
+    {
+        if (mPtr) cudaCheck(freeAsync(mPtr, mStream));
+    }
+    unique_ptr& operator=(const unique_ptr&) = delete;
+    unique_ptr& operator=(unique_ptr&& rhs) noexcept
+    {
+        if (this != &rhs) {
+            this->reset();// release any existing allocation before taking ownership
+            mPtr = rhs.mPtr;
+            mStream = rhs.mStream;
+            rhs.mPtr = nullptr;
+        }
+        return *this;
+    }
+    void reset() {
+        if (mPtr) {
+            cudaCheck(freeAsync(mPtr, mStream));
+            mPtr = nullptr;
+        }
+    }
+    T* get() const {return mPtr;}
+    explicit operator bool() const {return mPtr != nullptr;}
+};// util::cuda::unique_ptr
+
+/// @brief Computes the number of blocks per grid given the problem size and number of threads per block
+/// @param numItems Problem size
+/// @param threadsPerBlock Number of threads per block (second CUDA launch parameter)
+/// @return number of blocks per grid (first CUDA launch parameter)
+/// @note CUDA launch parameters: kernel<<< blocksPerGrid, threadsPerBlock, sharedMemSize, streamID >>>
+inline size_t blocksPerGrid(size_t numItems, size_t threadsPerBlock)
+{
+    NANOVDB_ASSERT(numItems > 0 && threadsPerBlock >= 32 && threadsPerBlock % 32 == 0);
+    return (numItems + threadsPerBlock - 1) / threadsPerBlock;
+}
+
+#if defined(__CUDACC__)// the following functions only run on the GPU!
+
+/// @brief Cuda kernel that launches device lambda functions
+/// @param numItems Problem size
+template<typename Func, typename... Args>
+__global__ void lambdaKernel(const size_t numItems, Func func, Args... args)
+{
+    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+    if (tid >= numItems) return;
+    func(tid, args...);
+}// util::cuda::lambdaKernel
+
+#endif// __CUDACC__
+
+}// namespace util::cuda ======================================================
+
+}// namespace nanovdb =========================================================
+
+#if defined(__CUDACC__)// the following functions only run on the GPU!
+template<typename Func, typename... Args>
+[[deprecated("Use nanovdb::util::cuda::lambdaKernel instead")]]
+__global__ void cudaLambdaKernel(const size_t numItems, Func func, Args... args)
+{
+    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+    if (tid >= numItems) return;
+    func(tid, args...);
+}
+#endif// __CUDACC__
+
+#endif// NANOVDB_UTIL_CUDA_UTIL_H_HAS_BEEN_INCLUDED
\ No newline at end of file
diff --git a/openvdb/openvdb/Types.h b/openvdb/openvdb/Types.h
index ad811e1520..d654750b7e 100644
--- a/openvdb/openvdb/Types.h
+++ b/openvdb/openvdb/Types.h
@@ -688,6 +688,23 @@ class Steal {};
 /// @brief Tag dispatch class that distinguishes constructors during file input
 class PartialCreate {};
 
+// For half compilation
+namespace math {
+template<>
+inline auto cwiseAdd(const math::Vec3<math::half>& v, const float s)
+{
+    math::Vec3<math::half> out;
+    const math::half* ip = v.asPointer();
+    math::half* op = out.asPointer();
+    for (unsigned i = 0; i < 3; ++i, ++op, ++ip) {
+        OPENVDB_NO_TYPE_CONVERSION_WARNING_BEGIN
+        *op = *ip + s;
+        OPENVDB_NO_TYPE_CONVERSION_WARNING_END
+    }
+    return out;
+}
+} // namespace math
+
 } // namespace OPENVDB_VERSION_NAME
 } // namespace openvdb
diff --git a/openvdb/openvdb/unittest/TestLinearInterp.cc b/openvdb/openvdb/unittest/TestLinearInterp.cc
index 944f0ef60b..137c81f7de 100644
--- a/openvdb/openvdb/unittest/TestLinearInterp.cc
+++ b/openvdb/openvdb/unittest/TestLinearInterp.cc
@@ -999,7 +999,7 @@ template<typename GridType>
 void
 TestLinearInterp::testStencilsMatch()
 {
-    typedef typename GridType::ValueType ValueType;
+    using ValueType = typename GridType::ValueType;
 
     GridType grid;
     typename GridType::TreeType& tree = grid.tree();
@@ -1022,14 +1022,13 @@ TestLinearInterp::testStencilsMatch()
         openvdb::tools::GridSampler<GridType, openvdb::tools::BoxSampler>
             interpolator(grid);
 
-        openvdb::math::BoxStencil<GridType>
-            stencil(grid);
-
-        typename GridType::ValueType val1 = interpolator.sampleVoxel(pos.x(), pos.y(), pos.z());
+        openvdb::math::BoxStencil<GridType> stencil(grid);
+        const ValueType val1 = interpolator.sampleVoxel(pos.x(), pos.y(), pos.z());
         stencil.moveTo(pos);
-        typename GridType::ValueType val2 = stencil.interpolation(pos);
-        EXPECT_EQ(val1, val2);
+        const ValueType val2 = stencil.interpolation(pos);
+        static const ValueType epsilon = openvdb::math::Delta<ValueType>::value();
+        EXPECT_NEAR(val1, val2, epsilon);
     }
 }
 TEST_F(TestLinearInterp, testStencilsMatchFloat) { testStencilsMatch<openvdb::FloatGrid>(); }
diff --git a/pendingchanges/nanovdb_32.7.txt b/pendingchanges/nanovdb_32.7.txt
new file mode 100644
index 0000000000..78e2f71d01
--- /dev/null
+++ b/pendingchanges/nanovdb_32.7.txt
@@ -0,0 +1,85 @@
+Bug fix:
+nanovdb::readGrids works with a raw grid buffer.
+
+Improvements:
+Restructure file locations and namespaces to be more aligned with OpenVDB. The
+namespaces touched by the restructuring are: io, cuda, util, tools, and math.
+Add two scripts, updateFiles.sh and updateFiles.py, to update files that use
+NanoVDB. The script updateFiles.py works on both Windows and Linux.
+For a more complete list of changes, see API Changes (details).
+
+cuda::PointsToGrid supports target density.
+Add support for NanoVDB Grid of type UInt8.
+Add ability to use an externally managed CUDA buffer.
+Add create methods for CudaDeviceBuffer and exceptions.
+Improve GridValidator logic, e.g. include a check for grid count.
+Add operator > and >= for class Coord according to lexicographical order.
+Add toCodec to convert a string to the Codec enumeration type.
+Add nanovdb::strlen().
+Add strncpy util.
+Add NANOVDB_USE_SYNC_CUDA_MALLOC, which maps cudaMallocAsync and
+cudaFreeAsync to cudaMalloc and cudaFree respectively (see the sketch
+after this list).
+Add guard to UINT64_C.
+Remove use of cudaMallocAsync in PointsToGrid.cuh.
+Align PNanoVDB blind metadata to NanoVDB.
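For reference, a minimal sketch of how the stream-ordered allocation switch above behaves; this example is editorial (not part of the patch), and the buffer name d_buffer and the byte count are illustrative:

    // Sketch: the mallocAsync/freeAsync wrappers from nanovdb/util/cuda/Util.h.
    // When NANOVDB_USE_SYNC_CUDA_MALLOC is defined (or CUDART_VERSION < 11020),
    // they fall back to plain cudaMalloc/cudaFree.
    #include <nanovdb/util/cuda/Util.h>

    void example(cudaStream_t stream)
    {
        void* d_buffer = nullptr;
        cudaCheck(nanovdb::util::cuda::mallocAsync(&d_buffer, 1024, stream));// stream-ordered when available
        // ... enqueue kernels that consume d_buffer on the same stream ...
        cudaCheck(nanovdb::util::cuda::freeAsync(d_buffer, stream));// freed in stream order
    }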
+
+API Changes:
+Change mapToGridType to toGridType.
+Change mapToMagic to toMagic.
+Change CpuTimer.h to Timer.h.
+
+API Changes (details):
+These APIs are now under the math namespace: Ray, DDA, HDDA, Vec3, Vec4, BBox,
+ZeroCrossing, TreeMarcher, PointTreeMarcher, BoxStencil, CurvatureStencil,
+GradStencil, WenoStencil, AlignUp, Min, Max, Abs, Clamp, Sqrt, Sign, Maximum,
+Delta, RoundDown, pi, isApproxZero, Round, createSampler, SampleFromVoxels.
+
+These APIs are now under the tools namespace: createNanoGrid, StatsMode,
+createLevelSetSphere, createFogVolumeSphere, createFogVolumeTorus,
+createLevelSetBox, CreateNanoGrid, updateGridStats, evalChecksum,
+validateChecksum, checkGrid, Extrema.
+
+These APIs are now under the util namespace: is_floating_point, findLowestOn,
+findHighestOn, Range, streq, strcpy, strcat, empty, Split, invoke, forEach,
+reduce, prefixSum, is_same, is_specialization, PtrAdd, PtrDiff.
+
+Move nanovdb::build to nanovdb::tools::build.
+Rename nanovdb::BBoxR to nanovdb::Vec3dBBox.
+Rename nanovdb::BBox to nanovdb::Vec3dBbox.
+Move nanovdb::cudaCreateNodeManager to nanovdb::cuda::createNodeManager.
+Move and rename nanovdb::cudaVoxelsToGrid to nanovdb::cuda::voxelsToGrid.
+Move and rename nanovdb::cudaPointsToGrid to nanovdb::cuda::pointsToGrid.
+Move nanovdb::DitherLUT to nanovdb::math::DitherLUT.
+Move and rename nanovdb::PackedRGBA8 to nanovdb::math::Rgba8.
+Move nanovdb::Rgba8 to nanovdb::math::Rgba8.
+Move and rename nanovdb::CpuTimer to nanovdb::util::Timer.
+Move nanovdb::GpuTimer to nanovdb::util::cuda::Timer.
+Move and rename nanovdb::CountOn to nanovdb::util::countOn.
+
+Move util/GridHandle.h to GridHandle.h.
+Move util/BuildGrid.h to tools/GridBuilder.h.
+Move util/GridBuilder.h to tools/GridBuilder.h.
+Move util/IO.h to io/IO.h.
+Move util/CSampleFromVoxels.h to math/CSampleFromVoxels.h.
+Move util/DitherLUT.h to math/DitherLUT.h.
+Move util/HDDA.h to math/HDDA.h.
+Move util/Ray.h to math/Ray.h.
+Move util/SampleFromVoxels.h to math/SampleFromVoxels.h.
+Move util/Stencils.h to math/Stencils.h.
+Move util/CreateNanoGrid.h to tools/CreateNanoGrid.h.
+Move and rename util/Primitives.h to tools/CreatePrimitives.h.
+Move util/GridChecksum.h to tools/GridChecksum.h.
+Move util/GridStats.h to tools/GridStats.h.
+Move util/GridValidator.h to tools/GridValidator.h.
+Move util/NanoToOpenVDB.h to tools/NanoToOpenVDB.h.
+Move util/cuda/CudaGridChecksum.cuh to tools/cuda/CudaGridChecksum.cuh.
+Move util/cuda/CudaGridStats.cuh to tools/cuda/CudaGridStats.cuh.
+Move util/cuda/CudaGridValidator.cuh to tools/cuda/CudaGridValidator.cuh.
+Move util/cuda/CudaIndexToGrid.cuh to tools/cuda/CudaIndexToGrid.cuh.
+Move and rename util/cuda/CudaPointsToGrid.cuh to tools/cuda/PointsToGrid.cuh.
+Move util/cuda/CudaSignedFloodFill.cuh to tools/cuda/CudaSignedFloodFill.cuh.
+Move and rename util/cuda/CudaDeviceBuffer.h to cuda/DeviceBuffer.h.
+Move and rename util/cuda/CudaGridHandle.cuh to cuda/GridHandle.cuh.
+Move and rename util/cuda/CudaUtils.h to util/cuda/Util.h.
+Move and consolidate util/cuda/GpuTimer.h to util/cuda/Timer.h.
diff --git a/pendingchanges/windows_static_blosc.txt b/pendingchanges/windows_static_blosc.txt
new file mode 100644
index 0000000000..882dc1f3fa
--- /dev/null
+++ b/pendingchanges/windows_static_blosc.txt
@@ -0,0 +1,2 @@
+Build:
+  - Fixed an issue with the Blosc CMake FindPackage for the OpenVDB Windows static library.
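To tie the consolidated utilities together, here is a minimal usage sketch; it is editorial (not part of the patch), assumes an nvcc translation unit compiled with --extended-lambda, and the names scale, d_data and numItems are illustrative:

    // Sketch: timing a device-lambda launch with nanovdb::util::cuda::Timer,
    // blocksPerGrid and lambdaKernel from the headers added above.
    #include <nanovdb/util/cuda/Timer.h>
    #include <nanovdb/util/cuda/Util.h>

    void scale(float* d_data, size_t numItems, cudaStream_t stream)
    {
        nanovdb::util::cuda::Timer timer("scaling buffer", stream);// records the start event
        const unsigned threadsPerBlock = 128;// must be a positive multiple of 32
        const unsigned blocks = unsigned(nanovdb::util::cuda::blocksPerGrid(numItems, threadsPerBlock));
        nanovdb::util::cuda::lambdaKernel<<<blocks, threadsPerBlock, 0, stream>>>(
            numItems, [=] __device__ (size_t tid) { d_data[tid] *= 2.0f; });
        cudaCheckError();// reports any launch error via gpuAssert
        timer.stop();// synchronizes the stop event and prints the elapsed milliseconds
    }

Note that lambdaKernel is only declared when compiling with __CUDACC__, so this sketch must live in a .cu file.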
diff --git a/tsc/meetings/2023-01-24.md b/tsc/meetings/2023-01-24.md
new file mode 100644
index 0000000000..966a406bfa
--- /dev/null
+++ b/tsc/meetings/2023-01-24.md
@@ -0,0 +1,125 @@
+Minutes from OpenVDB TSC meeting, January 24, 2023
+
+Attendees: *Jeff* L., *Andre* P, *Dan* B., *Ken* M., *Nick* A., *Greg* H.
+
+Additional Attendees: JT Nelson (Blender)
+
+Regrets: *Rich* J.
+
+Agenda:
+
+1) Confirm quorum
+2) Secretary
+3) Broken Houdini download link
+4) Open PR's
+5) Fracture CSG tools
+6) Active states
+7) Sharpening filter
+
+
+------------
+
+1) Confirm quorum
+
+Quorum is present.
+
+2) Secretary
+
+Secretary is Greg Hurst.
+
+3) Broken Houdini download link
+
+The Houdini download link is broken on the openvdb.org downloads page. This happened during the git lfs switch. Need to upload regularly using git.
+
+The file is not updated often, so it's ok to not use git lfs and just use a regular git commit.
+
+Maybe the URL needs to change in the html source when using git lfs?
+
+4) Open PR's
+
+Python bindings (1515)
+* Ken will try to look at the PR tomorrow
+* Recreate the PR to remedy the CLA issues and credit the original author
+* Or squash everything and go as if everything is just one commit
+
+Switch to using the static asserts (1522)
+* Why do we have a special wrapper for the static assert?
+* NANOVDB_ASSERT instead of the static_assert
+* Soon nanovdb will require C++17 (waiting on pnanovdb)
+* Ideally we'd have #ifdef platform instead of #if 1, so keep skeleton code present through #if 0
+* Ken will approve and merge
+
+Prefer fixed-width integer types instead of size_t (1528)
+* Awaiting another approval -- Dan will approve
+
+Add missing separate_arguments cmake call (1534)
+* Needs another look -- not entirely clear why this was added
+* Splits a list of arguments in case they're separated by non-standard delimiters
+* Perhaps re-ask the OP what failed / why make this PR
+
+Support for IlmBase versions < 3.1 is deprecated and will be removed (1529)
+* vdbtool stuff
+* This PR looks to remove support for old versions
+
+Remove the explicit default assignment operator (1530)
+* Remove explicit default assignment operator in nanovdb
+* Once something is given a default, you need to set defaults for other things too
+* More defaults need to be removed in the same file before approval
+
+Consolidated ValueAccessor implementations (1547)
+* Perhaps someone can build and see if everything still works, test against Houdini, etc.
+* Implementation-related questions added to the PR by Dan
+* The override specifiers might be redundant
+* Need to add missing isCached in the code base in a similar piece of code.
+
+Fix Segfault in Projection Mode of VDB Advect Points SOP (1559)
+* Just awaiting approval from Jeff
+
+Fix all the int-in-bool-context warnings with GCC9 (1563)
+* Switch to use constexpr
+* Still need macros to guard type conversions? (Node type conversion warning, just relevant to the float portion now)
+* LOD removed for bool grids
+
+5) Fracture CSG tools
+
+https://github.com/AcademySoftwareFoundation/openvdb/issues/1566
+
+Seamless free cuts -- how can you do this with OpenVDB? Houdini can do it though, since it has robust mesh support.
+
+Really needed to make your split-frame free of artifacts and for a water-tight union.
+
+Our current choice is to not support robust mesh computation. Currently OpenVDB just has polygon soup, mainly used for translation purposes. Robust support could lead us down a rabbit hole.
+
+Because our meshes are templated, it's probably not clear what methods you would need to implement if you wrote your own acceleration structure.
+
+Houdini seems to use everything in OpenVDB here, and so we could return a polygon soup and an edge data list (MeshToVoxelEdgeData) and the user can do it. The SOP can be a reference.
+
+Point the OP to this SOP / the OpenVDB methods.
+
+6) Active / Inactive States
+
+What should the default behavior be and how to expose different functionalities.
+
+Default behavior proposed: Max of values, and if either is active make the result active.
+
+Currently the activeness of states is not being brought over.
+
+Not more efficient to make 2 passes when combining multiple grids. Loses cache coherency.
+2 passes node-wise will be more efficient than 2 passes tree-wise.
+
+Do we want the ability to handle active states differently?
+Maybe we have a use to ignore when any grid has an inactive value.
+
+max( (0.0, inactive), (-1.0, active) ) --> (0.0, active) or (0.0, inactive)?
+
+What does it mean for a fog volume node to be active? Tags (GRID_LEVEL_SET, etc in enum GridClass) give implicit meaning to active states / values.
+The majority of tools don't seem to normalize fogs to lie between 0 and 1. Difficult to maintain this discipline.
+
+Make sure to make whatever choices extendable. The tricky part is coming up with different patterns.
+Selection merge and reduction merge, etc.
+
+7) Sharpening filter
+
+Switch away from boost dependencies & add unit tests.
+
+Seems like an updated PR could be around the corner.
diff --git a/tsc/meetings/2023-05-02.md b/tsc/meetings/2023-05-02.md
new file mode 100644
index 0000000000..b75abfcd72
--- /dev/null
+++ b/tsc/meetings/2023-05-02.md
@@ -0,0 +1,89 @@
+Minutes from OpenVDB TSC meeting, May 02, 2023
+
+Attendees: *Jeff* L., *Andre* P, *Ken* M., *Greg* H., *Dan* B.
+
+Additional Attendees:
+
+Regrets: *Rich* J., *Nick* A.
+
+Agenda:
+
+1) Confirm quorum
+2) Secretary
+3) SIGGRAPH 2023
+4) Website broken link
+5) Root node offset
+6) I/O revamp
+
+------------
+
+
+1) Confirm quorum
+
+Quorum is present.
+
+
+2) Secretary
+
+Secretary is Greg Hurst.
+
+
+3) SIGGRAPH 2023
+
+Course accepted with good feedback from reviewers.
+Must sign over copyrights for anything in the presentation. Need to do this now.
+Course material must be submitted by June 5. Option to revise slides by August 11.
+
+For ASWF: tentatively do a Birds of a Feather session and advertise the SIGGRAPH course.
+
+
+4) Website broken link
+
+PR for the broken Houdini link to be merged.
+https://github.com/AcademySoftwareFoundation/openvdb-website/pull/71
+
+Fixed link:
+https://www.openvdb.org/download/files/houdini_examples.hip-1.0.1.zip
+
+
+5) Root node offset
+
+Root node is sparse, all other nodes are dense. Root is essentially a hash table.
+
+Since root is sparse, root access is slower. Tend to avoid touching the root node, e.g. value accessors.
+
+Root is centered at origin (0, 0, 0), and so a small sphere centered at the origin makes 8 children.
+
+The offset mitigates this issue.
+
+Root node now has an mOrigin member, just like all other nodes (added in v10).
+
+Currently mOrigin is still hard-coded to the origin and there are even checks that throw errors if it is not.
+
+A first pass tried to hard code a half offset (-2048, -2048, -2048) but saw no measurable speedup.
+
+Can we make mOrigin anything? If so, it looks like we will have massive overhead -- merging trees, etc. will need to rebuild the tree structure.
+
+If you guarantee that the root node is aligned with the grandchildren of the other root,
+e.g. if mOrigin is a multiple of 128, then only the child nodes of the root are misaligned.
+And so during these operations, only the root node needs to be rebuilt.
+It _can_ generalize to arbitrary fan factors but needs a different number from 128.
+A 2-level tree is a special case, but n (>= 3) levels follow the above logic.
+
+What is the impact on the existing code? CSG, Combinations, etc.
+Merging 2 grids with incommensurate origins is tricky if const operators... duplicate data etc.
+
+How to maintain backward compatibility for I/O if we just hardcode (-2048, -2048, -2048)?
+And (it seems) that's the only backward compatibility to suss out.
+Export will need to recenter to (0, 0, 0)?
+I/O needs to be refactored anyway...
+A hardcoded global offset means we don't need to explicitly export it.
+
+Ken will investigate and do a deep dive.
+
+
+6) I/O revamp
+
+Would be good to investigate an I/O revamp.
+Come up with a list of modern requirements.
+Refer to this list in future development efforts.
diff --git a/tsc/meetings/2023-09-05.md b/tsc/meetings/2023-09-05.md
new file mode 100644
index 0000000000..93203b7d91
--- /dev/null
+++ b/tsc/meetings/2023-09-05.md
@@ -0,0 +1,129 @@
+Minutes from OpenVDB TSC meeting, September 5, 2023
+
+Attendees: *Jeff* L., *Rich* J., *Ken* M., *Greg* H., *Dan* B., *Andre* P.
+
+Additional Attendees:
+
+Regrets: *Nick* A.
+
+Agenda:
+
+1) Confirm quorum
+2) Secretary
+3) VTT
+4) VDB Maya
+5) V10.1
+6) PRs
+
+------------
+
+
+1) Confirm quorum
+
+Quorum is present.
+
+
+2) Secretary
+
+Secretary is Greg Hurst.
+
+
+3) VTT
+
+Autodesk has a product called Bifrost (a sim framework).
+
+Internal multires grid.
+
+The NanoVTT github repo expires in September... but it's a fork of OpenVDB?
+
+The Bifrost group seems gung-ho about open sourcing it.
+
+Why open source it?
+Integration of nanovtt into OpenVDB will be intricate.
+Attending meetings and contributing to the CI is a good start, but it will be much more complicated. What's the balance?
+
+Sampling across tiles is tricky and they have the method they want to use -- could be advantageous to open source as a standard.
+
+Why should this be part of OpenVDB and not its own product? Best not to have competing formats.
+But how can the two coexist in a meaningful way? Can't just have two independent things.
+
+OpenVDB has threadpools, math functions, metadata, transforms, etc. And a standard API. VTT could integrate into these.
+
+VDBs are sparse (active / inactive, etc).
+VTT is in some sense dense, but adaptive.
+Complementary data structures.
+
+This is an opportunity to rip out delayed loading for vdb.
+We can have a family of grids that perform and specialize in different use cases.
+When we write tools, what grids should & could these tools support?
+
+Could this be confusing to general users?
+Is VTT too similar-sounding to VDB?
+
+We will need support from them integrating properly.
+We need commitment to delivering everything, not just nanovtt.
+
+Another need is conversion between vdb and vtt, something that's missing at the moment.
+
+Can we iterate over vtt grids in similar fashions (API-wise at least) to DynamicNodeManager?
+
+If they first just give us NanoVTT, then they write a converter, is that even a meaningful thing?
+An OpenVDB grid does not contain adaptive information, but there are possible ways one might want to convert.
+
+How does VTT compare to a stack of VDBs?
+
+Did VTT mention point support at all? Points to volume is mentioned in their ppt.
+
+Mathematica link to vtt? Probably, yes
+
+**********
+
+We agree we don't want just NanoVTT.
+The C++ structure for non-NanoVTT should have:
+   VTT needs a sampler
+   a way to save and load from disk
+   NodeManager-esque interfaces
+   Converters
+   Random access
+
+**********
+
+Worth asking them about the feasibility of the above and what they have in the Bifrost SDK.
+
+Let's organize all of this in a Google doc to establish minimally required features.
+
+What version would this go into?
+This will change the ABI? and so V12 integration?
+
+Probably would inherit GridBase without a Tree pointer.
+
+
+4) VDB Maya
+
+What happens to VDB Maya now?
+
+Probably broken at this point...
+
+Should we just move it to its own repo and retire it from the OpenVDB repo?
+
+It's a useful reference and a useful starting point.
+
+Who owns the separate repo, etc...
+
+What about deleting it from the git repo but keeping a folder with a text file saying to go to a branch to find it?
+
+
+5) V10.1
+
+Ellipsoid stuff is still being worked on.
+
+Just push out what we have now.
+
+
+6) PRs
+
+PR 1651 suffering from TBB build errors:
+   https://github.com/oneapi-src/oneTBB/issues/301
+   Bumping up to TBB 2021.2 will probably fix this
+PR 1655 needs a look
+PR 1666 on fast sweeping needs to be refactored
diff --git a/tsc/meetings/2023-10-17.md b/tsc/meetings/2023-10-17.md
new file mode 100644
index 0000000000..88b39cf93f
--- /dev/null
+++ b/tsc/meetings/2023-10-17.md
@@ -0,0 +1,65 @@
+Minutes from OpenVDB TSC meeting, October 17, 2023
+
+Attendees: *Jeff* L., *Andre* P, *Ken* M., *Greg* H., *Dan* B., *Rich* J., *Nick* A.
+
+Additional Attendees:
+
+Regrets:
+
+Agenda:
+
+1) Confirm quorum
+2) Secretary
+3) PRs
+4) Root Node TBB concurrent hash node
+5) ABI changes in NanoVDB
+6) VDB 11
+
+------------
+
+
+1) Confirm quorum
+
+Quorum is present.
+
+
+2) Secretary
+
+Secretary is Greg Hurst.
+
+
+3) PRs
+
+1687 needs another approval and CLA
+1685 abstraction layer.
+   122 files changed
+   wrapper around TBB
+   higher level wrappers in nano:
+      for_each, reduce, ...
+      define a functor that you do parallel_reduce over
+   std::thread implementation & serial fallback functionality
+   similar to TBB mechanisms we use in the codebase; we should consolidate this at first
+   one use case here is if you have your own thread pool
+   another is to be able to build OpenVDB without TBB
+
+   OpenVDB needs to have its own high-level parallel functionality that just uses TBB under the hood
+   grainsize is important, keep it exposed
+1679
+   checking for the grid but dereferencing the iterator
+   looks good to go
+
+4) Root Node TBB concurrent hash node
+   violates ABI?
+   we should get rid of this
+
+5) ABI changes in NanoVDB
+   Nano has its own versioning system
+   Do we adopt the same ABI change policies for nano as used in OpenVDB?
+   So now is the time to change the ABI
+
+6) VDB 11
+   Need people to look at infrastructure changes
+   Removes support for ilmBase
+   Does this build with Houdini 20? 10.1 works just fine
+   Closer to removing Boost dependencies (still optional for delayed loading)
diff --git a/tsc/meetings/2024-02-06.md b/tsc/meetings/2024-02-06.md
new file mode 100644
index 0000000000..dae626664b
--- /dev/null
+++ b/tsc/meetings/2024-02-06.md
@@ -0,0 +1,73 @@
+Minutes from OpenVDB TSC meeting, February 06, 2024
+
+Attendees: *Jeff* L., *Andre* P, *Ken* M., *Greg* H., *Dan* B., *Rich* J.
+
+Additional Attendees:
+
+Regrets:
+
+Agenda:
+
+1) Confirm quorum
+2) Secretary
+3) PRs
+4) Major refactoring in Nano
+
+------------
+
+
+1) Confirm quorum
+
+Quorum is present.
+
+
+2) Secretary
+
+Secretary is Greg Hurst.
+
+
+3) PRs
+
+1762: Vector fog volumes throw a printf to the console whenever one is created
+
+Remove #if 1 fprintf(stderr,"Warning: ....
+
+1760: Adaptive grid placeholder.
+Simple grid: a background-value-only grid -- extendable to dense grids, etc.
+
+adaptive namespace, in directory openvdb/adaptive
+Renames GridTypes to SparseGridTypes and adds AdaptiveGridTypes too.
+Instead should we do AllGridTypes and keep GridTypes unchanged, OR do we deprecate the GridTypes alias?
+The name 'GridType' is confusing... it really is a value type for a grid
+
+Introduces TreeTraits -- determines if a grid is sparse, adaptive, etc.
+e.g.
+
+if constexpr (TreeTraits<TreeType>::IsSparse) {
+    // sparse implementation
+} else if constexpr (TreeTraits<TreeType>::IsAdaptive) {
+    // adaptive implementation
+}
+OPENVDB_THROW(NotImplementedError, "");
+
+Do we have an entry method and then a header file for each grid type?
+e.g. openvdb/tools/Count.h has an entry method that calls openvdb/tools/sparse/Count.h, etc
+
+How many tools will/should support multiple grid types? Andre has a list of the ~200 tools to sift through
+certainly samplers
+Let's add in support as needed
+API and user include paths need to remain the same; then implementation details aren't as important
+
+Proposed types:
+Sparse (OG grids)
+Adaptive
+Dense
+Procedural/Implicit
+
+Invite the VTT team back and walk them through these ideas and attempt
+(Change example from memUsage to sampler on a flat list of points)
+
+4) Major refactoring in Nano
+
+Use of namespaces that emulate OpenVDB, with accompanying subdirectories.
+Affects client code, but there's a script that can help alleviate that.
diff --git a/tsc/meetings/2024-04-06.md b/tsc/meetings/2024-04-06.md
new file mode 100644
index 0000000000..bf7d78e53f
--- /dev/null
+++ b/tsc/meetings/2024-04-06.md
@@ -0,0 +1,84 @@
+Minutes from OpenVDB TSC meeting, April 06, 2024
+
+Attendees: *Jeff* L., *Andre* P, *Ken* M., *Greg* H., *Dan* B.
+
+Additional Attendees: Nishith Singhai
+
+Regrets:
+
+Agenda:
+
+1) Confirm quorum
+2) Secretary
+3) Half Grid & Grid registration
+4) PR 1780
+5) retooling IO
+6) Adaptive grids
+7) xz
+
+------------
+
+
+1) Confirm quorum
+
+Quorum is present.
+
+
+2) Secretary
+
+Secretary is Greg Hurst.
+
+
+3) Half Grid
+
+Andre will merge his work into master first
+ghurst will then retool his branch too
+Autodesk is working from Andre's branch too for half IO conversions
+
+We should add Vec3HGrid
+
+Other Vec2XXX grids we might not want to register in the openvdb repo
+
+We should be willing to add IO but not instantiation by default
+
+
+4) PR 1780
+
+Ivo presented 2 weeks ago
+
+Question:
+Is it worthwhile to expose only the conversion to a half grid first?
+A lot of improvements could be made to IO in general, so it might make sense to start retooling here with a leaner implementation.
+
+Answers:
+But it's already 'done' and could influence how we want to retool the IO...
+It is also much more efficient to do a JIT conversion during import.
+
+
+5) retooling IO
+
+Need to do some benchmarking to determine if it's worth retooling IO
+
+Is there a way to merge vdb and nvdb into one file format?
+
+What about adaptive, dense, etc.
+
+No multipassing (multiple traversals of the tree to export), and so you work against that, i.e. you must write topology then data.
+
+It's because the writers are on the tree, because the methods need to be virtual. And so you can just write out certain internal nodes, etc.
+
+
+6) Adaptive grids
+
+We should decide if we're going to pitch to Autodesk the prototype that's been put together.
+
+Looking over PR 1760 again...
+
+
+7) xz
+
+SSH vulnerability since xz is compromised
+
+Consequences for OpenVDB?
+
+Treat external vdbs as suspect, and therefore we 'round-trip' import/export the vdb or recreate the vdb ourselves? So binary vdbs being read for bug submissions / unit-tests.
diff --git a/tsc/meetings/2024-04-23.md b/tsc/meetings/2024-04-23.md
new file mode 100644
index 0000000000..144d9cd061
--- /dev/null
+++ b/tsc/meetings/2024-04-23.md
@@ -0,0 +1,60 @@
+Minutes from OpenVDB TSC meeting, April 23rd, 2024
+
+Attendees: *Jeff* L., *Andre* P, *Dan* B., *Greg* H., *Ken* M.
+
+Additional Attendees:
+Rabih Masri (Solitons, Inc: building an ultra-realistic underwater metaverse)
+
+Regrets: *Nick* A., *Rich* J.
+
+Agenda:
+
+1) Confirm quorum
+2) Secretary
+3) Bifrost team
+4) PR 1793
+5) PR 1794
+6) PR 1796
+7) fVDB
+
+------------
+
+1) Confirm quorum
+
+Quorum is present.
+
+2) Secretary
+
+Secretary is Greg Hurst.
+
+3) Bifrost team
+
+Confirmed we have sent them relevant info pertaining to adaptive grids and are waiting to hear back.
+
+4) PR 1793
+
+Fillet filter based on LevelSetFilter.
+
+"Clamped min-principal curvature flow", so it's similar to mean-curvature flow.
+
+Appears on page 204 of 'Level Set Methods and Fast Marching Methods' by J.A. Sethian.
+
+Indefinite iterations converge to the convex hull.
+
+5) PR 1794
+
+Quick fix to prevent integer overflow in NodeManager and LeafManager when iterating over large grids.
+
+6) PR 1796
+
+Guarding UINT64_C in nanovdb (for NVRTC users who have it defined already).
+
+7) fVDB
+
+Future project to be open sourced in the OpenVDB repo.
+
+Will get its own folder, similar to Houdini, Maya, Wolfram, etc.
+
+Stands for "feature VDB" and uses ML to reconstruct geometry from point clouds.
+
+PyTorch dependency.
diff --git a/tsc/meetings/2024-04-30.md b/tsc/meetings/2024-04-30.md
new file mode 100644
index 0000000000..dd4439b7d7
--- /dev/null
+++ b/tsc/meetings/2024-04-30.md
@@ -0,0 +1,67 @@
+Minutes from OpenVDB TSC meeting, April 30th, 2024
+
+Attendees: *Ken* M., *Dan* B., *Greg* H., *Rich* J., *Andre* P
+
+Additional Attendees:
+Matthew Cong (NVIDIA), Alexandre Sirois-Vigneux (SideFX),
+Efty Sifakis (Univ. Wisconsin), Francis Williams (NVIDIA),
+Jonathan Schwartz (NVIDIA), Michiel Hagedoorn,
+Dhruv Govil (Apple), Tom (SideFX), Rayhaan Tanweer,
+Rabih, Youmna, Shahan N
+
+Regrets: *Jeff* L., *Nick* A.
+
+Agenda:
+
+1) Confirm quorum
+2) Secretary
+3) Migration from PyBind11 to NanoBind
+4) Greg's ASWF membership
+5) FVDB
+6) Next meeting
+
+------------
+
+1) Confirm quorum
+
+Quorum is present.
+
+2) Secretary
+
+Secretary is Andre Pradhana.
+
+3) Migration from PyBind11 to NanoBind
+
+Matthew Cong presented a solution to handle the NanoBind dependency
+by using pip. He has done work on the NanoBind side to allow this workflow.
+The solution with Git subtree/submodule is brittle because it
+can run into firewall issues.
+
+It was reiterated that NanoBind is preferred because of zero-copy interop
+on the GPU side.
+
+Dhruv Govil pointed out that PyBind is used by other projects
+for its support for multiple inheritance (which NanoBind doesn't
+support).
+
+4) Greg's ASWF membership
+
+Greg will follow up with John Mertic to be added to the ASWF organization.
+
+5) FVDB
+
+The NVIDIA team gave a presentation on fVDB, a project that is under
+consideration for adoption by the OpenVDB project. It is a framework for
+spatial reasoning on 3D volumetric datasets, which includes deep
+learning.
+
+The main dependency is PyTorch. The project will live in its own
+directory, parallel to the `openvdb` directory.
+
+Ken will bring up the need for GPU support in the CI in the TAC meeting.
+TSC members will be added to the private fVDB repository for further
+investigation. Jonathan Schwartz provided us with documentation.
+
+6) Next meeting
+
+Next meeting is on May 7th, 2024. 2pm-3pm EDT (GMT-4)
diff --git a/tsc/meetings/2024-05-21.md b/tsc/meetings/2024-05-21.md
new file mode 100644
index 0000000000..243057dff8
--- /dev/null
+++ b/tsc/meetings/2024-05-21.md
@@ -0,0 +1,72 @@
+Minutes from OpenVDB TSC meeting, May 21st, 2024
+
+Attendees: *Jeff* L., *Andre* P, *Dan* B., *Greg* H., *Rich* J,
+*Ken* M.
+
+Additional Attendees:
+Rabih, Matthew Cong (NVidia), Jonathan Swartz, Francis Williams.
+
+Regrets: *Nick* A.
+
+Agenda:
+
+1) Confirm quorum
+2) Secretary
+3) TAC Update
+4) PR 1807 & CI Updates
+5) fVDB Updates
+6) Next meeting
+
+------------
+
+1) Confirm quorum
+
+Quorum is present.
+
+2) Secretary
+
+Secretary is Jeff Lait.
+
+3) TAC Update
+
+Proposed NanoBind and asked for GPUs on CI runners.
+
+Apparently we already have a service for GPUs, which is paid for, but
+the ASWF seems fine with that. Apparently it is used by OpenColorIO, so we can
+ask them for advice. We might want to avoid using GPU machines for
+the build? But that would require build artifacts. There is some
+experience at NVidia splitting the GPU and CPU CI tests. Ken will get
+instructions on how to run on it at all; we'll test with fVDB first.
+
+There is a CI slack channel we can go to for more aid as well.
+
+NanoBind was presented to the TAC as well. Positive feedback. They
+seem okay with us using it moving forward. It is also suggested other
+projects will move to NanoBind.
+
+NanoBind does not provide an interpreter, but can be called from a
+host's own interpreter.
+
+4) PR 1807 & CI Updates
+
+The Mac runner has to update to the latest version.
+
+The Cuda compiler is failing with 11.5. This is from Oct 2021, so we should
+drop support? Matthew Cong will take a look at it.
+
+Missing Changelist, which Andre will provide.
+
+5) fVDB updates
+
+Who should review this? Do we avoid an NVidia-centered review? So long
+as it is supported and GitHub issues are responded to, there is not much concern.
+
+The TSC members are needed to sign off, but do not have to be the
+"actual" reviewers. These can be reviewed by non-TSC members, and TSC
+members can approve the reviews.
+
+Initial version should be 0.X as the API can change.
+
+6) Next meeting
+
+Next meeting is on May 28th, 2024. 2pm-3pm EDT (GMT-4)
diff --git a/tsc/meetings/2024-06-25.md b/tsc/meetings/2024-06-25.md
new file mode 100644
index 0000000000..9737d82001
--- /dev/null
+++ b/tsc/meetings/2024-06-25.md
@@ -0,0 +1,75 @@
+Minutes from OpenVDB TSC meeting, June 25th, 2024
+
+Attendees: *Ken* M., *Jeff* L., *Greg* H., *Rich* J, *Andre* P.
+
+Regrets: *Nick* A., *Dan* B.
+
+Additional Attendees:
+Dhruv Govil (Apple), Jonathan Swartz (NVIDIA), Francis Williams (NVIDIA),
+John Mertic (The Linux Foundation), J. T. Nelson (Blender)
+
+Agenda:
+
+1) Confirm quorum
+2) Secretary
+3) CLA Language Option
+4) License change PRs
+5) NanoVDB Review
+6) fVDB PR
+7) Half Grid
+8) SIGGRAPH OpenVDB BoF (Birds of a Feather)
+9) Next Meeting
+
+------------
+
+1) Confirm quorum
+
+Quorum is present.
+
+2) Secretary
+
+Secretary is Andre Pradhana.
+
+3) CLA Language Option
+
+John Mertic brought up the possibility of updating the CLA (Contributor
+License Agreement) for the OpenVDB project: one option uses the Linux Foundation
+short form and the other an Apache-style CLA. The advantage of moving
+to a new CLA is to align the project with other organizations, reducing
+possible friction for contributors. Ken will send an email to the
+private TSC mailing list and we will take a vote.
+
+4) License change PRs
+
+Ken and Andre will work on merging more relicensing PRs.
+
+Dhruv will write the script that will change the license identifier
+from the MPL 2.0 license to Apache 2.0. He will submit a PR.
+
+We need ILM sign-off for the license change.
+
+5) NanoVDB Review
+
+Andre is trying to get unblocked on the failing NanoVDB Windows CI.
+
+Andre suggests a name change from nanovdb::callNanoGrid to nanovdb::processTypedGrid
+to reflect what is available in openvdb.
+
+6) fVDB PR
+
+We want to get fVDB merged in before SIGGRAPH. We need more progress
+on the fVDB Code Review.
+
+7) Half Grid
+
+Greg asks for more progress on the Half Grid PR. So far, Greg has been battle-testing
+the Half Grid PR and found no issues with it. Andre will write more unit tests after he
+is done with the NanoVDB PR.
+
+8) SIGGRAPH OpenVDB BoF (Birds of a Feather)
+
+The SIGGRAPH OpenVDB BoF is on Monday at 9 a.m.
+
+9) Next Meeting
+
+Next meeting is on July 2nd, 2024. 2pm-3pm EDT (GMT-4).
diff --git a/tsc/meetings/2024-07-02.md b/tsc/meetings/2024-07-02.md
new file mode 100644
index 0000000000..94ba460eac
--- /dev/null
+++ b/tsc/meetings/2024-07-02.md
@@ -0,0 +1,65 @@
+Minutes from OpenVDB TSC meeting, July 2nd, 2024
+
+Attendees: *Jeff* L., *Greg* H., *Rich* J, *Ken* M., *Andre* P.
+
+Regrets: *Nick* A., *Dan* B.
+
+Additional Attendees:
+Dhruv Govil (Apple), Jonathan Swartz (NVIDIA), Francis Williams (NVIDIA),
+JT Nelson (Blender)
+
+Agenda:
+
+1) Confirm quorum
+2) Secretary
+3) CLA Voting
+4) PR for License Change
+5) NanoVDB Code Review
+6) fVDB PR
+7) Siggraph BOF
+8) Blender
+9) Next Meeting
+
+------------
+
+1) Confirm quorum
+
+Quorum is present.
+
+2) Secretary
+
+Secretary is Andre Pradhana.
+
+3) CLA Voting
+
+We voted to move our CLA to the one used by most other ASWF
+projects.
+
+4) License Change PR
+
+Dhruv put up a PR for updating the license: [PR-1858](https://github.com/AcademySoftwareFoundation/openvdb/pull/1858).
+
+5) NanoVDB Code Review
+
+Linux CI is currently not passing. Andre suggests a name change from
+callNanoGrid to nanovdb::processTypedGrid since it provides the
+same functionality as the openvdb counterpart.
+
+6) fVDB PR
+
+The fVDB branch needs to be public on GitHub and it should be visible.
+The first step is to reopen the fVDB PR. Need to add a link to the
+fVDB documentation on the OpenVDB website. We need CI to build docs
+for fVDB.
+
+7) Siggraph BOF
+
+The SIGGRAPH BOF is Monday, July 29 at 9 a.m.
+
+8) Blender
+
+OpenVDB in Blender is working with the Open3D Engine.
+
+9) Next Meeting
+
+Next meeting is on July 9th, 2024. 2pm-3pm EDT (GMT-4).
diff --git a/tsc/meetings/2024-08-27.md b/tsc/meetings/2024-08-27.md
new file mode 100644
index 0000000000..276a9005ec
--- /dev/null
+++ b/tsc/meetings/2024-08-27.md
@@ -0,0 +1,117 @@
+Minutes from OpenVDB TSC meeting, August 27th, 2024
+
+Attendees: *Jeff* L., *Greg* H., *Rich* J, *Ken* M., *Andre* P., *Dan* B.
+
+Regrets: *Nick* A., *Ken* M.
+
+Additional Attendees:
+Dhruv Govil (Apple), Jonathan Swartz (NVIDIA), Barry Dempsey
+
+Agenda:
+
+1) Confirm quorum
+2) Secretary
+3) VDB 12 Release Items
+4) License Changes
+5) Maintainer List
+6) CI Issues
+7) PRs to merge
+8) Next meeting
+
+------------
+
+1) Confirm quorum
+
+Quorum is present.
+
+2) Secretary
+
+Secretary is Jeff Lait.
+
+3) VDB 12 Release Items
+
+a) Half support for v12
+
+Should the half grid be a REAL grid? I.e., should the macros for REAL grid
+types include it? The conclusion was they should not. Instead a
+higher-level named macro should contain Half.
+
+With this decided it is a v12 candidate. The proxy grid test is
+failing, but other than that it seems good. The problem is we can't
+instantiate a proxy tree at the moment.
+
+b) fVDB
+
+Will be a feature branch. Can we have a cadence separate from main VDB?
+Can binaries be released in a different place, like Anaconda?
+
+NanoVDB has a separate release system.
+
+Tags can be created on branches to mark a release point.
+
+This would require the fVDB release to be based against a stable
+version of VDB.
+
+Currently NanoVDB and fVDB are tightly coupled - improvements to
+NanoVDB are coming from fVDB. This suggests NanoVDB needs to match
+fVDB.
+
+4) License Changes
+
+Still trying to set up the second repo to verify CLAs.
+
+5) Maintainer List
+
+Unanimous for changing Committer to Maintainer.
+
+Unanimous that it takes two maintainers to commit.
+
+Unanimous that all current TSC members are also maintainers.
+
+Unanimous that Jonathan Swartz is to be added as a maintainer.
+
+Jeff will attempt to update the relevant policy docs.
+
+6) CI Issues
+
+No good tricks to speed up CI debugging. Can sometimes turn off
+unnecessary runners, but some runners just take a long time until the
+first possible error.
+
+The secret is for the Houdini download.
+
+The cuDNN download seemed to be failing. This is likely not from our
+runner side.
+
+Need notes on the PR for CI to explain why some of the unusual
+solutions are present.
+
+Why certain versions of clang? This is likely to try to match the g++
+of the VFX platform, but we are not sure. The docker images came from
+the ASWF. We have clang at all to catch more issues than g++ alone.
+
+VDB12 will likely drop support for gcc9.
+
+7) PRs to merge
+
+a) 744 - remove boost any
+
+Someone must approve by next meeting or it will be approved then.
+
+b) 1723 - boost conversion traits
+
+Waiting on the Half PR? We should take it out of the Half PR so this
+can get in; the Half PR can rebase on top of the resulting change.
+
+c) 1789, 1775 - Makes adaptive grid easier
+
+1789 is needed so you can use the same Grid with different custom Trees.
+
+1775 explores all possible instantiations and ensures they are handled.
+
+These are both on schedule to be decided on by the next meeting.
+
+8) Next Meeting
+
+Next meeting is on September 10th, 2024. 2pm-3pm EDT (GMT-4).
+
diff --git a/tsc/process/codereview.md b/tsc/process/codereview.md
index 418e81d783..5cb7a0c015 100644
--- a/tsc/process/codereview.md
+++ b/tsc/process/codereview.md
@@ -1,24 +1,24 @@
 **Code Reviewing and Merging OpenVDB Pull Requests**
 
-The Technical Steering Committee have write-access to the OpenVDB repository and are responsible for triaging, reviewing and ultimately merging or rejecting GitHub pull requests. This document lists the policy and best practices to guide the TSC in this process.
+The Maintainers have write-access to the OpenVDB repository and are responsible for triaging, reviewing and ultimately merging or rejecting GitHub pull requests. This document lists the policy and best practices to guide the Maintainers in this process.
 
 ***Policy***
 
 * No direct commits to the master (or any other protected) branch, every code change should be a pull request
 * Any member of the community can provide feedback on any pull request (and is encouraged to do so)
 * A CODEOWNERS file introduced to the root of the repo to configure ownership (global, per-directory and/or per-file) - this will automatically request pull request reviews from the relevant maintainers (https://help.github.com/articles/about-codeowners/)
-* Minimum of one non-author TSC member approval on every pull request before merging
+* Minimum of one non-author Maintainer approval on every pull request before merging
 * Non fast-forward merges must be used (ie the merge must not be rebased onto master)
 * Travis CI and DCO status checks must strictly pass before merging, ASWF Jenkins CI should loosely pass (https://help.github.com/articles/types-of-required-status-checks)
 
 ***Best Practices***
 
 * Prefer all requested reviewers to approve before merging
-* Merging a pull request should be the responsibility of the author if they are a TSC member
-* Any TSC member can merge a pull request authored by a non-TSC member, but with a preferred minimum of two approvals from TSC members (including themself)
+* Merging a pull request should be the responsibility of the author if they are a Maintainer
+* Any Maintainer can merge a pull request authored by a non-Maintainer, but with a preferred minimum of two approvals from Maintainers (including themself)
 * Re-writing the branch history by rebasing a pull request branch just before a merge is discouraged, unless it significantly improves the overall history (such as any broken commits on the review branch that might make reverting or bisecting more difficult)
 * Prefer pull requests to be open for a minimum of 72 hours before merging in order to gather any feedback
-* Aim for all pull requests to be responded to by one of the TSC members within a minimum of two weeks with either explanation of non-acceptance, request for changes or merge
+* Aim for all pull requests to be responded to by one of the Maintainers within two weeks with either an explanation of non-acceptance, a request for changes or a merge
 * TSC meetings should regularly review and discuss any outstanding pull requests
 * Pull requests should link to the associated Jira ticket (if applicable) in the description or title, this provides a route back to the Jira ticket through the Git history (git blame -> PR merge -> Jira ticket)
 * All pull request comments should aim to be answered and resolved before committing