diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5590359 --- /dev/null +++ b/.gitignore @@ -0,0 +1,556 @@ +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. +## +## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore + +# User-specific files +*.rsuser +*.suo +*.user +*Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +# Benchmark Results +BenchmarkDotNet.Artifacts/ + +# .NET Core +project.lock.json + +*.tmp_proj +*_wpftmp.csproj +*.log +*.tlog +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog +*.scc + +# Chutzpah Test files +_Chutzpah* + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opendb +*.opensdf +*.sdf +*.cachefile +*.VC.db +*.VC.VC.opendb + +# Visual Studio profiler +*.psess +*.vsp +*.vspx +*.sap + +# Visual Studio Trace Files +*.e2e + +# TFS 2012 Local Workspace +$tf/ + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper +*.DotSettings.user + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# AxoCover is a Code Coverage Tool +.axoCover/* +!.axoCover/settings.json + +# Coverlet is a free, cross platform Code Coverage Tool +coverage*.json +coverage*.xml +coverage*.info + +# Visual Studio code coverage results +*an present a security risk +# (https://github.com/github/gitignore/pull/2483#issue-259490424) +#*.snk + +# Since there are multiple workflows, uncomment next line to ignore bower_components +# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) +#bower_components/ + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file +# to a newer Visual Studio version. 
Backup files are not needed, +# because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm +ServiceFabricBackup/ +*.rptproj.bak + +# SQL Server files +*.mdf +*.ldf +*.ndf + +# Business Intelligence projects +*.rdl.data +*.bim.layout +*.bim_*.settings +*.rptproj.rsuser +*- [Bb]ackup.rdl +*- [Bb]ackup ([0-9]).rdl +*- [Bb]ackup ([0-9][0-9]).rdl + +# Microsoft Fakes +FakesAssemblies/ + +# GhostDoc plugin setting file +*.GhostDoc.xml + +# Node.js Tools for Visual Studio +.ntvs_analysis.dat +node_modules/ +.vscode/extensions.json +*.code-workspace + +# Local History for Visual Studio Code +.history/ + +# Windows Installer files from build outputs +*.cab +*.msi +*.msix +*.msm +*.msp + +# JetBrains Rider +*.sln.imlproject.fragment.lock.json +artifacts/ + +# ASP.NET Scaffolding +ScaffoldingReadMe.txt + +# StyleCop +StyleCopReport.xml + +# Files built by Visual Studio +*_i.c +*_p.c +*_h.h +*.ilk +*.meta +*.obj +*.iobj +*.pch +*.pdb +*.ipdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp.userosscache +*.sln.docstates + +# User-specific files (MonoDevelop/Xamarin Studio) +*.userprefs + +# Mono auto generated files +mono_crash.* + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +x64/ +x86/ +[Ww][Ii][Nn]32/ +[Aa][Rr][Mm]/ +[Aa][Rr][Mm]64/ +bld/ +[Bb]in/ +[Oo]bj/ +[Ll]og/ +[Ll]ogs/ + +# Visual Studio 2015/2017 cache/options directory +.vs/ +# Uncomment if you have tasks that create the project's static files in wwwroot +#wwwroot/ + +# Visual Studio 20 but keep track of directories ending in .cache +!?*.[Cc]ache/ + +# Others +ClientBin/ +~$* +*~ +*.dbmdl +*.dbproj.schemaview +*.jfm +*.pfx +*.publishsettings +orleans.codegen.cs + +# Including strong name files c +# Visual Studio 6 build log +*.plg + +# Visual Studio 6 workspace options file +*.opt + +# Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
+*.vbw + +# Visual Studio 6 auto-generated project file (contains which files were open etc.) +*.vbp + +# Visual Studio 6 workspace and project file (working project files containing files to include in project) +*.dsw +*.dsp + +# Visual Studio 6 technical files +*.ncb +*.aps + +# Visual Studio LightSwitch build output +**/*.HTMLClient/GeneratedArtifacts +**/*.DesktopClient/GeneratedArtifacts +**/*.DesktopClient/ModelManifest.xml +**/*.Server/GeneratedArtifacts +**/*.Server/ModelManifest.xml +_Pvt_Extensions + +# Paket dependency manager +.paket/paket.exe +paket-files/ + +## Note: Comment the next line if you want to checkin your web deploy settings, +# but database connection strings (with potential passwords) will be unencrypted +*.pubxml +*.publishproj + +# Microsoft Azure Web App publish settings. Comment the next line if you want to +# checkin your Azure Web App publish settings, but sensitive information contained +# in these scripts will be unencrypted +PublishScripts/ + +# NuGet Packages +*.nupkg +# NuGet Symbol Packages +*.snupkg +# The packages folder can be ignored because of Package Restore +**/[Pp]ackages/* +# except build/, which is used as an MSBuild target. 
+!**/[Pp]ackages/build/ +# Uncomment if necessary however generally it will be regenerated when needed +#!**/[Pp]ackages/repositories.config +# NuGet v3's project.json files produces more ignorable files +*.nuget.props +*.nuget.targets + +# Microsoft Azure Build Output +csx/ +*.build.csdef + +# Microsoft Azure Emulator +ecf/ +rcf/ + +# Windows Store app package directroject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.[Pp]ublish.xml +*.azurePubxml +ories and files +AppPackages/ +BundleArtifacts/ +Package.StoreAssociation.xml +_pkginfo.txt +*.appx +*.appxbundle +*.appxupload + +# Visual Studio cache files +# files ending in .cache can be ignored +*.[Cc]ache +# FAKE - F# Make +.fake/ + +# CodeRush personal settings +.cr/personal + +# Python Tools for Visual Studio (PTVS) +__pycache__/ +*.pyc + +# Cake - Uncomment if you are using it +# tools/** +# !tools/packages.config + +# Tabs Studio +*.tss + +# Telerik's JustMock configuration file +*.jmconfig + +# BizTalk build output +*.btp.cs +*.btm.cs +*.odx.cs +*.xsd.cs + +# OpenCover UI analysis results +OpenCover/ + +# Azure Stream Analytics local run output +ASALocalRun/ + +# MSBuild Binary and Structured Log +*.binlog + +# NVidia Nsight GPU de* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!17 auto generated files +Generated\ Files/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +# NUnit +*.VisualState.xml +TestResult.xml +nunit-*.xml + +# Build Results of an ATL Project +[.coverage +*.coveragexml + +# NCrunch +_NCrunch_* +.*crunch*.local.xml +nCrunchTemp_* + +# MightyMoose +*.mm.* +AutoTest.Net/ + +# Web workbench (sass) +.sass-cache/ + +# Installshield output folder +[Ee]xpress/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocPbugger configuration file +*.nvuser + +# MFractors (Xamarin 
productivity tool) working folder +.mfractor/ + +# Local History for Visual Studio +.localhistory/ + +# Visual Studio History (VSHistory) files +.vshistory/ + +# BeatPulse healthcheck temp database +healthchecksdb + +# Backup folder for Package Reference Convert tool in Visual Studio 2017 +MigrationBackup/ + +# Ionide (cross platform F# VS Code tools) working folder +.ionide/ + +# Fody - auto-generated XML schema +FodyWeavers.xsd + +# VS Code files for those working on multiple tools +.vscode/ + +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
+# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ +micc.exe +/deprecated/ +/mir_bug/ +/benchmarking/results/ +/generated/ +/tmp/ +*.whl diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..0598fb1 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2024 eliphat + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 0000000..e69de29 diff --git a/csrc/cpp.hint b/csrc/cpp.hint new file mode 100644 index 0000000..58ae506 --- /dev/null +++ b/csrc/cpp.hint @@ -0,0 +1 @@ +#define __global__ __location__(global) diff --git a/csrc/helper_math.h b/csrc/helper_math.h new file mode 100644 index 0000000..4f4831f --- /dev/null +++ b/csrc/helper_math.h @@ -0,0 +1,1469 @@ +/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + /* + * This file implements common mathematical operations on vector types + * (float3, float4 etc.) since these are not provided as standard by CUDA. + * + * The syntax is modeled on the Cg standard library. + * + * This is part of the Helper library includes + * + * Thanks to Linh Hah for additions and fixes. 
+ */ + +#ifndef HELPER_MATH_H +#define HELPER_MATH_H + +#include "cuda_runtime.h" + +typedef unsigned int uint; +typedef unsigned short ushort; + +#ifndef EXIT_WAIVED +#define EXIT_WAIVED 2 +#endif + +#ifndef __CUDACC__ +#include <math.h> + +//////////////////////////////////////////////////////////////////////////////// +// host implementations of CUDA functions +//////////////////////////////////////////////////////////////////////////////// + +inline float fminf(float a, float b) +{ + return a < b ? a : b; +} + +inline float fmaxf(float a, float b) +{ + return a > b ? a : b; +} + +inline int max(int a, int b) +{ + return a > b ? a : b; +} + +inline int min(int a, int b) +{ + return a < b ? a : b; +} + +inline float rsqrtf(float x) +{ + return 1.0f / sqrtf(x); +} +#endif + +//////////////////////////////////////////////////////////////////////////////// +// constructors +//////////////////////////////////////////////////////////////////////////////// + +inline __host__ __device__ float2 make_float2(float s) +{ + return make_float2(s, s); +} +inline __host__ __device__ float2 make_float2(float3 a) +{ + return make_float2(a.x, a.y); +} +inline __host__ __device__ float2 make_float2(int2 a) +{ + return make_float2(float(a.x), float(a.y)); +} +inline __host__ __device__ float2 make_float2(uint2 a) +{ + return make_float2(float(a.x), float(a.y)); +} + +inline __host__ __device__ int2 make_int2(int s) +{ + return make_int2(s, s); +} +inline __host__ __device__ int2 make_int2(int3 a) +{ + return make_int2(a.x, a.y); +} +inline __host__ __device__ int2 make_int2(uint2 a) +{ + return make_int2(int(a.x), int(a.y)); +} +inline __host__ __device__ int2 make_int2(float2 a) +{ + return make_int2(int(a.x), int(a.y)); +} + +inline __host__ __device__ uint2 make_uint2(uint s) +{ + return make_uint2(s, s); +} +inline __host__ __device__ uint2 make_uint2(uint3 a) +{ + return make_uint2(a.x, a.y); +} +inline __host__ __device__ uint2 make_uint2(int2 a) +{ + return make_uint2(uint(a.x), 
uint(a.y)); +} + +inline __host__ __device__ float3 make_float3(float s) +{ + return make_float3(s, s, s); +} +inline __host__ __device__ float3 make_float3(float2 a) +{ + return make_float3(a.x, a.y, 0.0f); +} +inline __host__ __device__ float3 make_float3(float2 a, float s) +{ + return make_float3(a.x, a.y, s); +} +inline __host__ __device__ float3 make_float3(float4 a) +{ + return make_float3(a.x, a.y, a.z); +} +inline __host__ __device__ float3 make_float3(int3 a) +{ + return make_float3(float(a.x), float(a.y), float(a.z)); +} +inline __host__ __device__ float3 make_float3(uint3 a) +{ + return make_float3(float(a.x), float(a.y), float(a.z)); +} + +inline __host__ __device__ int3 make_int3(int s) +{ + return make_int3(s, s, s); +} +inline __host__ __device__ int3 make_int3(int2 a) +{ + return make_int3(a.x, a.y, 0); +} +inline __host__ __device__ int3 make_int3(int2 a, int s) +{ + return make_int3(a.x, a.y, s); +} +inline __host__ __device__ int3 make_int3(uint3 a) +{ + return make_int3(int(a.x), int(a.y), int(a.z)); +} +inline __host__ __device__ int3 make_int3(float3 a) +{ + return make_int3(int(a.x), int(a.y), int(a.z)); +} + +inline __host__ __device__ uint3 make_uint3(uint s) +{ + return make_uint3(s, s, s); +} +inline __host__ __device__ uint3 make_uint3(uint2 a) +{ + return make_uint3(a.x, a.y, 0); +} +inline __host__ __device__ uint3 make_uint3(uint2 a, uint s) +{ + return make_uint3(a.x, a.y, s); +} +inline __host__ __device__ uint3 make_uint3(uint4 a) +{ + return make_uint3(a.x, a.y, a.z); +} +inline __host__ __device__ uint3 make_uint3(int3 a) +{ + return make_uint3(uint(a.x), uint(a.y), uint(a.z)); +} + +inline __host__ __device__ float4 make_float4(float s) +{ + return make_float4(s, s, s, s); +} +inline __host__ __device__ float4 make_float4(float3 a) +{ + return make_float4(a.x, a.y, a.z, 0.0f); +} +inline __host__ __device__ float4 make_float4(float3 a, float w) +{ + return make_float4(a.x, a.y, a.z, w); +} +inline __host__ __device__ float4 
make_float4(int4 a) +{ + return make_float4(float(a.x), float(a.y), float(a.z), float(a.w)); +} +inline __host__ __device__ float4 make_float4(uint4 a) +{ + return make_float4(float(a.x), float(a.y), float(a.z), float(a.w)); +} + +inline __host__ __device__ int4 make_int4(int s) +{ + return make_int4(s, s, s, s); +} +inline __host__ __device__ int4 make_int4(int3 a) +{ + return make_int4(a.x, a.y, a.z, 0); +} +inline __host__ __device__ int4 make_int4(int3 a, int w) +{ + return make_int4(a.x, a.y, a.z, w); +} +inline __host__ __device__ int4 make_int4(uint4 a) +{ + return make_int4(int(a.x), int(a.y), int(a.z), int(a.w)); +} +inline __host__ __device__ int4 make_int4(float4 a) +{ + return make_int4(int(a.x), int(a.y), int(a.z), int(a.w)); +} + + +inline __host__ __device__ uint4 make_uint4(uint s) +{ + return make_uint4(s, s, s, s); +} +inline __host__ __device__ uint4 make_uint4(uint3 a) +{ + return make_uint4(a.x, a.y, a.z, 0); +} +inline __host__ __device__ uint4 make_uint4(uint3 a, uint w) +{ + return make_uint4(a.x, a.y, a.z, w); +} +inline __host__ __device__ uint4 make_uint4(int4 a) +{ + return make_uint4(uint(a.x), uint(a.y), uint(a.z), uint(a.w)); +} + +//////////////////////////////////////////////////////////////////////////////// +// negate +//////////////////////////////////////////////////////////////////////////////// + +inline __host__ __device__ float2 operator-(float2& a) +{ + return make_float2(-a.x, -a.y); +} +inline __host__ __device__ int2 operator-(int2& a) +{ + return make_int2(-a.x, -a.y); +} +inline __host__ __device__ float3 operator-(float3& a) +{ + return make_float3(-a.x, -a.y, -a.z); +} +inline __host__ __device__ int3 operator-(int3& a) +{ + return make_int3(-a.x, -a.y, -a.z); +} +inline __host__ __device__ float4 operator-(float4& a) +{ + return make_float4(-a.x, -a.y, -a.z, -a.w); +} +inline __host__ __device__ int4 operator-(int4& a) +{ + return make_int4(-a.x, -a.y, -a.z, -a.w); +} + 
+////////////////////////////////////////////////////////////////////////////////
+// addition
+// - vector + vector adds matching components
+// - overloads taking a scalar add it to every component, in either operand order
+// - operator+= overloads update a in place
+////////////////////////////////////////////////////////////////////////////////
+
+inline __host__ __device__ float2 operator+(float2 a, float2 b)
+{
+    return make_float2(a.x + b.x, a.y + b.y);
+}
+inline __host__ __device__ void operator+=(float2& a, float2 b)
+{
+    a.x += b.x;
+    a.y += b.y;
+}
+inline __host__ __device__ float2 operator+(float2 a, float b)
+{
+    return make_float2(a.x + b, a.y + b);
+}
+inline __host__ __device__ float2 operator+(float b, float2 a)
+{
+    return make_float2(a.x + b, a.y + b);
+}
+inline __host__ __device__ void operator+=(float2& a, float b)
+{
+    a.x += b;
+    a.y += b;
+}
+
+inline __host__ __device__ int2 operator+(int2 a, int2 b)
+{
+    return make_int2(a.x + b.x, a.y + b.y);
+}
+inline __host__ __device__ void operator+=(int2& a, int2 b)
+{
+    a.x += b.x;
+    a.y += b.y;
+}
+inline __host__ __device__ int2 operator+(int2 a, int b)
+{
+    return make_int2(a.x + b, a.y + b);
+}
+inline __host__ __device__ int2 operator+(int b, int2 a)
+{
+    return make_int2(a.x + b, a.y + b);
+}
+inline __host__ __device__ void operator+=(int2& a, int b)
+{
+    a.x += b;
+    a.y += b;
+}
+
+inline __host__ __device__ uint2 operator+(uint2 a, uint2 b)
+{
+    return make_uint2(a.x + b.x, a.y + b.y);
+}
+inline __host__ __device__ void operator+=(uint2& a, uint2 b)
+{
+    a.x += b.x;
+    a.y += b.y;
+}
+inline __host__ __device__ uint2 operator+(uint2 a, uint b)
+{
+    return make_uint2(a.x + b, a.y + b);
+}
+inline __host__ __device__ uint2 operator+(uint b, uint2 a)
+{
+    return make_uint2(a.x + b, a.y + b);
+}
+inline __host__ __device__ void operator+=(uint2& a, uint b)
+{
+    a.x += b;
+    a.y += b;
+}
+
+
+inline __host__ __device__ float3 operator+(float3 a, float3 b)
+{
+    return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
+}
+inline __host__ __device__ void operator+=(float3& a, float3 b)
+{
+    a.x += b.x;
+    a.y += b.y;
+    a.z += b.z;
+}
+inline __host__ __device__ float3 operator+(float3 a, float b)
+{
+    return make_float3(a.x + b, a.y + b, a.z + b);
+}
+inline __host__ __device__ void operator+=(float3& a, float b)
+{
+    a.x += b;
+    a.y += b;
+    a.z += b;
+}
+
+inline __host__ __device__ int3 operator+(int3 a, int3 b)
+{
+    return make_int3(a.x + b.x, a.y + b.y, a.z + b.z);
+}
+inline __host__ __device__ void operator+=(int3& a, int3 b)
+{
+    a.x += b.x;
+    a.y += b.y;
+    a.z += b.z;
+}
+inline __host__ __device__ int3 operator+(int3 a, int b)
+{
+    return make_int3(a.x + b, a.y + b, a.z + b);
+}
+inline __host__ __device__ void operator+=(int3& a, int b)
+{
+    a.x += b;
+    a.y += b;
+    a.z += b;
+}
+
+inline __host__ __device__ uint3 operator+(uint3 a, uint3 b)
+{
+    return make_uint3(a.x + b.x, a.y + b.y, a.z + b.z);
+}
+inline __host__ __device__ void operator+=(uint3& a, uint3 b)
+{
+    a.x += b.x;
+    a.y += b.y;
+    a.z += b.z;
+}
+inline __host__ __device__ uint3 operator+(uint3 a, uint b)
+{
+    return make_uint3(a.x + b, a.y + b, a.z + b);
+}
+inline __host__ __device__ void operator+=(uint3& a, uint b)
+{
+    a.x += b;
+    a.y += b;
+    a.z += b;
+}
+
+// scalar + 3-component vector (scalar on the left-hand side)
+inline __host__ __device__ int3 operator+(int b, int3 a)
+{
+    return make_int3(a.x + b, a.y + b, a.z + b);
+}
+inline __host__ __device__ uint3 operator+(uint b, uint3 a)
+{
+    return make_uint3(a.x + b, a.y + b, a.z + b);
+}
+inline __host__ __device__ float3 operator+(float b, float3 a)
+{
+    return make_float3(a.x + b, a.y + b, a.z + b);
+}
+
+inline __host__ __device__ float4 operator+(float4 a, float4 b)
+{
+    return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
+}
+inline __host__ __device__ void operator+=(float4& a, float4 b)
+{
+    a.x += b.x;
+    a.y += b.y;
+    a.z += b.z;
+    a.w += b.w;
+}
+inline __host__ __device__ float4 operator+(float4 a, float b)
+{
+    return make_float4(a.x + b, a.y + b, a.z + b, a.w + b);
+}
+inline __host__ __device__ float4 operator+(float b, float4 a)
+{
+    return make_float4(a.x + b, a.y + b, a.z + b, a.w + b);
+}
+inline __host__ __device__ void operator+=(float4& a, float b)
+{
+    a.x += b;
+    a.y += b;
+    a.z += b;
+    a.w += b;
+}
+
+inline __host__ __device__ int4 operator+(int4 a, int4 b)
+{
+    return make_int4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
+}
+inline __host__ __device__ void operator+=(int4& a, int4 b)
+{
+    a.x += b.x;
+    a.y += b.y;
+    a.z += b.z;
+    a.w += b.w;
+}
+inline __host__ __device__ int4 operator+(int4 a, int b)
+{
+    return make_int4(a.x + b, a.y + b, a.z + b, a.w + b);
+}
+inline __host__ __device__ int4 operator+(int b, int4 a)
+{
+    return make_int4(a.x + b, a.y + b, a.z + b, a.w + b);
+}
+inline __host__ __device__ void operator+=(int4& a, int b)
+{
+    a.x += b;
+    a.y += b;
+    a.z += b;
+    a.w += b;
+}
+
+inline __host__ __device__ uint4 operator+(uint4 a, uint4 b)
+{
+    return make_uint4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
+}
+inline __host__ __device__ void operator+=(uint4& a, uint4 b)
+{
+    a.x += b.x;
+    a.y += b.y;
+    a.z += b.z;
+    a.w += b.w;
+}
+inline __host__ __device__ uint4 operator+(uint4 a, uint b)
+{
+    return make_uint4(a.x + b, a.y + b, a.z + b, a.w + b);
+}
+inline __host__ __device__ uint4 operator+(uint b, uint4 a)
+{
+    return make_uint4(a.x + b, a.y + b, a.z + b, a.w + b);
+}
+inline __host__ __device__ void operator+=(uint4& a, uint b)
+{
+    a.x += b;
+    a.y += b;
+    a.z += b;
+    a.w += b;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// subtract
+// - vector - vector subtracts matching components
+// - (vector, scalar) subtracts the scalar from every component
+// - (scalar, vector) subtracts every component from the scalar
+// - operator-= overloads update a in place
+////////////////////////////////////////////////////////////////////////////////
+
+inline __host__ __device__ float2 operator-(float2 a, float2 b)
+{
+    return make_float2(a.x - b.x, a.y - b.y);
+}
+inline __host__ __device__ void operator-=(float2& a, float2 b)
+{
+    a.x -= b.x;
+    a.y -= b.y;
+}
+inline __host__ __device__ float2 operator-(float2 a, float b)
+{
+    return make_float2(a.x - b, a.y - b);
+}
+inline __host__ __device__ float2 operator-(float b, float2 a)
+{
+    return make_float2(b - a.x, b - a.y);
+}
+inline __host__ __device__ void operator-=(float2& a, float b)
+{
+    a.x -= b;
+    a.y -= b;
+}
+
+inline __host__ __device__ int2 operator-(int2 a, int2 b)
+{
+    return make_int2(a.x - b.x, a.y - b.y);
+}
+inline __host__ __device__ void operator-=(int2& a, int2 b)
+{
+    a.x -= b.x;
+    a.y -= b.y;
+}
+inline __host__ __device__ int2 operator-(int2 a, int b)
+{
+    return make_int2(a.x - b, a.y - b);
+}
+inline __host__ __device__ int2 operator-(int b, int2 a)
+{
+    return make_int2(b - a.x, b - a.y);
+}
+inline __host__ __device__ void operator-=(int2& a, int b)
+{
+    a.x -= b;
+    a.y -= b;
+}
+
+inline __host__ __device__ uint2 operator-(uint2 a, uint2 b)
+{
+    return make_uint2(a.x - b.x, a.y - b.y);
+}
+inline __host__ __device__ void operator-=(uint2& a, uint2 b)
+{
+    a.x -= b.x;
+    a.y -= b.y;
+}
+inline __host__ __device__ uint2 operator-(uint2 a, uint b)
+{
+    return make_uint2(a.x - b, a.y - b);
+}
+inline __host__ __device__ uint2 operator-(uint b, uint2 a)
+{
+    return make_uint2(b - a.x, b - a.y);
+}
+inline __host__ __device__ void operator-=(uint2& a, uint b)
+{
+    a.x -= b;
+    a.y -= b;
+}
+
+inline __host__ __device__ float3 operator-(float3 a, float3 b)
+{
+    return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
+}
+inline __host__ __device__ void operator-=(float3& a, float3 b)
+{
+    a.x -= b.x;
+    a.y -= b.y;
+    a.z -= b.z;
+}
+inline __host__ __device__ float3 operator-(float3 a, float b)
+{
+    return make_float3(a.x - b, a.y - b, a.z - b);
+}
+inline __host__ __device__ float3 operator-(float b, float3 a)
+{
+    return make_float3(b - a.x, b - a.y, b - a.z);
+}
+inline __host__ __device__ void operator-=(float3& a, float b)
+{
+    a.x -= b;
+    a.y -= b;
+    a.z -= b;
+}
+
+inline __host__ __device__ int3 operator-(int3 a, int3 b)
+{
+    return make_int3(a.x - b.x, a.y - b.y, a.z - b.z);
+}
+inline __host__ __device__ void operator-=(int3& a, int3 b)
+{
+    a.x -= b.x;
+    a.y -= b.y;
+    a.z -= b.z;
+}
+inline __host__ __device__ int3 operator-(int3 a, int b)
+{
+    return make_int3(a.x - b, a.y - b, a.z - b);
+}
+inline __host__ __device__ int3 operator-(int b, int3 a)
+{
+    return make_int3(b - a.x, b - a.y, b - a.z);
+}
+inline __host__ __device__ void operator-=(int3& a, int b)
+{
+    a.x -= b;
+    a.y -= b;
+    a.z -= b;
+}
+
+inline __host__ __device__ uint3 operator-(uint3 a, uint3 b)
+{
+    return make_uint3(a.x - b.x, a.y - b.y, a.z - b.z);
+}
+inline __host__ __device__ void operator-=(uint3& a, uint3 b)
+{
+    a.x -= b.x;
+    a.y -= b.y;
+    a.z -= b.z;
+}
+inline __host__ __device__ uint3 operator-(uint3 a, uint b)
+{
+    return make_uint3(a.x - b, a.y - b, a.z - b);
+}
+inline __host__ __device__ uint3 operator-(uint b, uint3 a)
+{
+    return make_uint3(b - a.x, b - a.y, b - a.z);
+}
+inline __host__ __device__ void operator-=(uint3& a, uint b)
+{
+    a.x -= b;
+    a.y -= b;
+    a.z -= b;
+}
+
+inline __host__ __device__ float4 operator-(float4 a, float4 b)
+{
+    return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
+}
+inline __host__ __device__ void operator-=(float4& a, float4 b)
+{
+    a.x -= b.x;
+    a.y -= b.y;
+    a.z -= b.z;
+    a.w -= b.w;
+}
+inline __host__ __device__ float4 operator-(float4 a, float b)
+{
+    return make_float4(a.x - b, a.y - b, a.z - b, a.w - b);
+}
+// scalar - float4: same form as the (scalar, vector) overloads of the other types
+inline __host__ __device__ float4 operator-(float b, float4 a)
+{
+    return make_float4(b - a.x, b - a.y, b - a.z, b - a.w);
+}
+inline __host__ __device__ void operator-=(float4& a, float b)
+{
+    a.x -= b;
+    a.y -= b;
+    a.z -= b;
+    a.w -= b;
+}
+
+inline __host__ __device__ int4 operator-(int4 a, int4 b)
+{
+    return make_int4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
+}
+inline __host__ __device__ void operator-=(int4& a, int4 b)
+{
+    a.x -= b.x;
+    a.y -= b.y;
+    a.z -= b.z;
+    a.w -= b.w;
+}
+inline __host__ __device__ int4 operator-(int4 a, int b)
+{
+    return make_int4(a.x - b, a.y - b, a.z - b, a.w - b);
+}
+inline __host__ __device__ int4 operator-(int b, int4 a)
+{
+    return make_int4(b - a.x, b - a.y, b - a.z, b - a.w);
+}
+inline __host__ __device__ void operator-=(int4& a, int b)
+{
+    a.x -= b;
+    a.y -= b;
+    a.z -= b;
+    a.w -= b;
+}
+
+inline __host__ __device__ uint4 operator-(uint4 a, uint4 b)
+{
+    return make_uint4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
+}
+inline __host__ __device__ void operator-=(uint4& a, uint4 b)
+{
+    a.x -= b.x;
+    a.y -= b.y;
+    a.z -= b.z;
+    a.w -= b.w;
+}
+inline __host__ __device__ uint4 operator-(uint4 a, uint b)
+{
+    return make_uint4(a.x - b, a.y - b, a.z - b, a.w - b);
+}
+inline __host__ __device__ uint4 operator-(uint b, uint4 a)
+{
+    return make_uint4(b - a.x, b - a.y, b - a.z, b - a.w);
+}
+inline __host__ __device__ void operator-=(uint4& a, uint b)
+{
+    a.x -= b;
+    a.y -= b;
+    a.z -= b;
+    a.w -= b;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// multiply
+// - vector * vector multiplies matching components (not a dot or cross product)
+// - overloads taking a scalar scale every component, in either operand order
+// - operator*= overloads update a in place
+////////////////////////////////////////////////////////////////////////////////
+
+inline __host__ __device__ float2 operator*(float2 a, float2 b)
+{
+    return make_float2(a.x * b.x, a.y * b.y);
+}
+inline __host__ __device__ void operator*=(float2& a, float2 b)
+{
+    a.x *= b.x;
+    a.y *= b.y;
+}
+inline __host__ __device__ float2 operator*(float2 a, float b)
+{
+    return make_float2(a.x * b, a.y * b);
+}
+inline __host__ __device__ float2 operator*(float b, float2 a)
+{
+    return make_float2(b * a.x, b * a.y);
+}
+inline __host__ __device__ void operator*=(float2& a, float b)
+{
+    a.x *= b;
+    a.y *= b;
+}
+
+inline __host__ __device__ int2 operator*(int2 a, int2 b)
+{
+    return make_int2(a.x * b.x, a.y * b.y);
+}
+inline __host__ __device__ void operator*=(int2& a, int2 b)
+{
+    a.x *= b.x;
+    a.y *= b.y;
+}
+inline __host__ __device__ int2 operator*(int2 a, int b)
+{
+    return make_int2(a.x * b, a.y * b);
+}
+inline __host__ __device__ int2 operator*(int b, int2 a)
+{
+    return make_int2(b * a.x, b * a.y);
+}
+inline __host__ __device__ void operator*=(int2& a, int b)
+{
+    a.x *= b;
+    a.y *= b;
+}
+
+inline __host__ __device__ uint2 operator*(uint2 a, uint2 b)
+{
+    return make_uint2(a.x * b.x, a.y * b.y);
+}
+inline __host__ __device__ void operator*=(uint2& a, uint2 b)
+{
+    a.x *= b.x;
+    a.y *= b.y;
+}
+inline __host__ __device__ uint2 operator*(uint2 a, uint b)
+{
+    return make_uint2(a.x * b, a.y * b);
+}
+inline __host__ __device__ uint2 operator*(uint b, uint2 a)
+{
+    return make_uint2(b * a.x, b * a.y);
+}
+inline __host__ __device__ void operator*=(uint2& a, uint b)
+{
+    a.x *= b;
+    a.y *= b;
+}
+
+inline __host__ __device__ float3 operator*(float3 a, float3 b)
+{
+    return make_float3(a.x * b.x, a.y * b.y, a.z * b.z);
+}
+inline __host__ __device__ void operator*=(float3& a, float3 b)
+{
+    a.x *= b.x;
+    a.y *= b.y;
+    a.z *= b.z;
+}
+inline __host__ __device__ float3 operator*(float3 a, float b)
+{
+    return make_float3(a.x * b, a.y * b, a.z * b);
+}
+inline __host__ __device__ float3 operator*(float b, float3 a)
+{
+    return make_float3(b * a.x, b * a.y, b * a.z);
+}
+inline __host__ __device__ void operator*=(float3& a, float b)
+{
+    a.x *= b;
+    a.y *= b;
+    a.z *= b;
+}
+
+inline __host__ __device__ int3 operator*(int3 a, int3 b)
+{
+    return make_int3(a.x * b.x, a.y * b.y, a.z * b.z);
+}
+inline __host__ __device__ void operator*=(int3& a, int3 b)
+{
+    a.x *= b.x;
+    a.y *= b.y;
+    a.z *= b.z;
+}
+inline __host__ __device__ int3 operator*(int3 a, int b)
+{
+    return make_int3(a.x * b, a.y * b, a.z * b);
+}
+inline __host__ __device__ int3 operator*(int b, int3 a)
+{
+    return make_int3(b * a.x, b * a.y, b * a.z);
+}
+inline __host__ __device__ void operator*=(int3& a, int b)
+{
+    a.x *= b;
+    a.y *= b;
+    a.z *= b;
+}
+
+inline __host__ __device__ uint3 operator*(uint3 a, uint3 b)
+{
+    return make_uint3(a.x * b.x, a.y * b.y, a.z * b.z);
+}
+inline __host__ __device__ void operator*=(uint3& a, uint3 b)
+{
+    a.x *= b.x;
+    a.y *= b.y;
+    a.z *= b.z;
+}
+inline __host__ __device__ uint3 operator*(uint3 a, uint b)
+{
+    return make_uint3(a.x * b, a.y * b, a.z * b);
+}
+inline __host__ __device__ uint3 operator*(uint b, uint3 a)
+{
+    return make_uint3(b * a.x, b * a.y, b * a.z);
+}
+inline __host__ __device__ void operator*=(uint3& a, uint b)
+{
+    a.x *= b;
+    a.y *= b;
+    a.z *= b;
+}
+
+inline __host__ __device__ float4 operator*(float4 a, float4 b)
+{
+    return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
+}
+inline __host__ __device__ void operator*=(float4& a, float4 b)
+{
+    a.x *= b.x;
+    a.y *= b.y;
+    a.z *= b.z;
+    a.w *= b.w;
+}
+inline __host__ __device__ float4 operator*(float4 a, float b)
+{
+    return make_float4(a.x * b, a.y * b, a.z * b, a.w * b);
+}
+inline __host__ __device__ float4 operator*(float b, float4 a)
+{
+    return make_float4(b * a.x, b * a.y, b * a.z, b * a.w);
+}
+inline __host__ __device__ void operator*=(float4& a, float b)
+{
+    a.x *= b;
+    a.y *= b;
+    a.z *= b;
+    a.w *= b;
+}
+
+inline __host__ __device__ int4 operator*(int4 a, int4 b)
+{
+    return make_int4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
+}
+inline __host__ __device__ void operator*=(int4& a, int4 b)
+{
+    a.x *= b.x;
+    a.y *= b.y;
+    a.z *= b.z;
+    a.w *= b.w;
+}
+inline __host__ __device__ int4 operator*(int4 a, int b)
+{
+    return make_int4(a.x * b, a.y * b, a.z * b, a.w * b);
+}
+inline __host__ __device__ int4 operator*(int b, int4 a)
+{
+    return make_int4(b * a.x, b * a.y, b * a.z, b * a.w);
+}
+inline __host__ __device__ void operator*=(int4& a, int b)
+{
+    a.x *= b;
+    a.y *= b;
+    a.z *= b;
+    a.w *= b;
+}
+
+inline __host__ __device__ uint4 operator*(uint4 a, uint4 b)
+{
+    return make_uint4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
+}
+inline __host__ __device__ void operator*=(uint4& a, uint4 b)
+{
+    a.x *= b.x;
+    a.y *= b.y;
+    a.z *= b.z;
+    a.w *= b.w;
+}
+inline __host__ __device__ uint4 operator*(uint4 a, uint b)
+{
+    return make_uint4(a.x * b, a.y * b, a.z * b, a.w * b);
+}
+inline __host__ __device__ uint4 operator*(uint b, uint4 a)
+{
+    return make_uint4(b * a.x, b * a.y, b * a.z, b * a.w);
+}
+inline __host__ __device__ void operator*=(uint4& a, uint b)
+{
+    a.x *= b;
+    a.y *= b;
+    a.z *= b;
+    a.w *= b;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// divide
+////////////////////////////////////////////////////////////////////////////////
+
+// float2: the by-value left operand doubles as the result; /= reuses operator/
+inline __host__ __device__ float2 operator/(float2 a, float2 b)
+{
+    a.x /= b.x;
+    a.y /= b.y;
+    return a;
+}
+inline __host__ __device__ void operator/=(float2& a, float2 b)
+{
+    a = a / b;
+}
+inline __host__ __device__ float2 operator/(float2 a, float b)
+{
+    a.x /= b;
+    a.y /= b;
+    return a;
+}
+inline __host__ __device__ void operator/=(float2& a, float b)
+{
+    a = a / b;
+}
+inline __host__ __device__ float2 operator/(float b, float2 a)
+{
+    // scalar divided by each component
+    a.x = b / a.x;
+    a.y = b / a.y;
+    return a;
+}
+
+inline __host__ __device__ float3 operator/(float3 a, float3 b)
+{
+    a.x /= b.x;
+    a.y /= b.y;
+    a.z /= b.z;
+    return a;
+}
+inline __host__ __device__ void operator/=(float3& a, float3 b)
+{
+    a = a / b;
+}
+inline __host__ __device__ float3 operator/(float3 a, float b)
+{
+    a.x /= b;
+    a.y /= b;
+    a.z /= b;
+    return a;
+}
+inline __host__ __device__ void operator/=(float3& a, float b)
+{
+    a = a / b;
+}
+inline __host__ __device__ float3 operator/(float b, float3 a)
+{
+    a.x = b / a.x;
+    a.y = b / a.y;
+    a.z = b / a.z;
+    return a;
+}
+
+inline __host__ __device__ float4 operator/(float4 a, float4 b)
+{
+    a.x /= b.x;
+    a.y /= b.y;
+    a.z /= b.z;
+    a.w /= b.w;
+    return a;
+}
+inline __host__ __device__ void operator/=(float4& a, float4 b)
+{
+    a = a / b;
+}
+inline __host__ __device__ float4 operator/(float4 a, float b)
+{
+    a.x /= b;
+    a.y /= b;
+    a.z /= b;
+    a.w /= b;
+    return a;
+}
+inline __host__ __device__ void operator/=(float4& a, float b)
+{
+    a = a / b;
+}
+inline __host__ __device__ float4 operator/(float b, float4 a)
+{
+    a.x = b / a.x;
+    a.y = b / a.y;
+    a.z = b / a.z;
+    a.w = b / a.w;
+    return a;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// min
+// - per-component minimum of two vectors
+////////////////////////////////////////////////////////////////////////////////
+
+inline __host__ __device__ float2 fminf(float2 a, float2 b)
+{
+    a.x = fminf(a.x, b.x);
+    a.y = fminf(a.y, b.y);
+    return a;
+}
+inline __host__ __device__ float3 fminf(float3 a, float3 b)
+{
+    a.x = fminf(a.x, b.x);
+    a.y = fminf(a.y, b.y);
+    a.z = fminf(a.z, b.z);
+    return a;
+}
+inline __host__ __device__ float4 fminf(float4 a, float4 b)
+{
+    a.x = fminf(a.x, b.x);
+    a.y = fminf(a.y, b.y);
+    a.z = fminf(a.z, b.z);
+    a.w = fminf(a.w, b.w);
+    return a;
+}
+
+inline __host__ __device__ int2 min(int2 a, int2 b)
+{
+    a.x = min(a.x, b.x);
+    a.y = min(a.y, b.y);
+    return a;
+}
+inline __host__ __device__ int3 min(int3 a, int3 b)
+{
+    a.x = min(a.x, b.x);
+    a.y = min(a.y, b.y);
+    a.z = min(a.z, b.z);
+    return a;
+}
+inline __host__ __device__ int4 min(int4 a, int4 b)
+{
+    a.x = min(a.x, b.x);
+    a.y = min(a.y, b.y);
+    a.z = min(a.z, b.z);
+    a.w = min(a.w, b.w);
+    return a;
+}
+
+inline __host__ __device__ uint2 min(uint2 a, uint2 b)
+{
+    a.x = min(a.x, b.x);
+    a.y = min(a.y, b.y);
+    return a;
+}
+inline __host__ __device__ uint3 min(uint3 a, uint3 b)
+{
+    a.x = min(a.x, b.x);
+    a.y = min(a.y, b.y);
+    a.z = min(a.z, b.z);
+    return a;
+}
+inline __host__ __device__ uint4 min(uint4 a, uint4 b)
+{
+    a.x = min(a.x, b.x);
+    a.y = min(a.y, b.y);
+    a.z = min(a.z, b.z);
+    a.w = min(a.w, b.w);
+    return a;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// max
+// - per-component maximum of two vectors
+////////////////////////////////////////////////////////////////////////////////
+
+inline __host__ __device__ float2 fmaxf(float2 a, float2 b)
+{
+    a.x = fmaxf(a.x, b.x);
+    a.y = fmaxf(a.y, b.y);
+    return a;
+}
+inline __host__ __device__ float3 fmaxf(float3 a, float3 b)
+{
+    a.x = fmaxf(a.x, b.x);
+    a.y = fmaxf(a.y, b.y);
+    a.z = fmaxf(a.z, b.z);
+    return a;
+}
+inline __host__ __device__ float4 fmaxf(float4 a, float4 b)
+{
+    a.x = fmaxf(a.x, b.x);
+    a.y = fmaxf(a.y, b.y);
+    a.z = fmaxf(a.z, b.z);
+    a.w = fmaxf(a.w, b.w);
+    return a;
+}
+
+inline __host__ __device__ int2 max(int2 a, int2 b)
+{
+    a.x = max(a.x, b.x);
+    a.y = max(a.y, b.y);
+    return a;
+}
+inline __host__ __device__ int3 max(int3 a, int3 b)
+{
+    a.x = max(a.x, b.x);
+    a.y = max(a.y, b.y);
+    a.z = max(a.z, b.z);
+    return a;
+}
+inline __host__ __device__ int4 max(int4 a, int4 b)
+{
+    a.x = max(a.x, b.x);
+    a.y = max(a.y, b.y);
+    a.z = max(a.z, b.z);
+    a.w = max(a.w, b.w);
+    return a;
+}
+
+inline __host__ __device__ uint2 max(uint2 a, uint2 b)
+{
+    a.x = max(a.x, b.x);
+    a.y = max(a.y, b.y);
+    return a;
+}
+inline __host__ __device__ uint3 max(uint3 a, uint3 b)
+{
+    a.x = max(a.x, b.x);
+    a.y = max(a.y, b.y);
+    a.z = max(a.z, b.z);
+    return a;
+}
+inline __host__ __device__ uint4 max(uint4 a, uint4 b)
+{
+    a.x = max(a.x, b.x);
+    a.y = max(a.y, b.y);
+    a.z = max(a.z, b.z);
+    a.w = max(a.w, b.w);
+    return a;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// lerp
+// - blends from a (t = 0) to b (t = 1) as a + t * (b - a)
+// - the vector overloads share one scalar t across all components
+////////////////////////////////////////////////////////////////////////////////
+
+inline __device__ __host__ float lerp(float a, float b, float t)
+{
+    const float span = b - a;
+    return a + t * span;
+}
+inline __device__ __host__ float2 lerp(float2 a, float2 b, float t)
+{
+    const float2 span = b - a;
+    return a + t * span;
+}
+inline __device__ __host__ float3 lerp(float3 a, float3 b, float t)
+{
+    const float3 span = b - a;
+    return a + t * span;
+}
+inline __device__ __host__ float4 lerp(float4 a, float4 b, float t)
+{
+    const float4 span = b - a;
+    return a + t * span;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// clamp
+// - limits a value to the range [a, b]: capped at b first, then raised to a
+// - vector overloads take either one scalar range or per-component bounds
+////////////////////////////////////////////////////////////////////////////////
+
+inline __device__ __host__ float clamp(float f, float a, float b)
+{
+    const float capped = fminf(f, b);
+    return fmaxf(a, capped);
+}
+inline __device__ __host__ int clamp(int f, int a, int b)
+{
+    const int capped = min(f, b);
+    return max(a, capped);
+}
+inline __device__ __host__ uint clamp(uint f, uint a, uint b)
+{
+    const uint capped = min(f, b);
+    return max(a, capped);
+}
+
+inline __device__ __host__ float2 clamp(float2 v, float a, float b)
+{
+    v.x = clamp(v.x, a, b);
+    v.y = clamp(v.y, a, b);
+    return v;
+}
+inline __device__ __host__ float2 clamp(float2 v, float2 a, float2 b)
+{
+    v.x = clamp(v.x, a.x, b.x);
+    v.y = clamp(v.y, a.y, b.y);
+    return v;
+}
+inline __device__ __host__ float3 clamp(float3 v, float a, float b)
+{
+    v.x = clamp(v.x, a, b);
+    v.y = clamp(v.y, a, b);
+    v.z = clamp(v.z, a, b);
+    return v;
+}
+inline __device__ __host__ float3 clamp(float3 v, float3 a, float3 b)
+{
+    v.x = clamp(v.x, a.x, b.x);
+    v.y = clamp(v.y, a.y, b.y);
+    v.z = clamp(v.z, a.z, b.z);
+    return v;
+}
+inline __device__ __host__ float4 clamp(float4 v, float a, float b)
+{
+    v.x = clamp(v.x, a, b);
+    v.y = clamp(v.y, a, b);
+    v.z = clamp(v.z, a, b);
+    v.w = clamp(v.w, a, b);
+    return v;
+}
+inline __device__ __host__ float4 clamp(float4 v, float4 a, float4 b)
+{
+    v.x = clamp(v.x, a.x, b.x);
+    v.y = clamp(v.y, a.y, b.y);
+    v.z = clamp(v.z, a.z, b.z);
+    v.w = clamp(v.w, a.w, b.w);
+    return v;
+}
+
+inline __device__ __host__ int2 clamp(int2 v, int a, int b)
+{
+    v.x = clamp(v.x, a, b);
+    v.y = clamp(v.y, a, b);
+    return v;
+}
+inline __device__ __host__ int2 clamp(int2 v, int2 a, int2 b)
+{
+    v.x = clamp(v.x, a.x, b.x);
+    v.y = clamp(v.y, a.y, b.y);
+    return v;
+}
+inline __device__ __host__ int3 clamp(int3 v, int a, int b)
+{
+    v.x = clamp(v.x, a, b);
+    v.y = clamp(v.y, a, b);
+    v.z = clamp(v.z, a, b);
+    return v;
+}
+inline __device__ __host__ int3 clamp(int3 v, int3 a, int3 b)
+{
+    v.x = clamp(v.x, a.x, b.x);
+    v.y = clamp(v.y, a.y, b.y);
+    v.z = clamp(v.z, a.z, b.z);
+    return v;
+}
+inline __device__ __host__ int4 clamp(int4 v, int a, int b)
+{
+    v.x = clamp(v.x, a, b);
+    v.y = clamp(v.y, a, b);
+    v.z = clamp(v.z, a, b);
+    v.w = clamp(v.w, a, b);
+    return v;
+}
+inline __device__ __host__ int4 clamp(int4 v, int4 a, int4 b)
+{
+    v.x = clamp(v.x, a.x, b.x);
+    v.y = clamp(v.y, a.y, b.y);
+    v.z = clamp(v.z, a.z, b.z);
+    v.w = clamp(v.w, a.w, b.w);
+    return v;
+}
+
+inline __device__ __host__ uint2 clamp(uint2 v, uint a, uint b)
+{
+    v.x = clamp(v.x, a, b);
+    v.y = clamp(v.y, a, b);
+    return v;
+}
+inline __device__ __host__ uint2 clamp(uint2 v, uint2 a, uint2 b)
+{
+    v.x = clamp(v.x, a.x, b.x);
+    v.y = clamp(v.y, a.y, b.y);
+    return v;
+}
+inline __device__ __host__ uint3 clamp(uint3 v, uint a, uint b)
+{
+    v.x = clamp(v.x, a, b);
+    v.y = clamp(v.y, a, b);
+    v.z = clamp(v.z, a, b);
+    return v;
+}
+inline __device__ __host__ uint3 clamp(uint3 v, uint3 a, uint3 b)
+{
+    v.x = clamp(v.x, a.x, b.x);
+    v.y = clamp(v.y, a.y, b.y);
+    v.z = clamp(v.z, a.z, b.z);
+    return v;
+}
+inline __device__ __host__ uint4 clamp(uint4 v, uint a, uint b)
+{
+    v.x = clamp(v.x, a, b);
+    v.y = clamp(v.y, a, b);
+    v.z = clamp(v.z, a, b);
+    v.w = clamp(v.w, a, b);
+    return v;
+}
+inline __device__ __host__ uint4 clamp(uint4 v, uint4 a, uint4 b)
+{
+    v.x = clamp(v.x, a.x, b.x);
+    v.y = clamp(v.y, a.y, b.y);
+    v.z = clamp(v.z, a.z, b.z);
+    v.w = clamp(v.w, a.w, b.w);
+    return v;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// dot product
+// - sum of the per-component products, accumulated left to right
+////////////////////////////////////////////////////////////////////////////////
+
+inline __host__ __device__ float dot(float2 a, float2 b)
+{
+    return (a.x * b.x) + (a.y * b.y);
+}
+inline __host__ __device__ float dot(float3 a, float3 b)
+{
+    return ((a.x * b.x) + (a.y * b.y)) + (a.z * b.z);
+}
+inline __host__ __device__ float dot(float4 a, float4 b)
+{
+    return (((a.x * b.x) + (a.y * b.y)) + (a.z * b.z)) + (a.w * b.w);
+}
+
+inline __host__ __device__ int dot(int2 a, int2 b)
+{
+    return (a.x * b.x) + (a.y * b.y);
+}
+inline __host__ __device__ int dot(int3 a, int3 b)
+{
+    return ((a.x * b.x) + (a.y * b.y)) + (a.z * b.z);
+}
+inline __host__ __device__ int dot(int4 a, int4 b)
+{
+    return (((a.x * b.x) + (a.y * b.y)) + (a.z * b.z)) + (a.w * b.w);
+}
+
+inline __host__ __device__ uint dot(uint2 a, uint2 b)
+{
+    return (a.x * b.x) + (a.y * b.y);
+}
+inline __host__ __device__ uint dot(uint3 a, uint3 b)
+{
+    return ((a.x * b.x) + (a.y * b.y)) + (a.z * b.z);
+}
+inline __host__ __device__ uint dot(uint4 a, uint4 b)
+{
+    return (((a.x * b.x) + (a.y * b.y)) + (a.z * b.z)) + (a.w * b.w);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// length
+// - Euclidean length: square root of the vector's dot product with itself
+////////////////////////////////////////////////////////////////////////////////
+
+inline __host__ __device__ float length(float2 v)
+{
+    const float lenSq = dot(v, v);
+    return sqrtf(lenSq);
+}
+inline __host__ __device__ float length(float3 v)
+{
+    const float lenSq = dot(v, v);
+    return sqrtf(lenSq);
+}
+inline __host__ __device__ float length(float4 v)
+{
+    const float lenSq = dot(v, v);
+    return sqrtf(lenSq);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// normalize
+// - scales v by the reciprocal square root of dot(v, v)
+////////////////////////////////////////////////////////////////////////////////
+
+inline __host__ __device__ float2 normalize(float2 v)
+{
+    return v * rsqrtf(dot(v, v));
+}
+inline __host__ __device__ float3 normalize(float3 v)
+{
+    return v * rsqrtf(dot(v, v));
+}
+inline __host__ __device__ float4 normalize(float4 v)
+{
+    return v * rsqrtf(dot(v, v));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// floor
+// - applies floorf to every component
+////////////////////////////////////////////////////////////////////////////////
+
+inline __host__ __device__ float2 floorf(float2 v)
+{
+    v.x = floorf(v.x);
+    v.y = floorf(v.y);
+    return v;
+}
+inline __host__ __device__ float3 floorf(float3 v)
+{
+    v.x = floorf(v.x);
+    v.y = floorf(v.y);
+    v.z = floorf(v.z);
+    return v;
+}
+inline __host__ __device__ float4 floorf(float4 v)
+{
+    v.x = floorf(v.x);
+    v.y = floorf(v.y);
+    v.z = floorf(v.z);
+    v.w = floorf(v.w);
+    return v;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// frac
+// - fractional part, computed as v - floorf(v), of a scalar or of each component
+////////////////////////////////////////////////////////////////////////////////
+
+inline __host__ __device__ float fracf(float v)
+{
+    const float whole = floorf(v);
+    return v - whole;
+}
+inline __host__ __device__ float2 fracf(float2 v)
+{
+    v.x = fracf(v.x);
+    v.y = fracf(v.y);
+    return v;
+}
+inline __host__ __device__ float3 fracf(float3 v)
+{
+    v.x = fracf(v.x);
+    v.y = fracf(v.y);
+    v.z = fracf(v.z);
+    return v;
+}
+inline __host__ __device__ float4 fracf(float4 v)
+{
+    v.x = fracf(v.x);
+    v.y = fracf(v.y);
+    v.z = fracf(v.z);
+    v.w = fracf(v.w);
+    return v;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// fmod
+// - applies fmodf to each pair of matching components
+////////////////////////////////////////////////////////////////////////////////
+
+inline __host__ __device__ float2 fmodf(float2 a, float2 b)
+{
+    a.x = fmodf(a.x, b.x);
+    a.y = fmodf(a.y, b.y);
+    return a;
+}
+inline __host__ __device__ float3 fmodf(float3 a, float3 b)
+{
+    a.x = fmodf(a.x, b.x);
+    a.y = fmodf(a.y, b.y);
+    a.z = fmodf(a.z, b.z);
+    return a;
+}
+inline __host__ __device__ float4 fmodf(float4 a, float4 b)
+{
+    a.x = fmodf(a.x, b.x);
+    a.y = fmodf(a.y, b.y);
+    a.z = fmodf(a.z, b.z);
+    a.w = fmodf(a.w, b.w);
+    return a;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// absolute value
+// - fabs for float vectors, abs for int vectors, applied per component
+////////////////////////////////////////////////////////////////////////////////
+
+inline __host__ __device__ float2 fabs(float2 v)
+{
+    v.x = fabs(v.x);
+    v.y = fabs(v.y);
+    return v;
+}
+inline __host__ __device__ float3 fabs(float3 v)
+{
+    v.x = fabs(v.x);
+    v.y = fabs(v.y);
+    v.z = fabs(v.z);
+    return v;
+}
+inline __host__ __device__ float4 fabs(float4 v)
+{
+    v.x = fabs(v.x);
+    v.y = fabs(v.y);
+    v.z = fabs(v.z);
+    v.w = fabs(v.w);
+    return v;
+}
+
+inline __host__ __device__ int2 abs(int2 v)
+{
+    v.x = abs(v.x);
+    v.y = abs(v.y);
+    return v;
+}
+inline __host__ __device__ int3 abs(int3 v)
+{
+    v.x = abs(v.x);
+    v.y = abs(v.y);
+    v.z = abs(v.z);
+    return v;
+}
+inline __host__ __device__ int4 abs(int4 v)
+{
+    v.x = abs(v.x);
+    v.y = abs(v.y);
+    v.z = abs(v.z);
+    v.w = abs(v.w);
+    return v;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// reflect
+// - mirrors incident ray i about surface normal n: i - 2 * n * dot(n, i)
+// - n should be normalized; the result then has the same length as i
+////////////////////////////////////////////////////////////////////////////////
+
+inline __host__ __device__ float3 reflect(float3 i, float3 n)
+{
+    const float nDotI = dot(n, i);
+    return i - 2.0f * n * nDotI;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// cross product
+// - a x b for 3-component float vectors
+////////////////////////////////////////////////////////////////////////////////
+
+inline __host__ __device__ float3 cross(float3 a, float3 b)
+{
+    float3 c;
+    c.x = a.y * b.z - a.z * b.y;
+    c.y = a.z * b.x - a.x * b.z;
+    c.z = a.x * b.y - a.y * b.x;
+    return c;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// smoothstep - cubic blend y*y*(3 - 2*y) of y = clamp((x - a) / (b - a), 0, 1)
+// - returns 0 if x < a and 1 if x > b (for a < b); vector overloads work per component
+// - in between, rises smoothly from 0 to 1 as x moves from a to b
+// - b - a is the divisor and a == b is not special-cased
+////////////////////////////////////////////////////////////////////////////////
+
+inline __device__ __host__ float smoothstep(float a, float b, float x)
+{
+    float y = clamp((x - a) / (b - a), 0.0f, 1.0f);
+    return (y * y * (3.0f - (2.0f * y)));
+}
+inline __device__ __host__ float2 smoothstep(float2 a, float2 b, float2 x)
+{
+    float2 y = clamp((x - a) / (b - a), 0.0f, 1.0f);
+    return (y * y * (make_float2(3.0f) - (make_float2(2.0f) * y)));
+}
+inline __device__ __host__ float3 smoothstep(float3 a, float3 b, float3 x)
+{
+    float3 y = clamp((x - a) / (b - a), 0.0f, 1.0f);
+    return (y * y * (make_float3(3.0f) - (make_float3(2.0f) * y)));
+}
+inline __device__ __host__ float4 smoothstep(float4 a, float4 b, float4 x)
+{
+    float4 y = clamp((x - a) / (b - a), 0.0f, 1.0f);
+    return (y * y * (make_float4(3.0f) - (make_float4(2.0f) * y)));
+}
+
+#endif
\ No newline at end of file
diff --git a/csrc/optixdev.cu b/csrc/optixdev.cu
new file mode 100644
index 0000000..ca78f97
--- /dev/null
+++ b/csrc/optixdev.cu
@@ -0,0 +1,59 @@
+#include
+#include
+#include "optixinc.h"
+#include "helper_math.h"
+#include
+
+#ifdef __INTELLISENSE__
+int __float_as_int(float in);
+float __int_as_float(int in);
+// Prototypes seen only when __INTELLISENSE__ is defined (IDE parsing); add other intrinsics as needed
+#endif
+
+extern "C" {
+    __constant__ LaunchParams optixLaunchParams; // fields read below: traversable, rays_o, rays_d, t_max, out_i, out_t
+
+    __global__ void __raygen__rg() // traces one ray per launch_index.x and stores its hit index and distance
+    {
+        const uint3 launch_index = optixGetLaunchIndex();
+
+        // Read this ray's origin and direction; rays_o / rays_d are indexed as float3 arrays by launch_index.x
+        float3 ray_origin = ((float3*)optixLaunchParams.rays_o)[launch_index.x];
+        float3 ray_direction = ((float3*)optixLaunchParams.rays_d)[launch_index.x];
+
+        // Trace the ray; i and t_as_int are the two payload values, written by the closest-hit or miss program below
+        uint32_t i, t_as_int;
+        optixTrace(
+            optixLaunchParams.traversable,
+            ray_origin,
+            ray_direction,
+            0.0f, // tmin
+            optixLaunchParams.t_max, // tmax
+            0.0f, // ray time
+            OptixVisibilityMask(255),
+            OPTIX_RAY_FLAG_DISABLE_ANYHIT, // alternative: OPTIX_RAY_FLAG_NONE
+            0, // SBT offset
+            1, // SBT stride
+            0, // missSBTIndex
+            i, t_as_int
+        );
+
+        // Payload 0 is the primitive index (0 on a miss); payload 1 is the bit pattern of the distance (t_max on a miss)
+        // Reinterpret the distance bits as float, then store both results in this ray's output slot
+        float t = __int_as_float(t_as_int);
+        ((uint32_t*)optixLaunchParams.out_i)[launch_index.x] = i;
+        ((float*)optixLaunchParams.out_t)[launch_index.x] = t;
+    }
+
+    __global__ void __closesthit__ch() // payload 0 = hit primitive index, payload 1 = bits of optixGetRayTmax()
+    {
+        optixSetPayload_0(optixGetPrimitiveIndex());
+        optixSetPayload_1(__float_as_int(optixGetRayTmax())); // presumably the hit distance inside a closest-hit program - confirm against the OptiX docs
+    }
+
+    __global__ void __miss__far() // NOTE(review): index 0 on a miss equals a hit on primitive 0; presumably callers test out_t against t_max - confirm
+    {
+        optixSetPayload_0(0);
+        optixSetPayload_1(__float_as_int(optixLaunchParams.t_max));
+    }
+}
diff --git a/csrc/optixdevptx.h b/csrc/optixdevptx.h
new file mode 100644
index 0000000..4deecbb
--- /dev/null
+++ b/csrc/optixdevptx.h
@@ -0,0 +1,233 @@
+unsigned char generated_torchoptixdev_ptx[] = {
+  0x2f, 0x2f, 0x0d, 0x0a, 0x2f, 0x2f, 0x20, 0x47, 0x65, 0x6e, 0x65, 0x72,
+  0x61, 0x74, 0x65, 0x64, 0x20, 0x62, 0x79, 0x20, 0x4e, 0x56, 0x49, 0x44,
+  0x49, 0x41, 0x20, 0x4e, 0x56, 0x56, 0x4d, 0x20, 0x43, 0x6f, 0x6d, 0x70,
+  0x69, 0x6c, 0x65, 0x72, 0x0d, 0x0a, 0x2f, 0x2f, 0x0d, 0x0a, 0x2f, 0x2f,
+  0x20, 0x43, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x72, 0x20, 0x42, 0x75,
+  0x69, 0x6c, 0x64, 0x20, 0x49, 0x44, 0x3a, 0x20, 0x43, 0x4c, 0x2d, 0x33,
+  0x31, 0x38, 0x33, 0x33, 0x39, 0x30, 0x35, 0x0d, 0x0a, 0x2f, 0x2f, 0x20,
+  0x43, 0x75, 0x64, 0x61, 0x20, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x61,
+  0x74, 0x69, 0x6f, 0x6e, 0x20, 0x74, 0x6f, 0x6f, 0x6c, 0x73, 0x2c, 0x20,
+  0x72, 0x65, 0x6c, 0x65, 0x61, 0x73, 0x65, 0x20, 0x31, 0x31, 0x2e, 0x38,
+  0x2c, 0x20, 0x56, 0x31, 0x31, 0x2e, 0x38, 0x2e, 0x38, 0x39, 0x0d, 0x0a,
+  0x2f, 0x2f, 0x20, 0x42, 0x61, 0x73, 0x65, 0x64, 0x20, 0x6f, 0x6e, 0x20,
+  0x4e, 0x56, 0x56, 0x4d, 0x20, 0x37, 0x2e, 0x30, 0x2e, 0x31, 0x0d, 0x0a,
+  0x2f, 0x2f, 0x0d, 0x0a, 0x0d, 0x0a, 0x2e, 0x76, 0x65, 0x72, 0x73, 0x69,
+  0x6f, 0x6e, 0x20, 0x37, 0x2e, 0x38, 0x0d, 0x0a, 0x2e, 0x74, 
0x61, 0x72, + 0x67, 0x65, 0x74, 0x20, 0x73, 0x6d, 0x5f, 0x35, 0x30, 0x0d, 0x0a, 0x2e, + 0x61, 0x64, 0x64, 0x72, 0x65, 0x73, 0x73, 0x5f, 0x73, 0x69, 0x7a, 0x65, + 0x20, 0x36, 0x34, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x2f, 0x2f, 0x20, 0x2e, + 0x67, 0x6c, 0x6f, 0x62, 0x6c, 0x09, 0x5f, 0x5f, 0x72, 0x61, 0x79, 0x67, + 0x65, 0x6e, 0x5f, 0x5f, 0x72, 0x67, 0x0d, 0x0a, 0x2e, 0x63, 0x6f, 0x6e, + 0x73, 0x74, 0x20, 0x2e, 0x61, 0x6c, 0x69, 0x67, 0x6e, 0x20, 0x38, 0x20, + 0x2e, 0x62, 0x38, 0x20, 0x6f, 0x70, 0x74, 0x69, 0x78, 0x4c, 0x61, 0x75, + 0x6e, 0x63, 0x68, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x5b, 0x34, 0x38, + 0x5d, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x2e, 0x76, 0x69, 0x73, 0x69, 0x62, + 0x6c, 0x65, 0x20, 0x2e, 0x65, 0x6e, 0x74, 0x72, 0x79, 0x20, 0x5f, 0x5f, + 0x72, 0x61, 0x79, 0x67, 0x65, 0x6e, 0x5f, 0x5f, 0x72, 0x67, 0x28, 0x29, + 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x2e, 0x72, 0x65, 0x67, 0x20, 0x2e, + 0x66, 0x33, 0x32, 0x20, 0x09, 0x25, 0x66, 0x3c, 0x31, 0x30, 0x3e, 0x3b, + 0x0d, 0x0a, 0x09, 0x2e, 0x72, 0x65, 0x67, 0x20, 0x2e, 0x62, 0x33, 0x32, + 0x20, 0x09, 0x25, 0x72, 0x3c, 0x37, 0x37, 0x3e, 0x3b, 0x0d, 0x0a, 0x09, + 0x2e, 0x72, 0x65, 0x67, 0x20, 0x2e, 0x62, 0x36, 0x34, 0x20, 0x09, 0x25, + 0x72, 0x64, 0x3c, 0x31, 0x36, 0x3e, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x0d, + 0x0a, 0x09, 0x2f, 0x2f, 0x20, 0x62, 0x65, 0x67, 0x69, 0x6e, 0x20, 0x69, + 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x20, 0x61, 0x73, 0x6d, 0x0d, 0x0a, 0x09, + 0x63, 0x61, 0x6c, 0x6c, 0x20, 0x28, 0x25, 0x72, 0x31, 0x29, 0x2c, 0x20, + 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x78, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x6c, + 0x61, 0x75, 0x6e, 0x63, 0x68, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5f, + 0x78, 0x2c, 0x20, 0x28, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x2f, 0x2f, 0x20, + 0x65, 0x6e, 0x64, 0x20, 0x69, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x20, 0x61, + 0x73, 0x6d, 0x0d, 0x0a, 0x09, 0x6c, 0x64, 0x2e, 0x63, 0x6f, 0x6e, 0x73, + 0x74, 0x2e, 0x75, 0x36, 0x34, 0x20, 0x09, 0x25, 0x72, 0x64, 0x32, 0x2c, + 0x20, 0x5b, 0x6f, 0x70, 0x74, 0x69, 0x78, 0x4c, 0x61, 0x75, 
0x6e, 0x63, + 0x68, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2b, 0x38, 0x5d, 0x3b, 0x0d, + 0x0a, 0x09, 0x63, 0x76, 0x74, 0x61, 0x2e, 0x74, 0x6f, 0x2e, 0x67, 0x6c, + 0x6f, 0x62, 0x61, 0x6c, 0x2e, 0x75, 0x36, 0x34, 0x20, 0x09, 0x25, 0x72, + 0x64, 0x33, 0x2c, 0x20, 0x25, 0x72, 0x64, 0x32, 0x3b, 0x0d, 0x0a, 0x09, + 0x6d, 0x75, 0x6c, 0x2e, 0x77, 0x69, 0x64, 0x65, 0x2e, 0x75, 0x33, 0x32, + 0x20, 0x09, 0x25, 0x72, 0x64, 0x34, 0x2c, 0x20, 0x25, 0x72, 0x31, 0x2c, + 0x20, 0x31, 0x32, 0x3b, 0x0d, 0x0a, 0x09, 0x61, 0x64, 0x64, 0x2e, 0x73, + 0x36, 0x34, 0x20, 0x09, 0x25, 0x72, 0x64, 0x35, 0x2c, 0x20, 0x25, 0x72, + 0x64, 0x33, 0x2c, 0x20, 0x25, 0x72, 0x64, 0x34, 0x3b, 0x0d, 0x0a, 0x09, + 0x6c, 0x64, 0x2e, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x2e, 0x66, 0x33, + 0x32, 0x20, 0x09, 0x25, 0x66, 0x31, 0x2c, 0x20, 0x5b, 0x25, 0x72, 0x64, + 0x35, 0x5d, 0x3b, 0x0d, 0x0a, 0x09, 0x6c, 0x64, 0x2e, 0x67, 0x6c, 0x6f, + 0x62, 0x61, 0x6c, 0x2e, 0x66, 0x33, 0x32, 0x20, 0x09, 0x25, 0x66, 0x32, + 0x2c, 0x20, 0x5b, 0x25, 0x72, 0x64, 0x35, 0x2b, 0x34, 0x5d, 0x3b, 0x0d, + 0x0a, 0x09, 0x6c, 0x64, 0x2e, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x2e, + 0x66, 0x33, 0x32, 0x20, 0x09, 0x25, 0x66, 0x33, 0x2c, 0x20, 0x5b, 0x25, + 0x72, 0x64, 0x35, 0x2b, 0x38, 0x5d, 0x3b, 0x0d, 0x0a, 0x09, 0x6c, 0x64, + 0x2e, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x2e, 0x75, 0x36, 0x34, 0x20, 0x09, + 0x25, 0x72, 0x64, 0x36, 0x2c, 0x20, 0x5b, 0x6f, 0x70, 0x74, 0x69, 0x78, + 0x4c, 0x61, 0x75, 0x6e, 0x63, 0x68, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, + 0x2b, 0x31, 0x36, 0x5d, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x76, 0x74, 0x61, + 0x2e, 0x74, 0x6f, 0x2e, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x2e, 0x75, + 0x36, 0x34, 0x20, 0x09, 0x25, 0x72, 0x64, 0x37, 0x2c, 0x20, 0x25, 0x72, + 0x64, 0x36, 0x3b, 0x0d, 0x0a, 0x09, 0x61, 0x64, 0x64, 0x2e, 0x73, 0x36, + 0x34, 0x20, 0x09, 0x25, 0x72, 0x64, 0x38, 0x2c, 0x20, 0x25, 0x72, 0x64, + 0x37, 0x2c, 0x20, 0x25, 0x72, 0x64, 0x34, 0x3b, 0x0d, 0x0a, 0x09, 0x6c, + 0x64, 0x2e, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x2e, 0x66, 
0x33, 0x32, + 0x20, 0x09, 0x25, 0x66, 0x34, 0x2c, 0x20, 0x5b, 0x25, 0x72, 0x64, 0x38, + 0x5d, 0x3b, 0x0d, 0x0a, 0x09, 0x6c, 0x64, 0x2e, 0x67, 0x6c, 0x6f, 0x62, + 0x61, 0x6c, 0x2e, 0x66, 0x33, 0x32, 0x20, 0x09, 0x25, 0x66, 0x35, 0x2c, + 0x20, 0x5b, 0x25, 0x72, 0x64, 0x38, 0x2b, 0x34, 0x5d, 0x3b, 0x0d, 0x0a, + 0x09, 0x6c, 0x64, 0x2e, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x2e, 0x66, + 0x33, 0x32, 0x20, 0x09, 0x25, 0x66, 0x36, 0x2c, 0x20, 0x5b, 0x25, 0x72, + 0x64, 0x38, 0x2b, 0x38, 0x5d, 0x3b, 0x0d, 0x0a, 0x09, 0x6c, 0x64, 0x2e, + 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x2e, 0x75, 0x36, 0x34, 0x20, 0x09, 0x25, + 0x72, 0x64, 0x31, 0x2c, 0x20, 0x5b, 0x6f, 0x70, 0x74, 0x69, 0x78, 0x4c, + 0x61, 0x75, 0x6e, 0x63, 0x68, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x5d, + 0x3b, 0x0d, 0x0a, 0x09, 0x6c, 0x64, 0x2e, 0x63, 0x6f, 0x6e, 0x73, 0x74, + 0x2e, 0x66, 0x33, 0x32, 0x20, 0x09, 0x25, 0x66, 0x38, 0x2c, 0x20, 0x5b, + 0x6f, 0x70, 0x74, 0x69, 0x78, 0x4c, 0x61, 0x75, 0x6e, 0x63, 0x68, 0x50, + 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2b, 0x34, 0x30, 0x5d, 0x3b, 0x0d, 0x0a, + 0x09, 0x6d, 0x6f, 0x76, 0x2e, 0x66, 0x33, 0x32, 0x20, 0x09, 0x25, 0x66, + 0x39, 0x2c, 0x20, 0x30, 0x66, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, + 0x30, 0x3b, 0x0d, 0x0a, 0x09, 0x6d, 0x6f, 0x76, 0x2e, 0x75, 0x33, 0x32, + 0x20, 0x09, 0x25, 0x72, 0x33, 0x37, 0x2c, 0x20, 0x32, 0x35, 0x35, 0x3b, + 0x0d, 0x0a, 0x09, 0x6d, 0x6f, 0x76, 0x2e, 0x75, 0x33, 0x32, 0x20, 0x09, + 0x25, 0x72, 0x34, 0x30, 0x2c, 0x20, 0x31, 0x3b, 0x0d, 0x0a, 0x09, 0x6d, + 0x6f, 0x76, 0x2e, 0x75, 0x33, 0x32, 0x20, 0x09, 0x25, 0x72, 0x34, 0x32, + 0x2c, 0x20, 0x32, 0x3b, 0x0d, 0x0a, 0x09, 0x6d, 0x6f, 0x76, 0x2e, 0x75, + 0x33, 0x32, 0x20, 0x09, 0x25, 0x72, 0x37, 0x34, 0x2c, 0x20, 0x30, 0x3b, + 0x0d, 0x0a, 0x09, 0x2f, 0x2f, 0x20, 0x62, 0x65, 0x67, 0x69, 0x6e, 0x20, + 0x69, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x20, 0x61, 0x73, 0x6d, 0x0d, 0x0a, + 0x09, 0x63, 0x61, 0x6c, 0x6c, 0x28, 0x25, 0x72, 0x34, 0x2c, 0x25, 0x72, + 0x35, 0x2c, 0x25, 0x72, 0x36, 0x2c, 0x25, 0x72, 0x37, 0x2c, 
0x25, 0x72, + 0x38, 0x2c, 0x25, 0x72, 0x39, 0x2c, 0x25, 0x72, 0x31, 0x30, 0x2c, 0x25, + 0x72, 0x31, 0x31, 0x2c, 0x25, 0x72, 0x31, 0x32, 0x2c, 0x25, 0x72, 0x31, + 0x33, 0x2c, 0x25, 0x72, 0x31, 0x34, 0x2c, 0x25, 0x72, 0x31, 0x35, 0x2c, + 0x25, 0x72, 0x31, 0x36, 0x2c, 0x25, 0x72, 0x31, 0x37, 0x2c, 0x25, 0x72, + 0x31, 0x38, 0x2c, 0x25, 0x72, 0x31, 0x39, 0x2c, 0x25, 0x72, 0x32, 0x30, + 0x2c, 0x25, 0x72, 0x32, 0x31, 0x2c, 0x25, 0x72, 0x32, 0x32, 0x2c, 0x25, + 0x72, 0x32, 0x33, 0x2c, 0x25, 0x72, 0x32, 0x34, 0x2c, 0x25, 0x72, 0x32, + 0x35, 0x2c, 0x25, 0x72, 0x32, 0x36, 0x2c, 0x25, 0x72, 0x32, 0x37, 0x2c, + 0x25, 0x72, 0x32, 0x38, 0x2c, 0x25, 0x72, 0x32, 0x39, 0x2c, 0x25, 0x72, + 0x33, 0x30, 0x2c, 0x25, 0x72, 0x33, 0x31, 0x2c, 0x25, 0x72, 0x33, 0x32, + 0x2c, 0x25, 0x72, 0x33, 0x33, 0x2c, 0x25, 0x72, 0x33, 0x34, 0x2c, 0x25, + 0x72, 0x33, 0x35, 0x29, 0x2c, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x78, 0x5f, + 0x74, 0x72, 0x61, 0x63, 0x65, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x64, 0x5f, + 0x33, 0x32, 0x2c, 0x28, 0x25, 0x72, 0x37, 0x34, 0x2c, 0x25, 0x72, 0x64, + 0x31, 0x2c, 0x25, 0x66, 0x31, 0x2c, 0x25, 0x66, 0x32, 0x2c, 0x25, 0x66, + 0x33, 0x2c, 0x25, 0x66, 0x34, 0x2c, 0x25, 0x66, 0x35, 0x2c, 0x25, 0x66, + 0x36, 0x2c, 0x25, 0x66, 0x39, 0x2c, 0x25, 0x66, 0x38, 0x2c, 0x25, 0x66, + 0x39, 0x2c, 0x25, 0x72, 0x33, 0x37, 0x2c, 0x25, 0x72, 0x34, 0x30, 0x2c, + 0x25, 0x72, 0x37, 0x34, 0x2c, 0x25, 0x72, 0x34, 0x30, 0x2c, 0x25, 0x72, + 0x37, 0x34, 0x2c, 0x25, 0x72, 0x34, 0x32, 0x2c, 0x25, 0x72, 0x37, 0x35, + 0x2c, 0x25, 0x72, 0x37, 0x36, 0x2c, 0x25, 0x72, 0x37, 0x34, 0x2c, 0x25, + 0x72, 0x37, 0x34, 0x2c, 0x25, 0x72, 0x37, 0x34, 0x2c, 0x25, 0x72, 0x37, + 0x34, 0x2c, 0x25, 0x72, 0x37, 0x34, 0x2c, 0x25, 0x72, 0x37, 0x34, 0x2c, + 0x25, 0x72, 0x37, 0x34, 0x2c, 0x25, 0x72, 0x37, 0x34, 0x2c, 0x25, 0x72, + 0x37, 0x34, 0x2c, 0x25, 0x72, 0x37, 0x34, 0x2c, 0x25, 0x72, 0x37, 0x34, + 0x2c, 0x25, 0x72, 0x37, 0x34, 0x2c, 0x25, 0x72, 0x37, 0x34, 0x2c, 0x25, + 0x72, 0x37, 0x34, 0x2c, 0x25, 0x72, 0x37, 0x34, 0x2c, 0x25, 
0x72, 0x37, + 0x34, 0x2c, 0x25, 0x72, 0x37, 0x34, 0x2c, 0x25, 0x72, 0x37, 0x34, 0x2c, + 0x25, 0x72, 0x37, 0x34, 0x2c, 0x25, 0x72, 0x37, 0x34, 0x2c, 0x25, 0x72, + 0x37, 0x34, 0x2c, 0x25, 0x72, 0x37, 0x34, 0x2c, 0x25, 0x72, 0x37, 0x34, + 0x2c, 0x25, 0x72, 0x37, 0x34, 0x2c, 0x25, 0x72, 0x37, 0x34, 0x2c, 0x25, + 0x72, 0x37, 0x34, 0x2c, 0x25, 0x72, 0x37, 0x34, 0x2c, 0x25, 0x72, 0x37, + 0x34, 0x2c, 0x25, 0x72, 0x37, 0x34, 0x2c, 0x25, 0x72, 0x37, 0x34, 0x29, + 0x3b, 0x0d, 0x0a, 0x09, 0x2f, 0x2f, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x69, + 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x20, 0x61, 0x73, 0x6d, 0x0d, 0x0a, 0x09, + 0x6c, 0x64, 0x2e, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x2e, 0x75, 0x36, 0x34, + 0x20, 0x09, 0x25, 0x72, 0x64, 0x39, 0x2c, 0x20, 0x5b, 0x6f, 0x70, 0x74, + 0x69, 0x78, 0x4c, 0x61, 0x75, 0x6e, 0x63, 0x68, 0x50, 0x61, 0x72, 0x61, + 0x6d, 0x73, 0x2b, 0x33, 0x32, 0x5d, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x76, + 0x74, 0x61, 0x2e, 0x74, 0x6f, 0x2e, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, + 0x2e, 0x75, 0x36, 0x34, 0x20, 0x09, 0x25, 0x72, 0x64, 0x31, 0x30, 0x2c, + 0x20, 0x25, 0x72, 0x64, 0x39, 0x3b, 0x0d, 0x0a, 0x09, 0x6d, 0x75, 0x6c, + 0x2e, 0x77, 0x69, 0x64, 0x65, 0x2e, 0x75, 0x33, 0x32, 0x20, 0x09, 0x25, + 0x72, 0x64, 0x31, 0x31, 0x2c, 0x20, 0x25, 0x72, 0x31, 0x2c, 0x20, 0x34, + 0x3b, 0x0d, 0x0a, 0x09, 0x61, 0x64, 0x64, 0x2e, 0x73, 0x36, 0x34, 0x20, + 0x09, 0x25, 0x72, 0x64, 0x31, 0x32, 0x2c, 0x20, 0x25, 0x72, 0x64, 0x31, + 0x30, 0x2c, 0x20, 0x25, 0x72, 0x64, 0x31, 0x31, 0x3b, 0x0d, 0x0a, 0x09, + 0x73, 0x74, 0x2e, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x2e, 0x75, 0x33, + 0x32, 0x20, 0x09, 0x5b, 0x25, 0x72, 0x64, 0x31, 0x32, 0x5d, 0x2c, 0x20, + 0x25, 0x72, 0x34, 0x3b, 0x0d, 0x0a, 0x09, 0x6c, 0x64, 0x2e, 0x63, 0x6f, + 0x6e, 0x73, 0x74, 0x2e, 0x75, 0x36, 0x34, 0x20, 0x09, 0x25, 0x72, 0x64, + 0x31, 0x33, 0x2c, 0x20, 0x5b, 0x6f, 0x70, 0x74, 0x69, 0x78, 0x4c, 0x61, + 0x75, 0x6e, 0x63, 0x68, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2b, 0x32, + 0x34, 0x5d, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x76, 0x74, 0x61, 
0x2e, 0x74, + 0x6f, 0x2e, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x2e, 0x75, 0x36, 0x34, + 0x20, 0x09, 0x25, 0x72, 0x64, 0x31, 0x34, 0x2c, 0x20, 0x25, 0x72, 0x64, + 0x31, 0x33, 0x3b, 0x0d, 0x0a, 0x09, 0x61, 0x64, 0x64, 0x2e, 0x73, 0x36, + 0x34, 0x20, 0x09, 0x25, 0x72, 0x64, 0x31, 0x35, 0x2c, 0x20, 0x25, 0x72, + 0x64, 0x31, 0x34, 0x2c, 0x20, 0x25, 0x72, 0x64, 0x31, 0x31, 0x3b, 0x0d, + 0x0a, 0x09, 0x73, 0x74, 0x2e, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x2e, + 0x75, 0x33, 0x32, 0x20, 0x09, 0x5b, 0x25, 0x72, 0x64, 0x31, 0x35, 0x5d, + 0x2c, 0x20, 0x25, 0x72, 0x35, 0x3b, 0x0d, 0x0a, 0x09, 0x72, 0x65, 0x74, + 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x09, 0x2f, 0x2f, 0x20, + 0x2e, 0x67, 0x6c, 0x6f, 0x62, 0x6c, 0x09, 0x5f, 0x5f, 0x63, 0x6c, 0x6f, + 0x73, 0x65, 0x73, 0x74, 0x68, 0x69, 0x74, 0x5f, 0x5f, 0x63, 0x68, 0x0d, + 0x0a, 0x2e, 0x76, 0x69, 0x73, 0x69, 0x62, 0x6c, 0x65, 0x20, 0x2e, 0x65, + 0x6e, 0x74, 0x72, 0x79, 0x20, 0x5f, 0x5f, 0x63, 0x6c, 0x6f, 0x73, 0x65, + 0x73, 0x74, 0x68, 0x69, 0x74, 0x5f, 0x5f, 0x63, 0x68, 0x28, 0x29, 0x0d, + 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x2e, 0x72, 0x65, 0x67, 0x20, 0x2e, 0x66, + 0x33, 0x32, 0x20, 0x09, 0x25, 0x66, 0x3c, 0x32, 0x3e, 0x3b, 0x0d, 0x0a, + 0x09, 0x2e, 0x72, 0x65, 0x67, 0x20, 0x2e, 0x62, 0x33, 0x32, 0x20, 0x09, + 0x25, 0x72, 0x3c, 0x36, 0x3e, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x0d, 0x0a, + 0x09, 0x2f, 0x2f, 0x20, 0x62, 0x65, 0x67, 0x69, 0x6e, 0x20, 0x69, 0x6e, + 0x6c, 0x69, 0x6e, 0x65, 0x20, 0x61, 0x73, 0x6d, 0x0d, 0x0a, 0x09, 0x63, + 0x61, 0x6c, 0x6c, 0x20, 0x28, 0x25, 0x72, 0x31, 0x29, 0x2c, 0x20, 0x5f, + 0x6f, 0x70, 0x74, 0x69, 0x78, 0x5f, 0x72, 0x65, 0x61, 0x64, 0x5f, 0x70, + 0x72, 0x69, 0x6d, 0x69, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x69, 0x64, 0x78, + 0x2c, 0x20, 0x28, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x2f, 0x2f, 0x20, 0x65, + 0x6e, 0x64, 0x20, 0x69, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x20, 0x61, 0x73, + 0x6d, 0x0d, 0x0a, 0x09, 0x6d, 0x6f, 0x76, 0x2e, 0x75, 0x33, 0x32, 0x20, + 0x09, 0x25, 0x72, 0x32, 0x2c, 0x20, 0x30, 0x3b, 0x0d, 0x0a, 
0x09, 0x2f, + 0x2f, 0x20, 0x62, 0x65, 0x67, 0x69, 0x6e, 0x20, 0x69, 0x6e, 0x6c, 0x69, + 0x6e, 0x65, 0x20, 0x61, 0x73, 0x6d, 0x0d, 0x0a, 0x09, 0x63, 0x61, 0x6c, + 0x6c, 0x20, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x78, 0x5f, 0x73, 0x65, 0x74, + 0x5f, 0x70, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x2c, 0x20, 0x28, 0x25, + 0x72, 0x32, 0x2c, 0x20, 0x25, 0x72, 0x31, 0x29, 0x3b, 0x0d, 0x0a, 0x09, + 0x2f, 0x2f, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x69, 0x6e, 0x6c, 0x69, 0x6e, + 0x65, 0x20, 0x61, 0x73, 0x6d, 0x0d, 0x0a, 0x09, 0x2f, 0x2f, 0x20, 0x62, + 0x65, 0x67, 0x69, 0x6e, 0x20, 0x69, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x20, + 0x61, 0x73, 0x6d, 0x0d, 0x0a, 0x09, 0x63, 0x61, 0x6c, 0x6c, 0x20, 0x28, + 0x25, 0x66, 0x31, 0x29, 0x2c, 0x20, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x78, + 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x72, 0x61, 0x79, 0x5f, 0x74, 0x6d, 0x61, + 0x78, 0x2c, 0x20, 0x28, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x2f, 0x2f, 0x20, + 0x65, 0x6e, 0x64, 0x20, 0x69, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x20, 0x61, + 0x73, 0x6d, 0x0d, 0x0a, 0x09, 0x6d, 0x6f, 0x76, 0x2e, 0x62, 0x33, 0x32, + 0x20, 0x09, 0x25, 0x72, 0x35, 0x2c, 0x20, 0x25, 0x66, 0x31, 0x3b, 0x0d, + 0x0a, 0x09, 0x6d, 0x6f, 0x76, 0x2e, 0x75, 0x33, 0x32, 0x20, 0x09, 0x25, + 0x72, 0x34, 0x2c, 0x20, 0x31, 0x3b, 0x0d, 0x0a, 0x09, 0x2f, 0x2f, 0x20, + 0x62, 0x65, 0x67, 0x69, 0x6e, 0x20, 0x69, 0x6e, 0x6c, 0x69, 0x6e, 0x65, + 0x20, 0x61, 0x73, 0x6d, 0x0d, 0x0a, 0x09, 0x63, 0x61, 0x6c, 0x6c, 0x20, + 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x78, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x70, + 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x2c, 0x20, 0x28, 0x25, 0x72, 0x34, + 0x2c, 0x20, 0x25, 0x72, 0x35, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x2f, 0x2f, + 0x20, 0x65, 0x6e, 0x64, 0x20, 0x69, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x20, + 0x61, 0x73, 0x6d, 0x0d, 0x0a, 0x09, 0x72, 0x65, 0x74, 0x3b, 0x0d, 0x0a, + 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x09, 0x2f, 0x2f, 0x20, 0x2e, 0x67, 0x6c, + 0x6f, 0x62, 0x6c, 0x09, 0x5f, 0x5f, 0x6d, 0x69, 0x73, 0x73, 0x5f, 0x5f, + 0x66, 0x61, 0x72, 0x0d, 0x0a, 0x2e, 0x76, 0x69, 0x73, 0x69, 
0x62, 0x6c, + 0x65, 0x20, 0x2e, 0x65, 0x6e, 0x74, 0x72, 0x79, 0x20, 0x5f, 0x5f, 0x6d, + 0x69, 0x73, 0x73, 0x5f, 0x5f, 0x66, 0x61, 0x72, 0x28, 0x29, 0x0d, 0x0a, + 0x7b, 0x0d, 0x0a, 0x09, 0x2e, 0x72, 0x65, 0x67, 0x20, 0x2e, 0x62, 0x33, + 0x32, 0x20, 0x09, 0x25, 0x72, 0x3c, 0x35, 0x3e, 0x3b, 0x0d, 0x0a, 0x0d, + 0x0a, 0x0d, 0x0a, 0x09, 0x6d, 0x6f, 0x76, 0x2e, 0x75, 0x33, 0x32, 0x20, + 0x09, 0x25, 0x72, 0x32, 0x2c, 0x20, 0x30, 0x3b, 0x0d, 0x0a, 0x09, 0x2f, + 0x2f, 0x20, 0x62, 0x65, 0x67, 0x69, 0x6e, 0x20, 0x69, 0x6e, 0x6c, 0x69, + 0x6e, 0x65, 0x20, 0x61, 0x73, 0x6d, 0x0d, 0x0a, 0x09, 0x63, 0x61, 0x6c, + 0x6c, 0x20, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x78, 0x5f, 0x73, 0x65, 0x74, + 0x5f, 0x70, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x2c, 0x20, 0x28, 0x25, + 0x72, 0x32, 0x2c, 0x20, 0x25, 0x72, 0x32, 0x29, 0x3b, 0x0d, 0x0a, 0x09, + 0x2f, 0x2f, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x69, 0x6e, 0x6c, 0x69, 0x6e, + 0x65, 0x20, 0x61, 0x73, 0x6d, 0x0d, 0x0a, 0x09, 0x6c, 0x64, 0x2e, 0x63, + 0x6f, 0x6e, 0x73, 0x74, 0x2e, 0x75, 0x33, 0x32, 0x20, 0x09, 0x25, 0x72, + 0x34, 0x2c, 0x20, 0x5b, 0x6f, 0x70, 0x74, 0x69, 0x78, 0x4c, 0x61, 0x75, + 0x6e, 0x63, 0x68, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2b, 0x34, 0x30, + 0x5d, 0x3b, 0x0d, 0x0a, 0x09, 0x6d, 0x6f, 0x76, 0x2e, 0x75, 0x33, 0x32, + 0x20, 0x09, 0x25, 0x72, 0x33, 0x2c, 0x20, 0x31, 0x3b, 0x0d, 0x0a, 0x09, + 0x2f, 0x2f, 0x20, 0x62, 0x65, 0x67, 0x69, 0x6e, 0x20, 0x69, 0x6e, 0x6c, + 0x69, 0x6e, 0x65, 0x20, 0x61, 0x73, 0x6d, 0x0d, 0x0a, 0x09, 0x63, 0x61, + 0x6c, 0x6c, 0x20, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x78, 0x5f, 0x73, 0x65, + 0x74, 0x5f, 0x70, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x2c, 0x20, 0x28, + 0x25, 0x72, 0x33, 0x2c, 0x20, 0x25, 0x72, 0x34, 0x29, 0x3b, 0x0d, 0x0a, + 0x09, 0x2f, 0x2f, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x69, 0x6e, 0x6c, 0x69, + 0x6e, 0x65, 0x20, 0x61, 0x73, 0x6d, 0x0d, 0x0a, 0x09, 0x72, 0x65, 0x74, + 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a +}; +unsigned int generated_torchoptixdev_ptx_len = 2758; diff --git 
a/csrc/optixhost.cpp b/csrc/optixhost.cpp
new file mode 100644
index 0000000..2c6099b
--- /dev/null
+++ b/csrc/optixhost.cpp
@@ -0,0 +1,608 @@
+#define Py_LIMITED_API PY_VERSION_HEX
+#include
+#include
+#include
+#include
+#include
+#include
+#include "optixinc.h"
+#include "optixdevptx.h"
+
+PyObject* torchoptix_module_ref;
+static OptixDeviceContext ocontext = nullptr;
+static OptixModule omodule = nullptr;
+// Program groups in the order they are packed into the SBT: raygen, miss, hit group.
+static OptixProgramGroup oprograms[3] = { nullptr, nullptr, nullptr };
+static OptixPipeline opipeline = nullptr;
+// Highest OptiX message level that optix_log_callback() prints (0-4).
+static int ologlevel = 4;
+
+// Log sink registered with the OptiX device context: prints every message
+// whose level does not exceed ologlevel and flushes so output is not lost.
+static void optix_log_callback(unsigned int level, const char* tag, const char* message, void* cbdata)
+{
+    (void)cbdata;
+    if (level <= (unsigned int)ologlevel)
+    {
+        printf("[OptiX] [%s: %u] %s\n", tag, level, message);
+        fflush(stdout);
+    }
+}
+
+PyDoc_STRVAR(torchoptix_log_level_doc, "set_log_level(level)\n\
+\n\
+Set OptiX log level (0-4).");
+
+// Python entry point set_log_level(level): validate the level, store it in
+// ologlevel and, if a context already exists, re-register the callback with it.
+// Raises ValueError for levels above 4 and RuntimeError if OptiX rejects the update.
+PyObject* torchoptix_log_level(PyObject* self, PyObject* args) {
+    unsigned long long level;
+
+    if (!PyArg_ParseTuple(args, "K", &level))
+        return nullptr;
+
+    // "K" produces an unsigned value, so only the upper bound needs checking.
+    if (level > 4)
+    {
+        PyErr_SetString(PyExc_ValueError, "TorchOptiX: invalid log level (0-4 allowed).");
+        return nullptr;
+    }
+
+    // The context keeps the level it was created with, so an existing context
+    // must be told explicitly or a raised level would never reach the callback.
+    if (ocontext && optixDeviceContextSetLogCallback(ocontext, &optix_log_callback, nullptr, (unsigned int)level) != OPTIX_SUCCESS)
+    {
+        PyErr_SetString(PyExc_RuntimeError, "OptiX set log callback failed.");
+        return nullptr;
+    }
+
+    ologlevel = (int)level;
+    Py_RETURN_NONE;
+}
+
+// Convert a CUDA driver result into a Python RuntimeError.
+// Returns true on CUDA_SUCCESS; otherwise sets the exception and returns false.
+inline bool check_cuda(CUresult result)
+{
+    if (result == CUDA_SUCCESS)
+        return true;
+
+    // cuGetErrorString can itself fail; never hand PyErr_SetString a bad pointer.
+    const char* errorStr = nullptr;
+    if (cuGetErrorString(result, &errorStr) != CUDA_SUCCESS || !errorStr)
+        errorStr = "Unknown CUDA error.";
+    PyErr_SetString(PyExc_RuntimeError, errorStr);
+    return false;
+}
+
+// Free a device allocation made with cuMemAlloc (no-op for 0). A failure is
+// reported only while `ok` is still true, so that an earlier Python exception
+// is never overwritten; `ok` is cleared when the free itself fails.
+static void free_device(CUdeviceptr ptr, bool& ok)
+{
+    if (!ptr)
+        return;
+    const CUresult result = cuMemFree(ptr);
+    if (ok)
+        ok = check_cuda(result);
+}
+
+// Read element `index` of the handle tuple returned by build() as an unsigned
+// 64-bit integer. Returns false with a Python exception set when `handle` is
+// not a tuple, is too short, or the element is not a valid unsigned integer.
+static bool handle_item(PyObject* handle, Py_ssize_t index, unsigned long long* out)
+{
+    if (!PyTuple_Check(handle))
+    {
+        PyErr_SetString(PyExc_TypeError, "TorchOptiX: handle must be the tuple returned by build().");
+        return false;
+    }
+    PyObject* item = PyTuple_GetItem(handle, index); // borrowed reference
+    if (!item)
+        return false;
+    const unsigned long long value = PyLong_AsUnsignedLongLong(item);
+    if (value == (unsigned long long)-1 && PyErr_Occurred())
+        return false;
+    *out = value;
+    return true;
+}
+
+// Create one program group described by `desc` and store it in *out.
+// `log` must already hold the error-message prefix; OptiX appends its own
+// log text after it and the combined string becomes the RuntimeError message.
+static bool create_program_group(const OptixProgramGroupDesc& desc, char (&log)[2048], OptixProgramGroup* out)
+{
+    OptixProgramGroupOptions pgOptions = {};
+    const size_t prefix_len = strlen(log);
+    size_t sizeof_log = sizeof(log) - prefix_len;
+    if (optixProgramGroupCreate(ocontext,
+        &desc,
+        1,
+        &pgOptions,
+        log + prefix_len, &sizeof_log, // append after the prefix instead of overwriting it
+        out
+    ) != OPTIX_SUCCESS)
+    {
+        PyErr_SetString(PyExc_RuntimeError, log);
+        return false;
+    }
+    return true;
+}
+
+// Lazily create the OptiX context, module, the three program groups and the
+// pipeline. Objects that already exist are reused, so a call after a partial
+// failure resumes where the previous one stopped.
+// Returns false with a Python RuntimeError set on failure.
+inline bool ensure_initialize_context()
+{
+    if (!ocontext)
+    {
+        OptixDeviceContextOptions options = {};
+        options.logCallbackFunction = &optix_log_callback;
+        options.logCallbackLevel = ologlevel;
+        if (optixInit() != OPTIX_SUCCESS)
+        {
+            PyErr_SetString(PyExc_RuntimeError, "OptiX initialize failed.");
+            return false;
+        }
+        if (optixDeviceContextCreate(NULL, &options, &ocontext) != OPTIX_SUCCESS)
+        {
+            PyErr_SetString(PyExc_RuntimeError, "OptiX create context failed.");
+            return false;
+        }
+    }
+
+    // Populate the options on every call: the pipeline has to be created with
+    // the options the module was compiled with, even when the module survives
+    // from an earlier call that failed before the pipeline existed.
+    OptixModuleCompileOptions moduleCompileOptions = {};
+    moduleCompileOptions.maxRegisterCount = 50;
+    moduleCompileOptions.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_0;
+    moduleCompileOptions.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE;
+
+    OptixPipelineCompileOptions pipelineCompileOptions = {};
+    pipelineCompileOptions.traversableGraphFlags = OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_GAS;
+    pipelineCompileOptions.usesMotionBlur = false;
+    pipelineCompileOptions.numPayloadValues = 2;
+    pipelineCompileOptions.numAttributeValues = 2;
+    pipelineCompileOptions.exceptionFlags = OPTIX_EXCEPTION_FLAG_NONE;
+    pipelineCompileOptions.pipelineLaunchParamsVariableName = "optixLaunchParams";
+
+    OptixPipelineLinkOptions pipelineLinkOptions = {};
+    pipelineLinkOptions.maxTraceDepth = 2;
+
+    if (!omodule)
+    {
+        char log[2048] = "OptiX create module failed: ";
+        const size_t prefix_len = strlen(log);
+        size_t sizeof_log = sizeof(log) - prefix_len;
+
+        if (optixModuleCreateFromPTX(ocontext,
+            &moduleCompileOptions,
+            &pipelineCompileOptions,
+            (const char*)generated_torchoptixdev_ptx,
+            generated_torchoptixdev_ptx_len,
+            log + prefix_len, // Log string, concat after message
+            &sizeof_log, // Log string size
+            &omodule
+        ) != OPTIX_SUCCESS)
+        {
+            PyErr_SetString(PyExc_RuntimeError, log);
+            return false;
+        }
+    }
+    if (!oprograms[0])
+    {
+        OptixProgramGroupDesc pgDesc = {};
+        pgDesc.kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
+        pgDesc.raygen.module = omodule;
+        pgDesc.raygen.entryFunctionName = "__raygen__rg";
+
+        char log[2048] = "OptiX create raygen program failed: ";
+        if (!create_program_group(pgDesc, log, &oprograms[0]))
+            return false;
+    }
+    if (!oprograms[1])
+    {
+        OptixProgramGroupDesc pgDesc = {};
+        pgDesc.kind = OPTIX_PROGRAM_GROUP_KIND_MISS;
+        // A miss group is described through the `miss` member, not `raygen`.
+        pgDesc.miss.module = omodule;
+        pgDesc.miss.entryFunctionName = "__miss__far";
+
+        char log[2048] = "OptiX create miss program failed: ";
+        if (!create_program_group(pgDesc, log, &oprograms[1]))
+            return false;
+    }
+    if (!oprograms[2])
+    {
+        OptixProgramGroupDesc pgDesc = {};
+        pgDesc.kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
+        pgDesc.hitgroup.moduleCH = omodule;
+        pgDesc.hitgroup.entryFunctionNameCH = "__closesthit__ch";
+
+        char log[2048] = "OptiX create hit program failed: ";
+        if (!create_program_group(pgDesc, log, &oprograms[2]))
+            return false;
+    }
+    if (!opipeline)
+    {
+        char log[2048] = "OptiX create pipeline failed: ";
+        const size_t prefix_len = strlen(log);
+        size_t sizeof_log = sizeof(log) - prefix_len;
+        if (optixPipelineCreate(ocontext,
+            &pipelineCompileOptions,
+            &pipelineLinkOptions,
+            oprograms,
+            3,
+            log + prefix_len, &sizeof_log,
+            &opipeline
+        ) != OPTIX_SUCCESS)
+        {
+            PyErr_SetString(PyExc_RuntimeError, log);
+            return false;
+        }
+
+        if (optixPipelineSetStackSize(
+            /* [in] The pipeline to configure the stack size for */
+            opipeline,
+            /* [in] The direct stack size requirement for direct
+            callables invoked from IS or AH. */
+            1024,
+            /* [in] The direct stack size requirement for direct
+            callables invoked from RG, MS, or CH. */
+            2 * 1024,
+            /* [in] The continuation stack requirement. */
+            2 * 1024,
+            /* [in] The maximum depth of a traversable graph
+            passed to trace. */
+            1
+        ) != OPTIX_SUCCESS)
+        {
+            // Do not keep a pipeline whose stack size was never configured;
+            // otherwise the next call would skip this step and use it as-is.
+            optixPipelineDestroy(opipeline);
+            opipeline = nullptr;
+            PyErr_SetString(PyExc_RuntimeError, "OptiX pipeline set stack size failed.");
+            return false;
+        }
+    }
+    return true;
+}
+
+// Build a triangle acceleration structure from device-resident geometry.
+// verts:  device address of nverts vertices, 3 floats each (tightly packed).
+// tris:   device address of ntris triangles, 3 unsigned 32-bit indices each.
+// On success out_handle receives the traversable handle and out_pointer the
+// device buffer backing it, which the caller must eventually free with
+// cuMemFree. On failure a Python exception is set, every buffer allocated
+// here is freed and out_pointer is left untouched.
+inline bool build_acceleration_structure(void* verts, void* tris, unsigned long long nverts, unsigned long long ntris, OptixTraversableHandle& out_handle, CUdeviceptr& out_pointer)
+{
+    // The OptiX build input stores both counts as unsigned int; refuse values
+    // that the casts below would silently truncate.
+    if ((unsigned long long)(unsigned int)nverts != nverts || (unsigned long long)(unsigned int)ntris != ntris)
+    {
+        PyErr_SetString(PyExc_ValueError, "TorchOptiX: vertex or triangle count does not fit in 32 bits.");
+        return false;
+    }
+
+    // ==================================================================
+    // triangle inputs
+    // ==================================================================
+    OptixBuildInput triangleInput = {};
+    triangleInput.type = OPTIX_BUILD_INPUT_TYPE_TRIANGLES;
+
+    // create local variables, because we need a *pointer* to the
+    // device pointers
+    CUdeviceptr d_vertices = (CUdeviceptr)verts;
+    CUdeviceptr d_indices = (CUdeviceptr)tris;
+
+    triangleInput.triangleArray.vertexFormat = OPTIX_VERTEX_FORMAT_FLOAT3;
+    triangleInput.triangleArray.vertexStrideInBytes = sizeof(float) * 3;
+    triangleInput.triangleArray.numVertices = (unsigned int)nverts;
+    triangleInput.triangleArray.vertexBuffers = &d_vertices;
+
+    triangleInput.triangleArray.indexFormat = OPTIX_INDICES_FORMAT_UNSIGNED_INT3;
+    triangleInput.triangleArray.indexStrideInBytes = sizeof(int) * 3;
+    triangleInput.triangleArray.numIndexTriplets = (unsigned int)ntris;
+    triangleInput.triangleArray.indexBuffer = d_indices;
+
+    uint32_t triangleInputFlags[1] = { 0 };
+
+    // in this example we have one SBT entry, and no per-primitive
+    // materials:
+    triangleInput.triangleArray.flags = triangleInputFlags;
+    triangleInput.triangleArray.numSbtRecords = 1;
+    triangleInput.triangleArray.sbtIndexOffsetBuffer = 0;
+    triangleInput.triangleArray.sbtIndexOffsetSizeInBytes = 0;
+    triangleInput.triangleArray.sbtIndexOffsetStrideInBytes = 0;
+
+    // ==================================================================
+    // BLAS setup
+    // ==================================================================
+
+    OptixAccelBuildOptions accelOptions = {};
+    accelOptions.buildFlags = OPTIX_BUILD_FLAG_NONE;
+    accelOptions.motionOptions.numKeys = 1;
+    accelOptions.operation = OPTIX_BUILD_OPERATION_BUILD;
+
+    OptixAccelBufferSizes blasBufferSizes;
+    if (optixAccelComputeMemoryUsage(ocontext,
+        &accelOptions,
+        &triangleInput,
+        1, // num_build_inputs
+        &blasBufferSizes
+    ) != OPTIX_SUCCESS)
+    {
+        PyErr_SetString(PyExc_RuntimeError, "OptiX acceleration structure compute memory usage failed.");
+        return false;
+    }
+
+    // ==================================================================
+    // execute build (main stage)
+    // ==================================================================
+
+    CUdeviceptr tempBuffer = 0;
+    CUdeviceptr outputBuffer = 0;
+    bool ok = check_cuda(cuMemAlloc(&tempBuffer, blasBufferSizes.tempSizeInBytes))
+        && check_cuda(cuMemAlloc(&outputBuffer, blasBufferSizes.outputSizeInBytes));
+
+    if (ok && optixAccelBuild(ocontext,
+        0,
+        &accelOptions,
+        &triangleInput,
+        1,
+        tempBuffer,
+        blasBufferSizes.tempSizeInBytes,
+        outputBuffer,
+        blasBufferSizes.outputSizeInBytes,
+        &out_handle,
+        nullptr, 0
+    ) != OPTIX_SUCCESS)
+    {
+        PyErr_SetString(PyExc_RuntimeError, "OptiX acceleration structure build failed.");
+        ok = false;
+    }
+    ok = ok && check_cuda(cuCtxSynchronize());
+
+    // ==================================================================
+    // aaaaaand .... clean up
+    // ==================================================================
+    // The scratch buffer is only needed during the build; the output buffer
+    // is handed to the caller on success and released on any failure.
+    free_device(tempBuffer, ok);
+    if (!ok)
+    {
+        bool ignored = false;
+        free_device(outputBuffer, ignored);
+        return false;
+    }
+    out_pointer = outputBuffer;
+    return true;
+}
+
+PyDoc_STRVAR(torchoptix_build_doc, "build(verts, tris, n_verts, n_tris) -> handle\n\
+\n\
+Build OptiX acceleration structure.");
+
+// Python entry point build(verts, tris, n_verts, n_tris).
+// verts and tris are device addresses passed as integers. Returns the handle
+// as a 2-tuple (traversable handle, device pointer of its buffer), or NULL
+// with an exception set.
+PyObject* torchoptix_build(PyObject* self, PyObject* args) {
+    unsigned long long verts, tris, nverts, ntris;
+
+    if (!PyArg_ParseTuple(args, "KKKK", &verts, &tris, &nverts, &ntris))
+        return nullptr;
+
+    if (!ensure_initialize_context())
+        return nullptr;
+
+    OptixTraversableHandle handle;
+    CUdeviceptr pointer;
+    if (!build_acceleration_structure((void*)verts, (void*)tris, nverts, ntris, handle, pointer))
+        return nullptr;
+
+    PyObject* res = Py_BuildValue("(KK)", (unsigned long long)handle, (unsigned long long)pointer);
+    if (!res)
+    {
+        // Nobody will ever be able to release the buffer; free it here.
+        bool ignored = false;
+        free_device(pointer, ignored);
+    }
+    return res;
+}
+
+PyDoc_STRVAR(torchoptix_release_doc, "release(handle)\n\
+\n\
+Release OptiX acceleration structure.");
+
+// Python entry point release(handle): free the device buffer stored in
+// element 1 of the tuple returned by build().
+PyObject* torchoptix_release(PyObject* self, PyObject* args) {
+    PyObject* handle;
+
+    if (!PyArg_ParseTuple(args, "O", &handle))
+        return nullptr;
+
+    if (!ensure_initialize_context())
+        return nullptr;
+
+    unsigned long long pointer;
+    if (!handle_item(handle, 1, &pointer))
+        return nullptr;
+
+    if (!check_cuda(cuMemFree((CUdeviceptr)pointer)))
+        return nullptr;
+    Py_RETURN_NONE;
+}
+
+// Launch the pipeline for n_rays rays against the traversable `handle`.
+// rays_o, rays_d, out_t and out_i are device addresses that are forwarded to
+// the device code unchanged through LaunchParams, together with t_max.
+// Returns false with a Python exception set on failure; the temporary device
+// buffers are released on every path.
+inline bool trace_rays(OptixTraversableHandle handle, void* rays_o, void* rays_d, void* out_t, void* out_i, float t_max, unsigned long long n_rays)
+{
+    // Nothing to trace.
+    if (n_rays == 0)
+        return true;
+    // optixLaunch takes a 32-bit width; refuse counts that would be truncated.
+    if ((unsigned long long)(unsigned int)n_rays != n_rays)
+    {
+        PyErr_SetString(PyExc_ValueError, "TorchOptiX: number of rays does not fit in 32 bits.");
+        return false;
+    }
+
+    OptixShaderBindingTable sbt = {};
+
+    // One header-only record per program group, each padded to the SBT alignment.
+    constexpr int sbt_record_size = (OPTIX_SBT_RECORD_HEADER_SIZE / OPTIX_SBT_RECORD_ALIGNMENT + 1) * OPTIX_SBT_RECORD_ALIGNMENT;
+    char record[sbt_record_size * 3] = {};
+    for (int i = 0; i < 3; ++i)
+    {
+        if (optixSbtRecordPackHeader(oprograms[i], record + sbt_record_size * i) != OPTIX_SUCCESS)
+        {
+            PyErr_SetString(PyExc_RuntimeError, "OptiX SBT record pack header failed.");
+            return false;
+        }
+    }
+
+    LaunchParams p = {};
+    p.rays_o = (unsigned long long)rays_o;
+    p.rays_d = (unsigned long long)rays_d;
+    p.out_t = (unsigned long long)out_t;
+    p.out_i = (unsigned long long)out_i;
+    p.t_max = t_max;
+    p.traversable = handle;
+
+    CUdeviceptr dsbt = 0;
+    CUdeviceptr dp = 0;
+    bool ok = check_cuda(cuMemAlloc(&dsbt, sizeof(record)))
+        && check_cuda(cuMemcpyHtoD(dsbt, record, sizeof(record)))
+        && check_cuda(cuMemAlloc(&dp, sizeof(LaunchParams)))
+        && check_cuda(cuMemcpyHtoD(dp, &p, sizeof(LaunchParams)));
+
+    if (ok)
+    {
+        sbt.raygenRecord = dsbt;
+        sbt.missRecordBase = dsbt + sbt_record_size;
+        sbt.hitgroupRecordBase = dsbt + sbt_record_size * 2;
+        sbt.missRecordStrideInBytes = sbt.hitgroupRecordStrideInBytes = sbt_record_size;
+        sbt.missRecordCount = sbt.hitgroupRecordCount = 1;
+
+        if (optixLaunch(/*! pipeline we're launching launch: */
+            opipeline, 0,
+            /*! parameters and SBT */
+            dp,
+            sizeof(LaunchParams),
+            &sbt,
+            /*! dimensions of the launch: */
+            (unsigned int)n_rays,
+            1,
+            1
+        ) != OPTIX_SUCCESS)
+        {
+            PyErr_SetString(PyExc_RuntimeError, "OptiX launch failed.");
+            ok = false;
+        }
+    }
+    ok = ok && check_cuda(cuCtxSynchronize());
+
+    free_device(dp, ok);
+    free_device(dsbt, ok);
+    return ok;
+}
+
+PyDoc_STRVAR(torchoptix_trace_doc, "trace_rays(handle, rays_o, rays_d, out_t, out_i, t_max, n_rays)\n\
+\n\
+Trace rays with OptiX.");
+
+// Python entry point trace_rays(handle, rays_o, rays_d, out_t, out_i, t_max, n_rays).
+// handle is the tuple returned by build(); the four buffers are device
+// addresses passed as integers. Returns None, or NULL with an exception set.
+PyObject* torchoptix_trace(PyObject* self, PyObject* args) {
+    /* Shared references that do not need Py_DECREF before returning. */
+    PyObject* handle;
+    unsigned long long rays_o, rays_d, out_t, out_i, n_rays;
+    float t_max;
+
+    /* Parse positional arguments */
+    if (!PyArg_ParseTuple(args, "OKKKKfK", &handle, &rays_o, &rays_d, &out_t, &out_i, &t_max, &n_rays))
+        return nullptr;
+
+    if (!ensure_initialize_context())
+        return nullptr;
+
+    /* Element 0 of the handle tuple is the traversable handle. */
+    unsigned long long traversable;
+    if (!handle_item(handle, 0, &traversable))
+        return nullptr;
+
+    if (!trace_rays((OptixTraversableHandle)traversable, (void*)rays_o, (void*)rays_d, (void*)out_t, (void*)out_i, t_max, n_rays))
+        return nullptr;
+
+    Py_RETURN_NONE;
+}
+
+/*
+ * List of functions to add to torchoptix in exec_torchoptix().
+ */
+static PyMethodDef torchoptix_functions[] = {
+    { "build", (PyCFunction)torchoptix_build, METH_VARARGS, torchoptix_build_doc },
+    { "release", (PyCFunction)torchoptix_release, METH_VARARGS, torchoptix_release_doc },
+    { "set_log_level", (PyCFunction)torchoptix_log_level, METH_VARARGS, torchoptix_log_level_doc },
+    { "trace_rays", (PyCFunction)torchoptix_trace, METH_VARARGS, torchoptix_trace_doc },
+    { NULL, NULL, 0, NULL } /* marks end of array */
+};
+
+/*
+ * Initialize torchoptix. May be called multiple times, so avoid
+ * using static state.
+ * Returns 0 on success and -1 with an exception set on failure.
+ */
+int exec_torchoptix(PyObject *module) {
+    torchoptix_module_ref = module;
+
+    if (PyModule_AddFunctions(module, torchoptix_functions) < 0)
+        return -1;
+    if (PyModule_AddStringConstant(module, "__author__", "eliphatfs") < 0)
+        return -1;
+    if (PyModule_AddStringConstant(module, "__version__", "0.0.1") < 0)
+        return -1;
+    if (PyModule_AddIntConstant(module, "year", 2024) < 0)
+        return -1;
+
+    return 0; /* success */
+}
+
+/*
+ * Documentation for torchoptix.
+ */
+PyDoc_STRVAR(torchoptix_doc, "Modular OptiX ray tracing functions interop with PyTorch.");
+
+
+static PyModuleDef_Slot torchoptix_slots[] = {
+    { Py_mod_exec, (void*)exec_torchoptix },
+    { 0, NULL }
+};
+
+static PyModuleDef torchoptix_def = {
+    PyModuleDef_HEAD_INIT,
+    "torchoptix",
+    torchoptix_doc,
+    0, /* m_size */
+    NULL, /* m_methods */
+    torchoptix_slots,
+    NULL, /* m_traverse */
+    NULL, /* m_clear */
+    NULL, /* m_free */
+};
+
+extern "C" {
+    PyMODINIT_FUNC PyInit_torchoptix() {
+        return PyModuleDef_Init(&torchoptix_def);
+    }
+}
diff --git a/csrc/optixinc.h b/csrc/optixinc.h
new file mode 100644
index 0000000..910e3f0
--- /dev/null
+++ b/csrc/optixinc.h
@@ -0,0 +1,13 @@
+#pragma once
+#include "optix.h"
+
+// Launch parameters uploaded by trace_rays() and bound to the pipeline's
+// "optixLaunchParams" variable. The embedded PTX declares that variable as a
+// 48-byte block, so field order and sizes must stay in sync with the device code.
+struct LaunchParams
+{
+    OptixTraversableHandle traversable;
+    // Device addresses, passed through from the Python caller as integers.
+    unsigned long long rays_o, rays_d, out_t, out_i;
+    float t_max;
+};
diff --git a/csrc/torchoptix.sln b/csrc/torchoptix.sln
new file mode 100644
index 0000000..e74e64b
--- /dev/null
+++ b/csrc/torchoptix.sln
@@ -0,0 +1,31 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 17
+VisualStudioVersion = 17.7.34003.232
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "torchoptix", "torchoptix.vcxproj", "{FAC31472-941E-4E50-B26F-A9A4F953D8B5}"
+EndProject
+Global
+    GlobalSection(SolutionConfigurationPlatforms) = preSolution
+        Debug|x64 = Debug|x64
+        Debug|x86 = Debug|x86
+        Release|x64 = Release|x64
+        Release|x86 = Release|x86
+    EndGlobalSection
+    GlobalSection(ProjectConfigurationPlatforms) = postSolution
+        {FAC31472-941E-4E50-B26F-A9A4F953D8B5}.Debug|x64.ActiveCfg = Debug|x64
+        {FAC31472-941E-4E50-B26F-A9A4F953D8B5}.Debug|x64.Build.0 = Debug|x64
+        {FAC31472-941E-4E50-B26F-A9A4F953D8B5}.Debug|x86.ActiveCfg = Debug|Win32
+        {FAC31472-941E-4E50-B26F-A9A4F953D8B5}.Debug|x86.Build.0 = Debug|Win32
+        {FAC31472-941E-4E50-B26F-A9A4F953D8B5}.Release|x64.ActiveCfg = Release|x64
+
{FAC31472-941E-4E50-B26F-A9A4F953D8B5}.Release|x64.Build.0 = Release|x64 + {FAC31472-941E-4E50-B26F-A9A4F953D8B5}.Release|x86.ActiveCfg = Release|Win32 + {FAC31472-941E-4E50-B26F-A9A4F953D8B5}.Release|x86.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {DBFB910C-F76E-4A09-AD86-0B9E6550BDAA} + EndGlobalSection +EndGlobal diff --git a/csrc/torchoptix.vcxproj b/csrc/torchoptix.vcxproj new file mode 100644 index 0000000..25b0277 --- /dev/null +++ b/csrc/torchoptix.vcxproj @@ -0,0 +1,140 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 17.0 + {FAC31472-941E-4E50-B26F-A9A4F953D8B5} + Win32Proj + + + + DynamicLibrary + true + v143 + + + DynamicLibrary + false + v143 + + + DynamicLibrary + true + v143 + + + DynamicLibrary + false + v143 + + + + + + + + + + + + + + + + + + + + + true + + + true + + + true + + + true + + + + WIN32;_DEBUG;_WINDOWS;_USRDLL;TORCHOPTIX_EXPORTS;%(PreprocessorDefinitions) + Level3 + C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\include;D:\CondaM\include;C:\ProgramData\NVIDIA Corporation\OptiX SDK 7.5.0\include;%(AdditionalIncludeDirectories) + + + true + Windows + + + + + _DEBUG;_WINDOWS;_USRDLL;TORCHOPTIX_EXPORTS;%(PreprocessorDefinitions) + Level3 + C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\include;D:\CondaM\include;C:\ProgramData\NVIDIA Corporation\OptiX SDK 7.5.0\include;%(AdditionalIncludeDirectories) + + + true + Windows + + + + + WIN32;NDEBUG;_WINDOWS;_USRDLL;TORCHOPTIX_EXPORTS;%(PreprocessorDefinitions) + Level3 + C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\include;D:\CondaM\include;C:\ProgramData\NVIDIA Corporation\OptiX SDK 7.5.0\include;%(AdditionalIncludeDirectories) + + + true + Windows + true + true + + + + + NDEBUG;_WINDOWS;_USRDLL;TORCHOPTIX_EXPORTS;%(PreprocessorDefinitions) + Level3 + 
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\include;D:\CondaM\include;C:\ProgramData\NVIDIA Corporation\OptiX SDK 7.5.0\include;%(AdditionalIncludeDirectories) + + + true + Windows + true + true + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/csrc/torchoptix.vcxproj.filters b/csrc/torchoptix.vcxproj.filters new file mode 100644 index 0000000..182d4be --- /dev/null +++ b/csrc/torchoptix.vcxproj.filters @@ -0,0 +1,37 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav + + + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + + + + + \ No newline at end of file diff --git a/generate.sh b/generate.sh new file mode 100644 index 0000000..3c11493 --- /dev/null +++ b/generate.sh @@ -0,0 +1,3 @@ +mkdir -p generated +nvcc -O3 -ptx -arch=sm_50 csrc/optixdev.cu -Iinclude -o generated/torchoptixdev.ptx +xxd -i generated/torchoptixdev.ptx >csrc/optixdevptx.h diff --git a/include/internal/optix_7_device_impl.h b/include/internal/optix_7_device_impl.h new file mode 100644 index 0000000..873bc7b --- /dev/null +++ b/include/internal/optix_7_device_impl.h @@ -0,0 +1,1534 @@ +/* +* Copyright (c) 2021 NVIDIA Corporation. All rights reserved. +* +* NVIDIA Corporation and its licensors retain all intellectual property and proprietary +* rights in and to this software, related documentation and any modifications thereto. +* Any use, reproduction, disclosure or distribution of this software and related +* documentation without an express license agreement from NVIDIA Corporation is strictly +* prohibited. 
+* +* TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* +* AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, +* INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +* PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY +* SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT +* LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF +* BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR +* INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF +* SUCH DAMAGES +*/ + +/** +* @file optix_7_device_impl.h +* @author NVIDIA Corporation +* @brief OptiX public API +* +* OptiX public API Reference - Device side implementation +*/ + +#if !defined( __OPTIX_INCLUDE_INTERNAL_HEADERS__ ) +#error("optix_7_device_impl.h is an internal header file and must not be used directly. Please use optix_device.h or optix.h instead.") +#endif + +#ifndef __optix_optix_7_device_impl_h__ +#define __optix_optix_7_device_impl_h__ + +#include "internal/optix_7_device_impl_exception.h" +#include "internal/optix_7_device_impl_transformations.h" + +#ifndef __CUDACC_RTC__ +#include +#include +#endif + +namespace optix_internal { +template +struct TypePack{}; +} // namespace optix_internal + +template +static __forceinline__ __device__ void optixTrace( OptixTraversableHandle handle, + float3 rayOrigin, + float3 rayDirection, + float tmin, + float tmax, + float rayTime, + OptixVisibilityMask visibilityMask, + unsigned int rayFlags, + unsigned int SBToffset, + unsigned int SBTstride, + unsigned int missSBTIndex, + Payload&... payload ) +{ + static_assert( sizeof...( Payload ) <= 32, "Only up to 32 payload values are allowed." ); + // std::is_same compares each type in the two TypePacks to make sure that all types are unsigned int. 
+ // TypePack 1 unsigned int T0 T1 T2 ... Tn-1 Tn + // TypePack 2 T0 T1 T2 T3 ... Tn unsigned int +#ifndef __CUDACC_RTC__ + static_assert( std::is_same, optix_internal::TypePack>::value, + "All payload parameters need to be unsigned int." ); +#endif + + float ox = rayOrigin.x, oy = rayOrigin.y, oz = rayOrigin.z; + float dx = rayDirection.x, dy = rayDirection.y, dz = rayDirection.z; + unsigned int p[33] = { 0, payload... }; + int payloadSize = (int)sizeof...( Payload ); + asm volatile( + "call" + "(%0,%1,%2,%3,%4,%5,%6,%7,%8,%9,%10,%11,%12,%13,%14,%15,%16,%17,%18,%19,%20,%21,%22,%23,%24,%25,%26,%27,%28,%" + "29,%30,%31)," + "_optix_trace_typed_32," + "(%32,%33,%34,%35,%36,%37,%38,%39,%40,%41,%42,%43,%44,%45,%46,%47,%48,%49,%50,%51,%52,%53,%54,%55,%56,%57,%58,%" + "59,%60,%61,%62,%63,%64,%65,%66,%67,%68,%69,%70,%71,%72,%73,%74,%75,%76,%77,%78,%79,%80);" + : "=r"( p[1] ), "=r"( p[2] ), "=r"( p[3] ), "=r"( p[4] ), "=r"( p[5] ), "=r"( p[6] ), "=r"( p[7] ), + "=r"( p[8] ), "=r"( p[9] ), "=r"( p[10] ), "=r"( p[11] ), "=r"( p[12] ), "=r"( p[13] ), "=r"( p[14] ), + "=r"( p[15] ), "=r"( p[16] ), "=r"( p[17] ), "=r"( p[18] ), "=r"( p[19] ), "=r"( p[20] ), "=r"( p[21] ), + "=r"( p[22] ), "=r"( p[23] ), "=r"( p[24] ), "=r"( p[25] ), "=r"( p[26] ), "=r"( p[27] ), "=r"( p[28] ), + "=r"( p[29] ), "=r"( p[30] ), "=r"( p[31] ), "=r"( p[32] ) + : "r"( 0 ), "l"( handle ), "f"( ox ), "f"( oy ), "f"( oz ), "f"( dx ), "f"( dy ), "f"( dz ), "f"( tmin ), + "f"( tmax ), "f"( rayTime ), "r"( visibilityMask ), "r"( rayFlags ), "r"( SBToffset ), "r"( SBTstride ), + "r"( missSBTIndex ), "r"( payloadSize ), "r"( p[1] ), "r"( p[2] ), "r"( p[3] ), "r"( p[4] ), "r"( p[5] ), + "r"( p[6] ), "r"( p[7] ), "r"( p[8] ), "r"( p[9] ), "r"( p[10] ), "r"( p[11] ), "r"( p[12] ), "r"( p[13] ), + "r"( p[14] ), "r"( p[15] ), "r"( p[16] ), "r"( p[17] ), "r"( p[18] ), "r"( p[19] ), "r"( p[20] ), + "r"( p[21] ), "r"( p[22] ), "r"( p[23] ), "r"( p[24] ), "r"( p[25] ), "r"( p[26] ), "r"( p[27] ), + "r"( p[28] ), "r"( 
p[29] ), "r"( p[30] ), "r"( p[31] ), "r"( p[32] ) + : ); + unsigned int index = 1; + (void)std::initializer_list{ index, ( payload = p[index++] )... }; +} + +template +static __forceinline__ __device__ void optixTrace( OptixPayloadTypeID type, + OptixTraversableHandle handle, + float3 rayOrigin, + float3 rayDirection, + float tmin, + float tmax, + float rayTime, + OptixVisibilityMask visibilityMask, + unsigned int rayFlags, + unsigned int SBToffset, + unsigned int SBTstride, + unsigned int missSBTIndex, + Payload&... payload ) +{ + // std::is_same compares each type in the two TypePacks to make sure that all types are unsigned int. + // TypePack 1 unsigned int T0 T1 T2 ... Tn-1 Tn + // TypePack 2 T0 T1 T2 T3 ... Tn unsigned int + static_assert( sizeof...( Payload ) <= 32, "Only up to 32 payload values are allowed." ); + static_assert( std::is_same, optix_internal::TypePack>::value, + "All payload parameters need to be unsigned int." ); + + float ox = rayOrigin.x, oy = rayOrigin.y, oz = rayOrigin.z; + float dx = rayDirection.x, dy = rayDirection.y, dz = rayDirection.z; + unsigned int p[33] = { 0, payload... 
}; + int payloadSize = (int)sizeof...( Payload ); + + asm volatile( + "call" + "(%0,%1,%2,%3,%4,%5,%6,%7,%8,%9,%10,%11,%12,%13,%14,%15,%16,%17,%18,%19,%20,%21,%22,%23,%24,%25,%26,%27,%28,%" + "29,%30,%31)," + "_optix_trace_typed_32," + "(%32,%33,%34,%35,%36,%37,%38,%39,%40,%41,%42,%43,%44,%45,%46,%47,%48,%49,%50,%51,%52,%53,%54,%55,%56,%57,%58,%" + "59,%60,%61,%62,%63,%64,%65,%66,%67,%68,%69,%70,%71,%72,%73,%74,%75,%76,%77,%78,%79,%80);" + : "=r"( p[1] ), "=r"( p[2] ), "=r"( p[3] ), "=r"( p[4] ), "=r"( p[5] ), "=r"( p[6] ), "=r"( p[7] ), + "=r"( p[8] ), "=r"( p[9] ), "=r"( p[10] ), "=r"( p[11] ), "=r"( p[12] ), "=r"( p[13] ), "=r"( p[14] ), + "=r"( p[15] ), "=r"( p[16] ), "=r"( p[17] ), "=r"( p[18] ), "=r"( p[19] ), "=r"( p[20] ), "=r"( p[21] ), + "=r"( p[22] ), "=r"( p[23] ), "=r"( p[24] ), "=r"( p[25] ), "=r"( p[26] ), "=r"( p[27] ), "=r"( p[28] ), + "=r"( p[29] ), "=r"( p[30] ), "=r"( p[31] ), "=r"( p[32] ) + : "r"( type ), "l"( handle ), "f"( ox ), "f"( oy ), "f"( oz ), "f"( dx ), "f"( dy ), "f"( dz ), "f"( tmin ), + "f"( tmax ), "f"( rayTime ), "r"( visibilityMask ), "r"( rayFlags ), "r"( SBToffset ), "r"( SBTstride ), + "r"( missSBTIndex ), "r"( payloadSize ), "r"( p[1] ), "r"( p[2] ), "r"( p[3] ), "r"( p[4] ), "r"( p[5] ), + "r"( p[6] ), "r"( p[7] ), "r"( p[8] ), "r"( p[9] ), "r"( p[10] ), "r"( p[11] ), "r"( p[12] ), "r"( p[13] ), + "r"( p[14] ), "r"( p[15] ), "r"( p[16] ), "r"( p[17] ), "r"( p[18] ), "r"( p[19] ), "r"( p[20] ), + "r"( p[21] ), "r"( p[22] ), "r"( p[23] ), "r"( p[24] ), "r"( p[25] ), "r"( p[26] ), "r"( p[27] ), + "r"( p[28] ), "r"( p[29] ), "r"( p[30] ), "r"( p[31] ), "r"( p[32] ) + : ); + unsigned int index = 1; + (void)std::initializer_list{ index, ( payload = p[index++] )... 
}; +} + +static __forceinline__ __device__ void optixSetPayload_0( unsigned int p ) +{ + asm volatile( "call _optix_set_payload, (%0, %1);" : : "r"( 0 ), "r"( p ) : ); +} + +static __forceinline__ __device__ void optixSetPayload_1( unsigned int p ) +{ + asm volatile( "call _optix_set_payload, (%0, %1);" : : "r"( 1 ), "r"( p ) : ); +} + +static __forceinline__ __device__ void optixSetPayload_2( unsigned int p ) +{ + asm volatile( "call _optix_set_payload, (%0, %1);" : : "r"( 2 ), "r"( p ) : ); +} + +static __forceinline__ __device__ void optixSetPayload_3( unsigned int p ) +{ + asm volatile( "call _optix_set_payload, (%0, %1);" : : "r"( 3 ), "r"( p ) : ); +} + +static __forceinline__ __device__ void optixSetPayload_4( unsigned int p ) +{ + asm volatile( "call _optix_set_payload, (%0, %1);" : : "r"( 4 ), "r"( p ) : ); +} + +static __forceinline__ __device__ void optixSetPayload_5( unsigned int p ) +{ + asm volatile( "call _optix_set_payload, (%0, %1);" : : "r"( 5 ), "r"( p ) : ); +} + +static __forceinline__ __device__ void optixSetPayload_6( unsigned int p ) +{ + asm volatile( "call _optix_set_payload, (%0, %1);" : : "r"( 6 ), "r"( p ) : ); +} + +static __forceinline__ __device__ void optixSetPayload_7( unsigned int p ) +{ + asm volatile( "call _optix_set_payload, (%0, %1);" : : "r"( 7 ), "r"( p ) : ); +} + +static __forceinline__ __device__ void optixSetPayload_8( unsigned int p ) +{ + asm volatile( "call _optix_set_payload, (%0, %1);" : : "r"( 8 ), "r"( p ) : ); +} + +static __forceinline__ __device__ void optixSetPayload_9( unsigned int p ) +{ + asm volatile( "call _optix_set_payload, (%0, %1);" : : "r"( 9 ), "r"( p ) : ); +} + +static __forceinline__ __device__ void optixSetPayload_10( unsigned int p ) +{ + asm volatile( "call _optix_set_payload, (%0, %1);" : : "r"( 10 ), "r"( p ) : ); +} + +static __forceinline__ __device__ void optixSetPayload_11( unsigned int p ) +{ + asm volatile( "call _optix_set_payload, (%0, %1);" : : "r"( 11 ), "r"( p ) : ); +} + +static 
__forceinline__ __device__ void optixSetPayload_12( unsigned int p ) +{ + asm volatile( "call _optix_set_payload, (%0, %1);" : : "r"( 12 ), "r"( p ) : ); +} + +static __forceinline__ __device__ void optixSetPayload_13( unsigned int p ) +{ + asm volatile( "call _optix_set_payload, (%0, %1);" : : "r"( 13 ), "r"( p ) : ); +} + +static __forceinline__ __device__ void optixSetPayload_14( unsigned int p ) +{ + asm volatile( "call _optix_set_payload, (%0, %1);" : : "r"( 14 ), "r"( p ) : ); +} + +static __forceinline__ __device__ void optixSetPayload_15( unsigned int p ) +{ + asm volatile( "call _optix_set_payload, (%0, %1);" : : "r"( 15 ), "r"( p ) : ); +} + +static __forceinline__ __device__ void optixSetPayload_16( unsigned int p ) +{ + asm volatile( "call _optix_set_payload, (%0, %1);" : : "r"( 16 ), "r"( p ) : ); +} + +static __forceinline__ __device__ void optixSetPayload_17( unsigned int p ) +{ + asm volatile( "call _optix_set_payload, (%0, %1);" : : "r"( 17 ), "r"( p ) : ); +} + +static __forceinline__ __device__ void optixSetPayload_18( unsigned int p ) +{ + asm volatile( "call _optix_set_payload, (%0, %1);" : : "r"( 18 ), "r"( p ) : ); +} + +static __forceinline__ __device__ void optixSetPayload_19( unsigned int p ) +{ + asm volatile( "call _optix_set_payload, (%0, %1);" : : "r"( 19 ), "r"( p ) : ); +} + +static __forceinline__ __device__ void optixSetPayload_20( unsigned int p ) +{ + asm volatile( "call _optix_set_payload, (%0, %1);" : : "r"( 20 ), "r"( p ) : ); +} + +static __forceinline__ __device__ void optixSetPayload_21( unsigned int p ) +{ + asm volatile( "call _optix_set_payload, (%0, %1);" : : "r"( 21 ), "r"( p ) : ); +} + +static __forceinline__ __device__ void optixSetPayload_22( unsigned int p ) +{ + asm volatile( "call _optix_set_payload, (%0, %1);" : : "r"( 22 ), "r"( p ) : ); +} + +static __forceinline__ __device__ void optixSetPayload_23( unsigned int p ) +{ + asm volatile( "call _optix_set_payload, (%0, %1);" : : "r"( 23 ), "r"( p ) : ); +} + 
+static __forceinline__ __device__ void optixSetPayload_24( unsigned int p )  // Passes the constant index 24 and the 32-bit value p to the _optix_set_payload call; asm volatile, no outputs.
+{
+    asm volatile( "call _optix_set_payload, (%0, %1);" : : "r"( 24 ), "r"( p ) : );
+}
+// Same _optix_set_payload call with constant index 25.
+static __forceinline__ __device__ void optixSetPayload_25( unsigned int p )
+{
+    asm volatile( "call _optix_set_payload, (%0, %1);" : : "r"( 25 ), "r"( p ) : );
+}
+// Same _optix_set_payload call with constant index 26.
+static __forceinline__ __device__ void optixSetPayload_26( unsigned int p )
+{
+    asm volatile( "call _optix_set_payload, (%0, %1);" : : "r"( 26 ), "r"( p ) : );
+}
+// Same _optix_set_payload call with constant index 27.
+static __forceinline__ __device__ void optixSetPayload_27( unsigned int p )
+{
+    asm volatile( "call _optix_set_payload, (%0, %1);" : : "r"( 27 ), "r"( p ) : );
+}
+// Same _optix_set_payload call with constant index 28.
+static __forceinline__ __device__ void optixSetPayload_28( unsigned int p )
+{
+    asm volatile( "call _optix_set_payload, (%0, %1);" : : "r"( 28 ), "r"( p ) : );
+}
+// Same _optix_set_payload call with constant index 29.
+static __forceinline__ __device__ void optixSetPayload_29( unsigned int p )
+{
+    asm volatile( "call _optix_set_payload, (%0, %1);" : : "r"( 29 ), "r"( p ) : );
+}
+// Same _optix_set_payload call with constant index 30.
+static __forceinline__ __device__ void optixSetPayload_30( unsigned int p )
+{
+    asm volatile( "call _optix_set_payload, (%0, %1);" : : "r"( 30 ), "r"( p ) : );
+}
+// Same _optix_set_payload call with constant index 31 (the highest index defined in this family).
+static __forceinline__ __device__ void optixSetPayload_31( unsigned int p )
+{
+    asm volatile( "call _optix_set_payload, (%0, %1);" : : "r"( 31 ), "r"( p ) : );
+}
+/**
+ * optixGetPayload_N family: each function passes its constant index N to the
+ * _optix_get_payload call and returns the single 32-bit register result.
+ * The index is baked into the function name rather than taken as a parameter.
+ * NOTE(review): presumably the counterpart of optixSetPayload_N for the same
+ * slot; confirm against the OptiX device API documentation.
+ */
+static __forceinline__ __device__ unsigned int optixGetPayload_0()
+{
+    unsigned int result;
+    asm volatile( "call (%0), _optix_get_payload, (%1);" : "=r"( result ) : "r"( 0 ) : );
+    return result;
+}
+// Same _optix_get_payload call with constant index 1.
+static __forceinline__ __device__ unsigned int optixGetPayload_1()
+{
+    unsigned int result;
+    asm volatile( "call (%0), _optix_get_payload, (%1);" : "=r"( result ) : "r"( 1 ) : );
+    return result;
+}
+// Same _optix_get_payload call with constant index 2.
+static __forceinline__ __device__ unsigned int optixGetPayload_2()
+{
+    unsigned int result;
+    asm volatile( "call (%0), _optix_get_payload, (%1);" : "=r"( result ) : "r"( 2 ) : );
+    return result;
+}
+// Same _optix_get_payload call with constant index 3.
+static __forceinline__ __device__ unsigned int optixGetPayload_3()
+{
+    unsigned int result;
+    asm volatile( "call (%0), _optix_get_payload, (%1);" : "=r"( result ) : "r"( 3 ) : );
+    return result;
+}
+// Same _optix_get_payload call with constant index 4.
+static __forceinline__ __device__ unsigned int optixGetPayload_4()
+{
+    unsigned int result;
+    asm volatile( "call (%0), _optix_get_payload, (%1);" : "=r"( result ) : "r"( 4 ) : );
+    return result;
+}
+// Same _optix_get_payload call with constant index 5.
+static __forceinline__ __device__ unsigned int optixGetPayload_5()
+{
+    unsigned int result;
+    asm volatile( "call (%0), _optix_get_payload, (%1);" : "=r"( result ) : "r"( 5 ) : );
+    return result;
+}
+// Same _optix_get_payload call with constant index 6.
+static __forceinline__ __device__ unsigned int optixGetPayload_6()
+{
+    unsigned int result;
+    asm volatile( "call (%0), _optix_get_payload, (%1);" : "=r"( result ) : "r"( 6 ) : );
+    return result;
+}
+// Same _optix_get_payload call with constant index 7.
+static __forceinline__ __device__ unsigned int optixGetPayload_7()
+{
+    unsigned int result;
+    asm volatile( "call (%0), _optix_get_payload, (%1);" : "=r"( result ) : "r"( 7 ) : );
+    return result;
+}
+// Same _optix_get_payload call with constant index 8.
+static __forceinline__ __device__ unsigned int optixGetPayload_8()
+{
+    unsigned int result;
+    asm volatile( "call (%0), _optix_get_payload, (%1);" : "=r"( result ) : "r"( 8 ) : );
+    return result;
+}
+// Same _optix_get_payload call with constant index 9.
+static __forceinline__ __device__ unsigned int optixGetPayload_9()
+{
+    unsigned int result;
+    asm volatile( "call (%0), _optix_get_payload, (%1);" : "=r"( result ) : "r"( 9 ) : );
+    return result;
+}
+// Same _optix_get_payload call with constant index 10.
+static __forceinline__ __device__ unsigned int optixGetPayload_10()
+{
+    unsigned int result;
+    asm volatile( "call (%0), _optix_get_payload, (%1);" : "=r"( result ) : "r"( 10 ) : );
+    return result;
+}
+// Same _optix_get_payload call with constant index 11.
+static __forceinline__ __device__ unsigned int optixGetPayload_11()
+{
+    unsigned int result;
+    asm volatile( "call (%0), _optix_get_payload, (%1);" : "=r"( result ) : "r"( 11 ) : );
+    return result;
+}
+// Same _optix_get_payload call with constant index 12.
+static __forceinline__ __device__ unsigned int optixGetPayload_12()
+{
+    unsigned int result;
+    asm volatile( "call (%0), _optix_get_payload, (%1);" : "=r"( result ) : "r"( 12 ) : );
+    return result;
+}
+
+static __forceinline__ __device__ unsigned int optixGetPayload_13()  // Passes the constant index 13 to the _optix_get_payload call and returns its 32-bit result.
+{
+    unsigned int result;
+    asm volatile( "call (%0), _optix_get_payload, (%1);" : "=r"( result ) : "r"( 13 ) : );
+    return result;
+}
+// Same _optix_get_payload call with constant index 14.
+static __forceinline__ __device__ unsigned int optixGetPayload_14()
+{
+    unsigned int result;
+    asm volatile( "call (%0), _optix_get_payload, (%1);" : "=r"( result ) : "r"( 14 ) : );
+    return result;
+}
+// Same _optix_get_payload call with constant index 15.
+static __forceinline__ __device__ unsigned int optixGetPayload_15()
+{
+    unsigned int result;
+    asm volatile( "call (%0), _optix_get_payload, (%1);" : "=r"( result ) : "r"( 15 ) : );
+    return result;
+}
+// Same _optix_get_payload call with constant index 16.
+static __forceinline__ __device__ unsigned int optixGetPayload_16()
+{
+    unsigned int result;
+    asm volatile( "call (%0), _optix_get_payload, (%1);" : "=r"( result ) : "r"( 16 ) : );
+    return result;
+}
+// Same _optix_get_payload call with constant index 17.
+static __forceinline__ __device__ unsigned int optixGetPayload_17()
+{
+    unsigned int result;
+    asm volatile( "call (%0), _optix_get_payload, (%1);" : "=r"( result ) : "r"( 17 ) : );
+    return result;
+}
+// Same _optix_get_payload call with constant index 18.
+static __forceinline__ __device__ unsigned int optixGetPayload_18()
+{
+    unsigned int result;
+    asm volatile( "call (%0), _optix_get_payload, (%1);" : "=r"( result ) : "r"( 18 ) : );
+    return result;
+}
+// Same _optix_get_payload call with constant index 19.
+static __forceinline__ __device__ unsigned int optixGetPayload_19()
+{
+    unsigned int result;
+    asm volatile( "call (%0), _optix_get_payload, (%1);" : "=r"( result ) : "r"( 19 ) : );
+    return result;
+}
+// Same _optix_get_payload call with constant index 20.
+static __forceinline__ __device__ unsigned int optixGetPayload_20()
+{
+    unsigned int result;
+    asm volatile( "call (%0), _optix_get_payload, (%1);" : "=r"( result ) : "r"( 20 ) : );
+    return result;
+}
+// Same _optix_get_payload call with constant index 21.
+static __forceinline__ __device__ unsigned int optixGetPayload_21()
+{
+    unsigned int result;
+    asm volatile( "call (%0), _optix_get_payload, (%1);" : "=r"( result ) : "r"( 21 ) : );
+    return result;
+}
+// Same _optix_get_payload call with constant index 22.
+static __forceinline__ __device__ unsigned int optixGetPayload_22()
+{
+    unsigned int result;
+    asm volatile( "call (%0), _optix_get_payload, (%1);" : "=r"( result ) : "r"( 22 ) : );
+    return result;
+}
+// Same _optix_get_payload call with constant index 23.
+static __forceinline__ __device__ unsigned int optixGetPayload_23()
+{
+    unsigned int result;
+    asm volatile( "call (%0), _optix_get_payload, (%1);" : "=r"( result ) : "r"( 23 ) : );
+    return result;
+}
+// Same _optix_get_payload call with constant index 24.
+static __forceinline__ __device__ unsigned int optixGetPayload_24()
+{
+    unsigned int result;
+    asm volatile( "call (%0), _optix_get_payload, (%1);" : "=r"( result ) : "r"( 24 ) : );
+    return result;
+}
+// Same _optix_get_payload call with constant index 25.
+static __forceinline__ __device__ unsigned int optixGetPayload_25()
+{
+    unsigned int result;
+    asm volatile( "call (%0), _optix_get_payload, (%1);" : "=r"( result ) : "r"( 25 ) : );
+    return result;
+}
+// Same _optix_get_payload call with constant index 26.
+static __forceinline__ __device__ unsigned int optixGetPayload_26()
+{
+    unsigned int result;
+    asm volatile( "call (%0), _optix_get_payload, (%1);" : "=r"( result ) : "r"( 26 ) : );
+    return result;
+}
+// Same _optix_get_payload call with constant index 27.
+static __forceinline__ __device__ unsigned int optixGetPayload_27()
+{
+    unsigned int result;
+    asm volatile( "call (%0), _optix_get_payload, (%1);" : "=r"( result ) : "r"( 27 ) : );
+    return result;
+}
+// Same _optix_get_payload call with constant index 28.
+static __forceinline__ __device__ unsigned int optixGetPayload_28()
+{
+    unsigned int result;
+    asm volatile( "call (%0), _optix_get_payload, (%1);" : "=r"( result ) : "r"( 28 ) : );
+    return result;
+}
+// Same _optix_get_payload call with constant index 29.
+static __forceinline__ __device__ unsigned int optixGetPayload_29()
+{
+    unsigned int result;
+    asm volatile( "call (%0), _optix_get_payload, (%1);" : "=r"( result ) : "r"( 29 ) : );
+    return result;
+}
+// Same _optix_get_payload call with constant index 30.
+static __forceinline__ __device__ unsigned int optixGetPayload_30()
+{
+    unsigned int result;
+    asm volatile( "call (%0), _optix_get_payload, (%1);" : "=r"( result ) : "r"( 30 ) : );
+    return result;
+}
+// Same _optix_get_payload call with constant index 31 (the highest index defined in this family).
+static __forceinline__ __device__ unsigned int optixGetPayload_31()
+{
+    unsigned int result;
+    asm volatile( "call (%0), _optix_get_payload, (%1);" : "=r"( result ) : "r"( 31 ) : );
+    return result;
+}
+/**
+ * Forwards the 32-bit value types unchanged to the _optix_set_payload_types
+ * call. The call has no outputs and is marked volatile.
+ * NOTE(review): the encoding of types is not visible here; presumably a
+ * combination of OptixPayloadTypeID values. Confirm against the OptiX docs.
+ */
+static __forceinline__ __device__ void optixSetPayloadTypes( unsigned int types )
+{
+    asm volatile( "call _optix_set_payload_types, (%0);" : : "r"( types ) : );
+}
+/**
+ * Returns the 32-bit result of the argument-less _optix_undef_value call.
+ * Uses plain asm (not volatile), unlike the payload accessors.
+ */
+static __forceinline__ __device__ unsigned int optixUndefinedValue()
+{
+    unsigned int u0;
+    asm( "call (%0), _optix_undef_value, ();" : "=r"( u0 ) : );
+    return u0;
+}
+/**
+ * Fetches the x, y and z components through three separate argument-less
+ * _optix_get_world_ray_origin_{x,y,z} calls and packs them with make_float3.
+ */
+static __forceinline__ __device__ float3 optixGetWorldRayOrigin()
+{
+    float f0, f1, f2;
+    asm( "call (%0), _optix_get_world_ray_origin_x, ();" : "=f"( f0 ) : );
+    asm( "call (%0), _optix_get_world_ray_origin_y, ();" : "=f"( f1 ) : );
+    asm( "call (%0), _optix_get_world_ray_origin_z, ();" : "=f"( f2 ) : );
+    return make_float3( f0, f1, f2 );
+}
+/**
+ * Fetches the x, y and z components through three separate argument-less
+ * _optix_get_world_ray_direction_{x,y,z} calls and packs them with make_float3.
+ */
+static __forceinline__ __device__ float3 optixGetWorldRayDirection()
+{
+    float f0, f1, f2;
+    asm( "call (%0), _optix_get_world_ray_direction_x, ();" : "=f"( f0 ) : );
+    asm( "call (%0), _optix_get_world_ray_direction_y, ();" : "=f"( f1 ) : );
+    asm( "call (%0), _optix_get_world_ray_direction_z, ();" : "=f"( f2 ) : );
+    return make_float3( f0, f1, f2 );
+}
+/**
+ * Fetches the x, y and z components through three separate argument-less
+ * _optix_get_object_ray_origin_{x,y,z} calls and packs them with make_float3.
+ */
+static __forceinline__ __device__ float3 optixGetObjectRayOrigin()
+{
+    float f0, f1, f2;
+    asm( "call (%0), _optix_get_object_ray_origin_x, ();" : "=f"( f0 ) : );
+    asm( "call (%0), _optix_get_object_ray_origin_y, ();" : "=f"( f1 ) : );
+    asm( "call (%0), _optix_get_object_ray_origin_z, ();" : "=f"( f2 ) : );
+    return make_float3( f0, f1, f2 );
+}
+/**
+ * Fetches the x, y and z components through three separate argument-less
+ * _optix_get_object_ray_direction_{x,y,z} calls and packs them with make_float3.
+ */
+static __forceinline__ __device__ float3 optixGetObjectRayDirection()
+{
+    float f0, f1, f2;
+    asm( "call (%0), _optix_get_object_ray_direction_x, ();" : "=f"( f0 ) : );
+    asm( "call (%0), _optix_get_object_ray_direction_y, ();" : "=f"( f1 ) : );
+    asm( "call (%0), _optix_get_object_ray_direction_z, ();" : "=f"( f2 ) : );
+    return make_float3( f0, f1, f2 );
+}
+// Returns the single float result of the argument-less _optix_get_ray_tmin call.
+static __forceinline__ __device__ float optixGetRayTmin()
+{
+    float f0;
+    asm( "call (%0), _optix_get_ray_tmin, ();" : "=f"( f0 ) : );
+    return f0;
+}
+// Returns the single float result of the argument-less _optix_get_ray_tmax call.
+static __forceinline__ __device__ float optixGetRayTmax()
+{
+    float f0;
+    asm( "call (%0), _optix_get_ray_tmax, ();" : "=f"( f0 ) : );
+    return f0;
+}
+
+static __forceinline__ __device__ float optixGetRayTime() +{ + float f0; + asm( "call (%0), _optix_get_ray_time, ();" : "=f"( f0 ) : ); + return f0; +} + +static __forceinline__ __device__ unsigned int optixGetRayFlags() +{ + unsigned int u0; + asm( "call (%0), _optix_get_ray_flags, ();" : "=r"( u0 ) : ); + return u0; +} + +static __forceinline__ __device__ unsigned int optixGetRayVisibilityMask() +{ + unsigned int u0; + asm( "call (%0), _optix_get_ray_visibility_mask, ();" : "=r"( u0 ) : ); + return u0; +} + +static __forceinline__ __device__ OptixTraversableHandle optixGetInstanceTraversableFromIAS( OptixTraversableHandle ias, + unsigned int instIdx ) +{ + unsigned long long handle; + asm( "call (%0), _optix_get_instance_traversable_from_ias, (%1, %2);" + : "=l"( handle ) : "l"( ias ), "r"( instIdx ) ); + return (OptixTraversableHandle)handle; +} + + +static __forceinline__ __device__ void optixGetTriangleVertexData( OptixTraversableHandle gas, + unsigned int primIdx, + unsigned int sbtGASIndex, + float time, + float3 data[3] ) +{ + asm( "call (%0, %1, %2, %3, %4, %5, %6, %7, %8), _optix_get_triangle_vertex_data, " + "(%9, %10, %11, %12);" + : "=f"( data[0].x ), "=f"( data[0].y ), "=f"( data[0].z ), "=f"( data[1].x ), "=f"( data[1].y ), + "=f"( data[1].z ), "=f"( data[2].x ), "=f"( data[2].y ), "=f"( data[2].z ) + : "l"( gas ), "r"( primIdx ), "r"( sbtGASIndex ), "f"( time ) + : ); +} + +static __forceinline__ __device__ void optixGetLinearCurveVertexData( OptixTraversableHandle gas, + unsigned int primIdx, + unsigned int sbtGASIndex, + float time, + float4 data[2] ) +{ + asm( "call (%0, %1, %2, %3, %4, %5, %6, %7), _optix_get_linear_curve_vertex_data, " + "(%8, %9, %10, %11);" + : "=f"( data[0].x ), "=f"( data[0].y ), "=f"( data[0].z ), "=f"( data[0].w ), + "=f"( data[1].x ), "=f"( data[1].y ), "=f"( data[1].z ), "=f"( data[1].w ) + : "l"( gas ), "r"( primIdx ), "r"( sbtGASIndex ), "f"( time ) + : ); +} + +static __forceinline__ __device__ void 
optixGetQuadraticBSplineVertexData( OptixTraversableHandle gas, + unsigned int primIdx, + unsigned int sbtGASIndex, + float time, + float4 data[3] ) +{ + asm( "call (%0, %1, %2, %3, %4, %5, %6, %7, %8, %9, %10, %11), _optix_get_quadratic_bspline_vertex_data, " + "(%12, %13, %14, %15);" + : "=f"( data[0].x ), "=f"( data[0].y ), "=f"( data[0].z ), "=f"( data[0].w ), + "=f"( data[1].x ), "=f"( data[1].y ), "=f"( data[1].z ), "=f"( data[1].w ), + "=f"( data[2].x ), "=f"( data[2].y ), "=f"( data[2].z ), "=f"( data[2].w ) + : "l"( gas ), "r"( primIdx ), "r"( sbtGASIndex ), "f"( time ) + : ); +} + +static __forceinline__ __device__ void optixGetCubicBSplineVertexData( OptixTraversableHandle gas, + unsigned int primIdx, + unsigned int sbtGASIndex, + float time, + float4 data[4] ) +{ + asm( "call (%0, %1, %2, %3, %4, %5, %6, %7, %8, %9, %10, %11, %12, %13, %14, %15), " + "_optix_get_cubic_bspline_vertex_data, " + "(%16, %17, %18, %19);" + : "=f"( data[0].x ), "=f"( data[0].y ), "=f"( data[0].z ), "=f"( data[0].w ), + "=f"( data[1].x ), "=f"( data[1].y ), "=f"( data[1].z ), "=f"( data[1].w ), + "=f"( data[2].x ), "=f"( data[2].y ), "=f"( data[2].z ), "=f"( data[2].w ), + "=f"( data[3].x ), "=f"( data[3].y ), "=f"( data[3].z ), "=f"( data[3].w ) + : "l"( gas ), "r"( primIdx ), "r"( sbtGASIndex ), "f"( time ) + : ); +} + +static __forceinline__ __device__ void optixGetCatmullRomVertexData( OptixTraversableHandle gas, + unsigned int primIdx, + unsigned int sbtGASIndex, + float time, + float4 data[4] ) +{ + asm( "call (%0, %1, %2, %3, %4, %5, %6, %7, %8, %9, %10, %11, %12, %13, %14, %15), " + "_optix_get_catmullrom_vertex_data, " + "(%16, %17, %18, %19);" + : "=f"( data[0].x ), "=f"( data[0].y ), "=f"( data[0].z ), "=f"( data[0].w ), "=f"( data[1].x ), + "=f"( data[1].y ), "=f"( data[1].z ), "=f"( data[1].w ), "=f"( data[2].x ), "=f"( data[2].y ), + "=f"( data[2].z ), "=f"( data[2].w ), "=f"( data[3].x ), "=f"( data[3].y ), "=f"( data[3].z ), "=f"( data[3].w ) + : "l"( gas ), 
"r"( primIdx ), "r"( sbtGASIndex ), "f"( time ) + : ); +} + +static __forceinline__ __device__ void optixGetSphereData( OptixTraversableHandle gas, + unsigned int primIdx, + unsigned int sbtGASIndex, + float time, + float4 data[1] ) +{ + asm( "call (%0, %1, %2, %3), " + "_optix_get_sphere_data, " + "(%4, %5, %6, %7);" + : "=f"( data[0].x ), "=f"( data[0].y ), "=f"( data[0].z ), "=f"( data[0].w ) + : "l"( gas ), "r"( primIdx ), "r"( sbtGASIndex ), "f"( time ) + : ); +} + +static __forceinline__ __device__ OptixTraversableHandle optixGetGASTraversableHandle() +{ + unsigned long long handle; + asm( "call (%0), _optix_get_gas_traversable_handle, ();" : "=l"( handle ) : ); + return (OptixTraversableHandle)handle; +} + +static __forceinline__ __device__ float optixGetGASMotionTimeBegin( OptixTraversableHandle handle ) +{ + float f0; + asm( "call (%0), _optix_get_gas_motion_time_begin, (%1);" : "=f"( f0 ) : "l"( handle ) : ); + return f0; +} + +static __forceinline__ __device__ float optixGetGASMotionTimeEnd( OptixTraversableHandle handle ) +{ + float f0; + asm( "call (%0), _optix_get_gas_motion_time_end, (%1);" : "=f"( f0 ) : "l"( handle ) : ); + return f0; +} + +static __forceinline__ __device__ unsigned int optixGetGASMotionStepCount( OptixTraversableHandle handle ) +{ + unsigned int u0; + asm( "call (%0), _optix_get_gas_motion_step_count, (%1);" : "=r"( u0 ) : "l"( handle ) : ); + return u0; +} + +static __forceinline__ __device__ void optixGetWorldToObjectTransformMatrix( float m[12] ) +{ + if( optixGetTransformListSize() == 0 ) + { + m[0] = 1.0f; + m[1] = 0.0f; + m[2] = 0.0f; + m[3] = 0.0f; + m[4] = 0.0f; + m[5] = 1.0f; + m[6] = 0.0f; + m[7] = 0.0f; + m[8] = 0.0f; + m[9] = 0.0f; + m[10] = 1.0f; + m[11] = 0.0f; + return; + } + + float4 m0, m1, m2; + optix_impl::optixGetWorldToObjectTransformMatrix( m0, m1, m2 ); + m[0] = m0.x; + m[1] = m0.y; + m[2] = m0.z; + m[3] = m0.w; + m[4] = m1.x; + m[5] = m1.y; + m[6] = m1.z; + m[7] = m1.w; + m[8] = m2.x; + m[9] = m2.y; + m[10] 
= m2.z; + m[11] = m2.w; +} + +static __forceinline__ __device__ void optixGetObjectToWorldTransformMatrix( float m[12] ) +{ + if( optixGetTransformListSize() == 0 ) + { + m[0] = 1.0f; + m[1] = 0.0f; + m[2] = 0.0f; + m[3] = 0.0f; + m[4] = 0.0f; + m[5] = 1.0f; + m[6] = 0.0f; + m[7] = 0.0f; + m[8] = 0.0f; + m[9] = 0.0f; + m[10] = 1.0f; + m[11] = 0.0f; + return; + } + + float4 m0, m1, m2; + optix_impl::optixGetObjectToWorldTransformMatrix( m0, m1, m2 ); + m[0] = m0.x; + m[1] = m0.y; + m[2] = m0.z; + m[3] = m0.w; + m[4] = m1.x; + m[5] = m1.y; + m[6] = m1.z; + m[7] = m1.w; + m[8] = m2.x; + m[9] = m2.y; + m[10] = m2.z; + m[11] = m2.w; +} + +static __forceinline__ __device__ float3 optixTransformPointFromWorldToObjectSpace( float3 point ) +{ + if( optixGetTransformListSize() == 0 ) + return point; + + float4 m0, m1, m2; + optix_impl::optixGetWorldToObjectTransformMatrix( m0, m1, m2 ); + return optix_impl::optixTransformPoint( m0, m1, m2, point ); +} + +static __forceinline__ __device__ float3 optixTransformVectorFromWorldToObjectSpace( float3 vec ) +{ + if( optixGetTransformListSize() == 0 ) + return vec; + + float4 m0, m1, m2; + optix_impl::optixGetWorldToObjectTransformMatrix( m0, m1, m2 ); + return optix_impl::optixTransformVector( m0, m1, m2, vec ); +} + +static __forceinline__ __device__ float3 optixTransformNormalFromWorldToObjectSpace( float3 normal ) +{ + if( optixGetTransformListSize() == 0 ) + return normal; + + float4 m0, m1, m2; + optix_impl::optixGetObjectToWorldTransformMatrix( m0, m1, m2 ); // inverse of optixGetWorldToObjectTransformMatrix() + return optix_impl::optixTransformNormal( m0, m1, m2, normal ); +} + +static __forceinline__ __device__ float3 optixTransformPointFromObjectToWorldSpace( float3 point ) +{ + if( optixGetTransformListSize() == 0 ) + return point; + + float4 m0, m1, m2; + optix_impl::optixGetObjectToWorldTransformMatrix( m0, m1, m2 ); + return optix_impl::optixTransformPoint( m0, m1, m2, point ); +} + +static __forceinline__ __device__ 
float3 optixTransformVectorFromObjectToWorldSpace( float3 vec ) +{ + if( optixGetTransformListSize() == 0 ) + return vec; + + float4 m0, m1, m2; + optix_impl::optixGetObjectToWorldTransformMatrix( m0, m1, m2 ); + return optix_impl::optixTransformVector( m0, m1, m2, vec ); +} + +static __forceinline__ __device__ float3 optixTransformNormalFromObjectToWorldSpace( float3 normal ) +{ + if( optixGetTransformListSize() == 0 ) + return normal; + + float4 m0, m1, m2; + optix_impl::optixGetWorldToObjectTransformMatrix( m0, m1, m2 ); // inverse of optixGetObjectToWorldTransformMatrix() + return optix_impl::optixTransformNormal( m0, m1, m2, normal ); +} + +static __forceinline__ __device__ unsigned int optixGetTransformListSize() +{ + unsigned int u0; + asm( "call (%0), _optix_get_transform_list_size, ();" : "=r"( u0 ) : ); + return u0; +} + +static __forceinline__ __device__ OptixTraversableHandle optixGetTransformListHandle( unsigned int index ) +{ + unsigned long long u0; + asm( "call (%0), _optix_get_transform_list_handle, (%1);" : "=l"( u0 ) : "r"( index ) : ); + return u0; +} + +static __forceinline__ __device__ OptixTransformType optixGetTransformTypeFromHandle( OptixTraversableHandle handle ) +{ + int i0; + asm( "call (%0), _optix_get_transform_type_from_handle, (%1);" : "=r"( i0 ) : "l"( handle ) : ); + return (OptixTransformType)i0; +} + +static __forceinline__ __device__ const OptixStaticTransform* optixGetStaticTransformFromHandle( OptixTraversableHandle handle ) +{ + unsigned long long ptr; + asm( "call (%0), _optix_get_static_transform_from_handle, (%1);" : "=l"( ptr ) : "l"( handle ) : ); + return (const OptixStaticTransform*)ptr; +} + +static __forceinline__ __device__ const OptixSRTMotionTransform* optixGetSRTMotionTransformFromHandle( OptixTraversableHandle handle ) +{ + unsigned long long ptr; + asm( "call (%0), _optix_get_srt_motion_transform_from_handle, (%1);" : "=l"( ptr ) : "l"( handle ) : ); + return (const OptixSRTMotionTransform*)ptr; +} + +static 
__forceinline__ __device__ const OptixMatrixMotionTransform* optixGetMatrixMotionTransformFromHandle( OptixTraversableHandle handle ) +{ + unsigned long long ptr; + asm( "call (%0), _optix_get_matrix_motion_transform_from_handle, (%1);" : "=l"( ptr ) : "l"( handle ) : ); + return (const OptixMatrixMotionTransform*)ptr; +} + +static __forceinline__ __device__ unsigned int optixGetInstanceIdFromHandle( OptixTraversableHandle handle ) +{ + int i0; + asm( "call (%0), _optix_get_instance_id_from_handle, (%1);" : "=r"( i0 ) : "l"( handle ) : ); + return i0; +} + +static __forceinline__ __device__ OptixTraversableHandle optixGetInstanceChildFromHandle( OptixTraversableHandle handle ) +{ + unsigned long long i0; + asm( "call (%0), _optix_get_instance_child_from_handle, (%1);" : "=l"( i0 ) : "l"( handle ) : ); + return (OptixTraversableHandle)i0; +} + +static __forceinline__ __device__ const float4* optixGetInstanceTransformFromHandle( OptixTraversableHandle handle ) +{ + unsigned long long ptr; + asm( "call (%0), _optix_get_instance_transform_from_handle, (%1);" : "=l"( ptr ) : "l"( handle ) : ); + return (const float4*)ptr; +} + +static __forceinline__ __device__ const float4* optixGetInstanceInverseTransformFromHandle( OptixTraversableHandle handle ) +{ + unsigned long long ptr; + asm( "call (%0), _optix_get_instance_inverse_transform_from_handle, (%1);" : "=l"( ptr ) : "l"( handle ) : ); + return (const float4*)ptr; +} + +static __forceinline__ __device__ bool optixReportIntersection( float hitT, unsigned int hitKind ) +{ + int ret; + asm volatile( + "call (%0), _optix_report_intersection_0" + ", (%1, %2);" + : "=r"( ret ) + : "f"( hitT ), "r"( hitKind ) + : ); + return ret; +} + +static __forceinline__ __device__ bool optixReportIntersection( float hitT, unsigned int hitKind, unsigned int a0 ) +{ + int ret; + asm volatile( + "call (%0), _optix_report_intersection_1" + ", (%1, %2, %3);" + : "=r"( ret ) + : "f"( hitT ), "r"( hitKind ), "r"( a0 ) + : ); + return ret; +} + 
+// Calls _optix_report_intersection_2 with hitT, hitKind and two 32-bit attribute values.
+// The intrinsic's integer result is returned as a bool (non-zero -> true).
+static __forceinline__ __device__ bool optixReportIntersection( float hitT, unsigned int hitKind, unsigned int a0, unsigned int a1 )
+{
+    int status;
+    asm volatile(
+        "call (%0), _optix_report_intersection_2"
+        ", (%1, %2, %3, %4);"
+        : "=r"( status )
+        : "f"( hitT ), "r"( hitKind ), "r"( a0 ), "r"( a1 )
+        : );
+    return status != 0;
+}
+
+// Calls _optix_report_intersection_3 with hitT, hitKind and three 32-bit attribute values.
+// The intrinsic's integer result is returned as a bool (non-zero -> true).
+static __forceinline__ __device__ bool optixReportIntersection( float hitT, unsigned int hitKind, unsigned int a0, unsigned int a1, unsigned int a2 )
+{
+    int status;
+    asm volatile(
+        "call (%0), _optix_report_intersection_3"
+        ", (%1, %2, %3, %4, %5);"
+        : "=r"( status )
+        : "f"( hitT ), "r"( hitKind ), "r"( a0 ), "r"( a1 ), "r"( a2 )
+        : );
+    return status != 0;
+}
+
+// Calls _optix_report_intersection_4 with hitT, hitKind and four 32-bit attribute values.
+// The intrinsic's integer result is returned as a bool (non-zero -> true).
+static __forceinline__ __device__ bool optixReportIntersection( float        hitT,
+                                                                unsigned int hitKind,
+                                                                unsigned int a0,
+                                                                unsigned int a1,
+                                                                unsigned int a2,
+                                                                unsigned int a3 )
+{
+    int status;
+    asm volatile(
+        "call (%0), _optix_report_intersection_4"
+        ", (%1, %2, %3, %4, %5, %6);"
+        : "=r"( status )
+        : "f"( hitT ), "r"( hitKind ), "r"( a0 ), "r"( a1 ), "r"( a2 ), "r"( a3 )
+        : );
+    return status != 0;
+}
+
+// Calls _optix_report_intersection_5 with hitT, hitKind and five 32-bit attribute values.
+// The intrinsic's integer result is returned as a bool (non-zero -> true).
+static __forceinline__ __device__ bool optixReportIntersection( float        hitT,
+                                                                unsigned int hitKind,
+                                                                unsigned int a0,
+                                                                unsigned int a1,
+                                                                unsigned int a2,
+                                                                unsigned int a3,
+                                                                unsigned int a4 )
+{
+    int status;
+    asm volatile(
+        "call (%0), _optix_report_intersection_5"
+        ", (%1, %2, %3, %4, %5, %6, %7);"
+        : "=r"( status )
+        : "f"( hitT ), "r"( hitKind ), "r"( a0 ), "r"( a1 ), "r"( a2 ), "r"( a3 ), "r"( a4 )
+        : );
+    return status != 0;
+}
+
+// Calls _optix_report_intersection_6 with hitT, hitKind and six 32-bit attribute values.
+// The intrinsic's integer result is returned as a bool (non-zero -> true).
+static __forceinline__ __device__ bool optixReportIntersection( float        hitT,
+                                                                unsigned int hitKind,
+                                                                unsigned int a0,
+                                                                unsigned int a1,
+                                                                unsigned int a2,
+                                                                unsigned int a3,
+                                                                unsigned int a4,
+                                                                unsigned int a5 )
+{
+    int status;
+    asm volatile(
+        "call (%0), _optix_report_intersection_6"
+        ", (%1, %2, %3, %4, %5, %6, %7, %8);"
+        : "=r"( status )
+        : "f"( hitT ), "r"( hitKind ), "r"( a0 ), "r"( a1 ), "r"( a2 ), "r"( a3 ), "r"( a4 ), "r"( a5 )
+        : );
+    return status != 0;
+}
+
+static __forceinline__ __device__ bool optixReportIntersection( float hitT, + unsigned int hitKind, + unsigned int a0, + unsigned int a1, + unsigned int a2, + unsigned int a3, + unsigned int a4, + unsigned int a5, + unsigned int a6 ) +{ + int ret; + asm volatile( + "call (%0), _optix_report_intersection_7" + ", (%1, %2, %3, %4, %5, %6, %7, %8, %9);" + : "=r"( ret ) + : "f"( hitT ), "r"( hitKind ), "r"( a0 ), "r"( a1 ), "r"( a2 ), "r"( a3 ), "r"( a4 ), "r"( a5 ), "r"( a6 ) + : ); + return ret; +} + +static __forceinline__ __device__ bool optixReportIntersection( float hitT, + unsigned int hitKind, + unsigned int a0, + unsigned int a1, + unsigned int a2, + unsigned int a3, + unsigned int a4, + unsigned int a5, + unsigned int a6, + unsigned int a7 ) +{ + int ret; + asm volatile( + "call (%0), _optix_report_intersection_8" + ", (%1, %2, %3, %4, %5, %6, %7, %8, %9, %10);" + : "=r"( ret ) + : "f"( hitT ), "r"( hitKind ), "r"( a0 ), "r"( a1 ), "r"( a2 ), "r"( a3 ), "r"( a4 ), "r"( a5 ), "r"( a6 ), "r"( a7 ) + : ); + return ret; +} + +#define OPTIX_DEFINE_optixGetAttribute_BODY( which ) \ + unsigned int ret; \ + asm( "call (%0), _optix_get_attribute_" #which ", ();" : "=r"( ret ) : ); \ + return ret; + +static __forceinline__ __device__ unsigned int optixGetAttribute_0() +{ + OPTIX_DEFINE_optixGetAttribute_BODY( 0 ); +} + +static __forceinline__ __device__ unsigned int optixGetAttribute_1() +{ + OPTIX_DEFINE_optixGetAttribute_BODY( 1 ); +} + +static __forceinline__ __device__ unsigned int optixGetAttribute_2() +{ + OPTIX_DEFINE_optixGetAttribute_BODY( 2 ); +} + +static __forceinline__ __device__ unsigned int optixGetAttribute_3() +{ + OPTIX_DEFINE_optixGetAttribute_BODY( 3 ); +} + +static __forceinline__ __device__ unsigned int optixGetAttribute_4() +{ + OPTIX_DEFINE_optixGetAttribute_BODY( 4 ); +} + +static __forceinline__ __device__ unsigned int optixGetAttribute_5() +{ + OPTIX_DEFINE_optixGetAttribute_BODY( 5 ); +} + +static __forceinline__ __device__ unsigned int 
optixGetAttribute_6() +{ + OPTIX_DEFINE_optixGetAttribute_BODY( 6 ); +} + +static __forceinline__ __device__ unsigned int optixGetAttribute_7() +{ + OPTIX_DEFINE_optixGetAttribute_BODY( 7 ); +} + +#undef OPTIX_DEFINE_optixGetAttribute_BODY + +static __forceinline__ __device__ void optixTerminateRay() +{ + asm volatile( "call _optix_terminate_ray, ();" ); +} + +static __forceinline__ __device__ void optixIgnoreIntersection() +{ + asm volatile( "call _optix_ignore_intersection, ();" ); +} + +static __forceinline__ __device__ unsigned int optixGetPrimitiveIndex() +{ + unsigned int u0; + asm( "call (%0), _optix_read_primitive_idx, ();" : "=r"( u0 ) : ); + return u0; +} + +static __forceinline__ __device__ unsigned int optixGetSbtGASIndex() +{ + unsigned int u0; + asm( "call (%0), _optix_read_sbt_gas_idx, ();" : "=r"( u0 ) : ); + return u0; +} + +static __forceinline__ __device__ unsigned int optixGetInstanceId() +{ + unsigned int u0; + asm( "call (%0), _optix_read_instance_id, ();" : "=r"( u0 ) : ); + return u0; +} + +static __forceinline__ __device__ unsigned int optixGetInstanceIndex() +{ + unsigned int u0; + asm( "call (%0), _optix_read_instance_idx, ();" : "=r"( u0 ) : ); + return u0; +} + +static __forceinline__ __device__ unsigned int optixGetHitKind() +{ + unsigned int u0; + asm( "call (%0), _optix_get_hit_kind, ();" : "=r"( u0 ) : ); + return u0; +} + +static __forceinline__ __device__ OptixPrimitiveType optixGetPrimitiveType(unsigned int hitKind) +{ + unsigned int u0; + asm( "call (%0), _optix_get_primitive_type_from_hit_kind, (%1);" : "=r"( u0 ) : "r"( hitKind ) ); + return (OptixPrimitiveType)u0; +} + +static __forceinline__ __device__ bool optixIsBackFaceHit( unsigned int hitKind ) +{ + unsigned int u0; + asm( "call (%0), _optix_get_backface_from_hit_kind, (%1);" : "=r"( u0 ) : "r"( hitKind ) ); + return (u0 == 0x1); +} + +static __forceinline__ __device__ bool optixIsFrontFaceHit( unsigned int hitKind ) +{ + return !optixIsBackFaceHit( hitKind ); +} + + 
+static __forceinline__ __device__ OptixPrimitiveType optixGetPrimitiveType() +{ + return optixGetPrimitiveType( optixGetHitKind() ); +} + +static __forceinline__ __device__ bool optixIsBackFaceHit() +{ + return optixIsBackFaceHit( optixGetHitKind() ); +} + +static __forceinline__ __device__ bool optixIsFrontFaceHit() +{ + return optixIsFrontFaceHit( optixGetHitKind() ); +} + +static __forceinline__ __device__ bool optixIsTriangleHit() +{ + return optixIsTriangleFrontFaceHit() || optixIsTriangleBackFaceHit(); +} + +static __forceinline__ __device__ bool optixIsTriangleFrontFaceHit() +{ + return optixGetHitKind() == OPTIX_HIT_KIND_TRIANGLE_FRONT_FACE; +} + +static __forceinline__ __device__ bool optixIsTriangleBackFaceHit() +{ + return optixGetHitKind() == OPTIX_HIT_KIND_TRIANGLE_BACK_FACE; +} + +static __forceinline__ __device__ float optixGetCurveParameter() +{ + return __int_as_float( optixGetAttribute_0() ); +} + +static __forceinline__ __device__ float2 optixGetTriangleBarycentrics() +{ + float f0, f1; + asm( "call (%0, %1), _optix_get_triangle_barycentrics, ();" : "=f"( f0 ), "=f"( f1 ) : ); + return make_float2( f0, f1 ); +} + +static __forceinline__ __device__ uint3 optixGetLaunchIndex() +{ + unsigned int u0, u1, u2; + asm( "call (%0), _optix_get_launch_index_x, ();" : "=r"( u0 ) : ); + asm( "call (%0), _optix_get_launch_index_y, ();" : "=r"( u1 ) : ); + asm( "call (%0), _optix_get_launch_index_z, ();" : "=r"( u2 ) : ); + return make_uint3( u0, u1, u2 ); +} + +static __forceinline__ __device__ uint3 optixGetLaunchDimensions() +{ + unsigned int u0, u1, u2; + asm( "call (%0), _optix_get_launch_dimension_x, ();" : "=r"( u0 ) : ); + asm( "call (%0), _optix_get_launch_dimension_y, ();" : "=r"( u1 ) : ); + asm( "call (%0), _optix_get_launch_dimension_z, ();" : "=r"( u2 ) : ); + return make_uint3( u0, u1, u2 ); +} + +static __forceinline__ __device__ CUdeviceptr optixGetSbtDataPointer() +{ + unsigned long long ptr; + asm( "call (%0), _optix_get_sbt_data_ptr_64, 
();" : "=l"( ptr ) : ); + return (CUdeviceptr)ptr; +} + +static __forceinline__ __device__ void optixThrowException( int exceptionCode ) +{ + asm volatile( + "call _optix_throw_exception_0, (%0);" + : /* no return value */ + : "r"( exceptionCode ) + : ); +} + +static __forceinline__ __device__ void optixThrowException( int exceptionCode, unsigned int exceptionDetail0 ) +{ + asm volatile( + "call _optix_throw_exception_1, (%0, %1);" + : /* no return value */ + : "r"( exceptionCode ), "r"( exceptionDetail0 ) + : ); +} + +static __forceinline__ __device__ void optixThrowException( int exceptionCode, unsigned int exceptionDetail0, unsigned int exceptionDetail1 ) +{ + asm volatile( + "call _optix_throw_exception_2, (%0, %1, %2);" + : /* no return value */ + : "r"( exceptionCode ), "r"( exceptionDetail0 ), "r"( exceptionDetail1 ) + : ); +} + +static __forceinline__ __device__ void optixThrowException( int exceptionCode, unsigned int exceptionDetail0, unsigned int exceptionDetail1, unsigned int exceptionDetail2 ) +{ + asm volatile( + "call _optix_throw_exception_3, (%0, %1, %2, %3);" + : /* no return value */ + : "r"( exceptionCode ), "r"( exceptionDetail0 ), "r"( exceptionDetail1 ), "r"( exceptionDetail2 ) + : ); +} + +static __forceinline__ __device__ void optixThrowException( int exceptionCode, unsigned int exceptionDetail0, unsigned int exceptionDetail1, unsigned int exceptionDetail2, unsigned int exceptionDetail3 ) +{ + asm volatile( + "call _optix_throw_exception_4, (%0, %1, %2, %3, %4);" + : /* no return value */ + : "r"( exceptionCode ), "r"( exceptionDetail0 ), "r"( exceptionDetail1 ), "r"( exceptionDetail2 ), "r"( exceptionDetail3 ) + : ); +} + +static __forceinline__ __device__ void optixThrowException( int exceptionCode, unsigned int exceptionDetail0, unsigned int exceptionDetail1, unsigned int exceptionDetail2, unsigned int exceptionDetail3, unsigned int exceptionDetail4 ) +{ + asm volatile( + "call _optix_throw_exception_5, (%0, %1, %2, %3, %4, %5);" + : /* 
no return value */ + : "r"( exceptionCode ), "r"( exceptionDetail0 ), "r"( exceptionDetail1 ), "r"( exceptionDetail2 ), "r"( exceptionDetail3 ), "r"( exceptionDetail4 ) + : ); +} + +static __forceinline__ __device__ void optixThrowException( int exceptionCode, unsigned int exceptionDetail0, unsigned int exceptionDetail1, unsigned int exceptionDetail2, unsigned int exceptionDetail3, unsigned int exceptionDetail4, unsigned int exceptionDetail5 ) +{ + asm volatile( + "call _optix_throw_exception_6, (%0, %1, %2, %3, %4, %5, %6);" + : /* no return value */ + : "r"( exceptionCode ), "r"( exceptionDetail0 ), "r"( exceptionDetail1 ), "r"( exceptionDetail2 ), "r"( exceptionDetail3 ), "r"( exceptionDetail4 ), "r"( exceptionDetail5 ) + : ); +} + +static __forceinline__ __device__ void optixThrowException( int exceptionCode, unsigned int exceptionDetail0, unsigned int exceptionDetail1, unsigned int exceptionDetail2, unsigned int exceptionDetail3, unsigned int exceptionDetail4, unsigned int exceptionDetail5, unsigned int exceptionDetail6 ) +{ + asm volatile( + "call _optix_throw_exception_7, (%0, %1, %2, %3, %4, %5, %6, %7);" + : /* no return value */ + : "r"( exceptionCode ), "r"( exceptionDetail0 ), "r"( exceptionDetail1 ), "r"( exceptionDetail2 ), "r"( exceptionDetail3 ), "r"( exceptionDetail4 ), "r"( exceptionDetail5 ), "r"( exceptionDetail6 ) + : ); +} + +static __forceinline__ __device__ void optixThrowException( int exceptionCode, unsigned int exceptionDetail0, unsigned int exceptionDetail1, unsigned int exceptionDetail2, unsigned int exceptionDetail3, unsigned int exceptionDetail4, unsigned int exceptionDetail5, unsigned int exceptionDetail6, unsigned int exceptionDetail7 ) +{ + asm volatile( + "call _optix_throw_exception_8, (%0, %1, %2, %3, %4, %5, %6, %7, %8);" + : /* no return value */ + : "r"( exceptionCode ), "r"( exceptionDetail0 ), "r"( exceptionDetail1 ), "r"( exceptionDetail2 ), "r"( exceptionDetail3 ), "r"( exceptionDetail4 ), "r"( exceptionDetail5 ), "r"( 
exceptionDetail6 ), "r"( exceptionDetail7 ) + : ); +} + +static __forceinline__ __device__ int optixGetExceptionCode() +{ + int s0; + asm( "call (%0), _optix_get_exception_code, ();" : "=r"( s0 ) : ); + return s0; +} + +#define OPTIX_DEFINE_optixGetExceptionDetail_BODY( which ) \ + unsigned int ret; \ + asm( "call (%0), _optix_get_exception_detail_" #which ", ();" : "=r"( ret ) : ); \ + return ret; + +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_0() +{ + OPTIX_DEFINE_optixGetExceptionDetail_BODY( 0 ); +} + +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_1() +{ + OPTIX_DEFINE_optixGetExceptionDetail_BODY( 1 ); +} + +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_2() +{ + OPTIX_DEFINE_optixGetExceptionDetail_BODY( 2 ); +} + +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_3() +{ + OPTIX_DEFINE_optixGetExceptionDetail_BODY( 3 ); +} + +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_4() +{ + OPTIX_DEFINE_optixGetExceptionDetail_BODY( 4 ); +} + +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_5() +{ + OPTIX_DEFINE_optixGetExceptionDetail_BODY( 5 ); +} + +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_6() +{ + OPTIX_DEFINE_optixGetExceptionDetail_BODY( 6 ); +} + +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_7() +{ + OPTIX_DEFINE_optixGetExceptionDetail_BODY( 7 ); +} + +#undef OPTIX_DEFINE_optixGetExceptionDetail_BODY + +static __forceinline__ __device__ OptixTraversableHandle optixGetExceptionInvalidTraversable() +{ + unsigned long long handle; + asm( "call (%0), _optix_get_exception_invalid_traversable, ();" : "=l"( handle ) : ); + return (OptixTraversableHandle)handle; +} + +static __forceinline__ __device__ int optixGetExceptionInvalidSbtOffset() +{ + int s0; + asm( "call (%0), _optix_get_exception_invalid_sbt_offset, ();" : "=r"( s0 ) : ); + return s0; +} + +static 
__forceinline__ __device__ OptixInvalidRayExceptionDetails optixGetExceptionInvalidRay() +{ + float rayOriginX, rayOriginY, rayOriginZ, rayDirectionX, rayDirectionY, rayDirectionZ, tmin, tmax, rayTime; + asm( "call (%0, %1, %2, %3, %4, %5, %6, %7, %8), _optix_get_exception_invalid_ray, ();" + : "=f"( rayOriginX ), "=f"( rayOriginY ), "=f"( rayOriginZ ), "=f"( rayDirectionX ), "=f"( rayDirectionY ), + "=f"( rayDirectionZ ), "=f"( tmin ), "=f"( tmax ), "=f"( rayTime ) + : ); + OptixInvalidRayExceptionDetails ray; + ray.origin = make_float3( rayOriginX, rayOriginY, rayOriginZ ); + ray.direction = make_float3( rayDirectionX, rayDirectionY, rayDirectionZ ); + ray.tmin = tmin; + ray.tmax = tmax; + ray.time = rayTime; + return ray; +} + +static __forceinline__ __device__ OptixParameterMismatchExceptionDetails optixGetExceptionParameterMismatch() +{ + unsigned int expected, actual, sbtIdx; + unsigned long long calleeName; + asm( + "call (%0, %1, %2, %3), _optix_get_exception_parameter_mismatch, ();" + : "=r"(expected), "=r"(actual), "=r"(sbtIdx), "=l"(calleeName) : ); + OptixParameterMismatchExceptionDetails details; + details.expectedParameterCount = expected; + details.passedArgumentCount = actual; + details.sbtIndex = sbtIdx; + details.callableName = (char*)calleeName; + return details; +} + +static __forceinline__ __device__ char* optixGetExceptionLineInfo() +{ + unsigned long long ptr; + asm( "call (%0), _optix_get_exception_line_info, ();" : "=l"(ptr) : ); + return (char*)ptr; +} + +template +static __forceinline__ __device__ ReturnT optixDirectCall( unsigned int sbtIndex, ArgTypes... args ) +{ + unsigned long long func; + asm( "call (%0), _optix_call_direct_callable,(%1);" : "=l"( func ) : "r"( sbtIndex ) : ); + using funcT = ReturnT ( * )( ArgTypes... ); + funcT call = ( funcT )( func ); + return call( args... ); +} + +template +static __forceinline__ __device__ ReturnT optixContinuationCall( unsigned int sbtIndex, ArgTypes... 
args ) +{ + unsigned long long func; + asm( "call (%0), _optix_call_continuation_callable,(%1);" : "=l"( func ) : "r"( sbtIndex ) : ); + using funcT = ReturnT ( * )( ArgTypes... ); + funcT call = ( funcT )( func ); + return call( args... ); +} +#endif + +static __forceinline__ __device__ uint4 optixTexFootprint2D( unsigned long long tex, unsigned int texInfo, float x, float y, unsigned int* singleMipLevel ) +{ + uint4 result; + unsigned long long resultPtr = reinterpret_cast( &result ); + unsigned long long singleMipLevelPtr = reinterpret_cast( singleMipLevel ); + // Cast float args to integers, because the intrinics take .b32 arguments when compiled to PTX. + asm volatile( + "call _optix_tex_footprint_2d_v2" + ", (%0, %1, %2, %3, %4, %5);" + : + : "l"( tex ), "r"( texInfo ), "r"( __float_as_uint( x ) ), "r"( __float_as_uint( y ) ), + "l"( singleMipLevelPtr ), "l"( resultPtr ) + : ); + return result; +} + +static __forceinline__ __device__ uint4 optixTexFootprint2DGrad( unsigned long long tex, + unsigned int texInfo, + float x, + float y, + float dPdx_x, + float dPdx_y, + float dPdy_x, + float dPdy_y, + bool coarse, + unsigned int* singleMipLevel ) +{ + uint4 result; + unsigned long long resultPtr = reinterpret_cast( &result ); + unsigned long long singleMipLevelPtr = reinterpret_cast( singleMipLevel ); + // Cast float args to integers, because the intrinics take .b32 arguments when compiled to PTX. 
+ asm volatile( + "call _optix_tex_footprint_2d_grad_v2" + ", (%0, %1, %2, %3, %4, %5, %6, %7, %8, %9, %10);" + : + : "l"( tex ), "r"( texInfo ), "r"( __float_as_uint( x ) ), "r"( __float_as_uint( y ) ), + "r"( __float_as_uint( dPdx_x ) ), "r"( __float_as_uint( dPdx_y ) ), "r"( __float_as_uint( dPdy_x ) ), + "r"( __float_as_uint( dPdy_y ) ), "r"( static_cast( coarse ) ), "l"( singleMipLevelPtr ), "l"( resultPtr ) + : ); + + return result; +} + +static __forceinline__ __device__ uint4 +optixTexFootprint2DLod( unsigned long long tex, unsigned int texInfo, float x, float y, float level, bool coarse, unsigned int* singleMipLevel ) +{ + uint4 result; + unsigned long long resultPtr = reinterpret_cast( &result ); + unsigned long long singleMipLevelPtr = reinterpret_cast( singleMipLevel ); + // Cast float args to integers, because the intrinics take .b32 arguments when compiled to PTX. + asm volatile( + "call _optix_tex_footprint_2d_lod_v2" + ", (%0, %1, %2, %3, %4, %5, %6, %7);" + : + : "l"( tex ), "r"( texInfo ), "r"( __float_as_uint( x ) ), "r"( __float_as_uint( y ) ), + "r"( __float_as_uint( level ) ), "r"( static_cast( coarse ) ), "l"( singleMipLevelPtr ), "l"( resultPtr ) + : ); + return result; +} diff --git a/include/internal/optix_7_device_impl_exception.h b/include/internal/optix_7_device_impl_exception.h new file mode 100644 index 0000000..c398ef5 --- /dev/null +++ b/include/internal/optix_7_device_impl_exception.h @@ -0,0 +1,295 @@ +/* +* Copyright (c) 2021 NVIDIA Corporation. All rights reserved. +* +* NVIDIA Corporation and its licensors retain all intellectual property and proprietary +* rights in and to this software, related documentation and any modifications thereto. +* Any use, reproduction, disclosure or distribution of this software and related +* documentation without an express license agreement from NVIDIA Corporation is strictly +* prohibited. 
+* +* TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* +* AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, +* INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +* PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY +* SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT +* LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF +* BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR +* INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF +* SUCH DAMAGES +*/ + +/** +* @file optix_7_device_impl_exception.h +* @author NVIDIA Corporation +* @brief OptiX public API +* +* OptiX public API Reference - Device side implementation for exception helper function. +*/ + +#if !defined( __OPTIX_INCLUDE_INTERNAL_HEADERS__ ) +#error("optix_7_device_impl_exception.h is an internal header file and must not be used directly. 
Please use optix_device.h or optix.h instead.") +#endif + +#ifndef __optix_optix_7_device_impl_exception_h__ +#define __optix_optix_7_device_impl_exception_h__ + +#if !defined(__CUDACC_RTC__) +#include /* for printf */ +#endif + +namespace optix_impl { + + static __forceinline__ __device__ void optixDumpStaticTransformFromHandle( OptixTraversableHandle handle ) + { + const OptixStaticTransform* traversable = optixGetStaticTransformFromHandle( handle ); + if( traversable ) + { + const uint3 index = optixGetLaunchIndex(); + printf( "(%4i,%4i,%4i) OptixStaticTransform@%p = {\n" + " child = %p,\n" + " transform = { %f,%f,%f,%f,\n" + " %f,%f,%f,%f,\n" + " %f,%f,%f,%f } }\n", + index.x,index.y,index.z, + traversable, + (void*)traversable->child, + traversable->transform[0], traversable->transform[1], traversable->transform[2], traversable->transform[3], + traversable->transform[4], traversable->transform[5], traversable->transform[6], traversable->transform[7], + traversable->transform[8], traversable->transform[9], traversable->transform[10], traversable->transform[11] ); + } + } + + static __forceinline__ __device__ void optixDumpMotionMatrixTransformFromHandle( OptixTraversableHandle handle ) + { + const OptixMatrixMotionTransform* traversable = optixGetMatrixMotionTransformFromHandle( handle ); + if( traversable ) + { + const uint3 index = optixGetLaunchIndex(); + printf( "(%4i,%4i,%4i) OptixMatrixMotionTransform@%p = {\n" + " child = %p,\n" + " motionOptions = { numKeys = %i, flags = %i, timeBegin = %f, timeEnd = %f },\n" + " transform = { { %f,%f,%f,%f,\n" + " %f,%f,%f,%f,\n" + " %f,%f,%f,%f }, ... 
}\n", + index.x,index.y,index.z, + traversable, + (void*)traversable->child, + (int)traversable->motionOptions.numKeys, (int)traversable->motionOptions.flags, traversable->motionOptions.timeBegin, traversable->motionOptions.timeEnd, + traversable->transform[0][0], traversable->transform[0][1], traversable->transform[0][2], traversable->transform[0][3], + traversable->transform[0][4], traversable->transform[0][5], traversable->transform[0][6], traversable->transform[0][7], + traversable->transform[0][8], traversable->transform[0][9], traversable->transform[0][10], traversable->transform[0][11] ); + } + } + + static __forceinline__ __device__ void optixDumpSrtMatrixTransformFromHandle( OptixTraversableHandle handle ) + { + const OptixSRTMotionTransform* traversable = optixGetSRTMotionTransformFromHandle( handle ); + if( traversable ) + { + const uint3 index = optixGetLaunchIndex(); + printf( "(%4i,%4i,%4i) OptixSRTMotionTransform@%p = {\n" + " child = %p,\n" + " motionOptions = { numKeys = %i, flags = %i, timeBegin = %f, timeEnd = %f },\n" + " srtData = { { sx = %f, a = %f, b = %f, pvx = %f,\n" + " sy = %f, c = %f, pvy = %f, sz = %f,\n" + " pvz = %f, qx = %f, qy = %f, qz = %f,\n" + " qw = %f, tx = %f, ty = %f, tz = %f }, ... 
}\n", + index.x,index.y,index.z, + traversable, + (void*)traversable->child, + (int)traversable->motionOptions.numKeys, (int)traversable->motionOptions.flags, traversable->motionOptions.timeBegin, traversable->motionOptions.timeEnd, + traversable->srtData[0].sx, traversable->srtData[0].a, traversable->srtData[0].b, traversable->srtData[0].pvx, + traversable->srtData[0].sy, traversable->srtData[0].c, traversable->srtData[0].pvy,traversable->srtData[0].sz, + traversable->srtData[0].pvz,traversable->srtData[0].qx,traversable->srtData[0].qy, traversable->srtData[0].qz, + traversable->srtData[0].qw, traversable->srtData[0].tx,traversable->srtData[0].ty, traversable->srtData[0].tz ); + } + } + + static __forceinline__ __device__ void optixDumpInstanceFromHandle( OptixTraversableHandle handle ) + { + if( optixGetTransformTypeFromHandle( handle ) == OPTIX_TRANSFORM_TYPE_INSTANCE ) + { + unsigned int instanceId = optixGetInstanceIdFromHandle( handle ); + const float4* transform = optixGetInstanceTransformFromHandle( handle ); + + const uint3 index = optixGetLaunchIndex(); + printf( "(%4i,%4i,%4i) OptixInstance = {\n" + " instanceId = %i,\n" + " transform = { %f,%f,%f,%f,\n" + " %f,%f,%f,%f,\n" + " %f,%f,%f,%f } }\n", + index.x,index.y,index.z, + instanceId, + transform[0].x, transform[0].y, transform[0].z, transform[0].w, + transform[1].x, transform[1].y, transform[1].z, transform[1].w, + transform[2].x, transform[2].y, transform[2].z, transform[2].w ); + } + } + + static __forceinline__ __device__ void optixDumpTransform( OptixTraversableHandle handle ) + { + const OptixTransformType type = optixGetTransformTypeFromHandle( handle ); + const uint3 index = optixGetLaunchIndex(); + + switch( type ) + { + case OPTIX_TRANSFORM_TYPE_NONE: + break; + case OPTIX_TRANSFORM_TYPE_STATIC_TRANSFORM: + optixDumpStaticTransformFromHandle( handle ); + break; + case OPTIX_TRANSFORM_TYPE_MATRIX_MOTION_TRANSFORM: + optixDumpMotionMatrixTransformFromHandle( handle ); + break; + case 
OPTIX_TRANSFORM_TYPE_SRT_MOTION_TRANSFORM: + optixDumpSrtMatrixTransformFromHandle( handle ); + break; + case OPTIX_TRANSFORM_TYPE_INSTANCE: + optixDumpInstanceFromHandle( handle ); + break; + default: + break; + } + } + + static __forceinline__ __device__ void optixDumpTransformList() + { + const int tlistSize = optixGetTransformListSize(); + const uint3 index = optixGetLaunchIndex(); + + printf("(%4i,%4i,%4i) transform list of size %i:\n", index.x,index.y,index.z, tlistSize); + + for( unsigned int i = 0 ; i < tlistSize ; ++i ) + { + OptixTraversableHandle handle = optixGetTransformListHandle( i ); + printf("(%4i,%4i,%4i) transform[%i] = %p\n", index.x, index.y, index.z, i, (void*)handle); + optixDumpTransform(handle); + } + } + + static __forceinline__ __device__ void optixDumpExceptionDetails() + { + bool dumpTlist = false; + const int exceptionCode = optixGetExceptionCode(); + const uint3 index = optixGetLaunchIndex(); + + if( exceptionCode == OPTIX_EXCEPTION_CODE_STACK_OVERFLOW ) + { + printf("(%4i,%4i,%4i) error: stack overflow\n", index.x,index.y,index.z); + } + else if( exceptionCode == OPTIX_EXCEPTION_CODE_TRACE_DEPTH_EXCEEDED ) + { + printf("(%4i,%4i,%4i) error: trace depth exceeded\n", index.x,index.y,index.z); + } + else if( exceptionCode == OPTIX_EXCEPTION_CODE_TRAVERSAL_DEPTH_EXCEEDED ) + { + printf("(%4i,%4i,%4i) error: traversal depth exceeded\n", index.x,index.y,index.z); + dumpTlist = true; + } + else if( exceptionCode == OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_TRAVERSABLE ) + { + OptixTraversableHandle handle = optixGetExceptionInvalidTraversable(); + printf("(%4i,%4i,%4i) error: invalid traversable %p\n", index.x,index.y,index.z, (void*)handle); + dumpTlist = true; + } + else if( exceptionCode == OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_MISS_SBT ) + { + int sbtOffset = optixGetExceptionInvalidSbtOffset(); + printf("(%4i,%4i,%4i) error: invalid miss sbt of %i\n", index.x,index.y,index.z, sbtOffset); + } + else if( exceptionCode == 
OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_HIT_SBT ) + { + int sbtOffset = optixGetExceptionInvalidSbtOffset(); + printf("(%4i,%4i,%4i) error: invalid hit sbt of %i at primitive with gas sbt index %i\n", index.x,index.y,index.z, sbtOffset, optixGetSbtGASIndex() ); + dumpTlist = true; + } + else if( exceptionCode == OPTIX_EXCEPTION_CODE_UNSUPPORTED_PRIMITIVE_TYPE ) + { + dumpTlist = true; + printf( "(%4i,%4i,%4i) error: shader encountered unsupported builtin type\n" + " call location: %s\n", index.x, index.y, index.z, optixGetExceptionLineInfo() ); + } + else if( exceptionCode == OPTIX_EXCEPTION_CODE_INVALID_RAY ) + { + OptixInvalidRayExceptionDetails ray = optixGetExceptionInvalidRay(); + printf( "(%4i,%4i,%4i) error: encountered an invalid ray:\n", index.x, index.y, index.z ); + printf( + " origin: [%f, %f, %f]\n" + " direction: [%f, %f, %f]\n" + " tmin: %f\n" + " tmax: %f\n" + " rayTime: %f\n" + " call location: %s\n", + ray.origin.x, ray.origin.y, ray.origin.z, ray.direction.x, ray.direction.y, + ray.direction.z, ray.tmin, ray.tmax, ray.time, optixGetExceptionLineInfo() ); + } + else if( exceptionCode == OPTIX_EXCEPTION_CODE_CALLABLE_PARAMETER_MISMATCH ) + { + OptixParameterMismatchExceptionDetails details = optixGetExceptionParameterMismatch(); + printf( "(%4i,%4i,%4i) error: parameter mismatch in callable call.\n", index.x, index.y, index.z ); + printf( + " passed packed arguments: %u 32 Bit values\n" + " expected packed parameters: %u 32 Bit values\n" + " SBT index: %u\n" + " called function: %s\n" + " call location: %s\n", + details.passedArgumentCount, details.expectedParameterCount, details.sbtIndex, + details.callableName, optixGetExceptionLineInfo() ); + } + else if( exceptionCode == OPTIX_EXCEPTION_CODE_BUILTIN_IS_MISMATCH ) + { + dumpTlist = true; + printf("(%4i,%4i,%4i) error: mismatch between builtin IS shader and build input\n" + " call location: %s\n", index.x,index.y,index.z, optixGetExceptionLineInfo() ); + } + else if( exceptionCode == 
OPTIX_EXCEPTION_CODE_CALLABLE_INVALID_SBT ) + { + int sbtOffset = optixGetExceptionInvalidSbtOffset(); + printf( "(%4i,%4i,%4i) error: invalid sbt offset of %i for callable program\n", index.x, index.y, index.z, sbtOffset ); + } + else if( exceptionCode == OPTIX_EXCEPTION_CODE_CALLABLE_NO_DC_SBT_RECORD ) + { + int sbtOffset = optixGetExceptionInvalidSbtOffset(); + printf( "(%4i,%4i,%4i) error: invalid sbt offset of %i for direct callable program\n", index.x, index.y, index.z, sbtOffset ); + } + else if( exceptionCode == OPTIX_EXCEPTION_CODE_CALLABLE_NO_CC_SBT_RECORD ) + { + int sbtOffset = optixGetExceptionInvalidSbtOffset(); + printf( "(%4i,%4i,%4i) error: invalid sbt offset of %i for continuation callable program\n", index.x, index.y, index.z, sbtOffset ); + } + else if( exceptionCode == OPTIX_EXCEPTION_CODE_UNSUPPORTED_SINGLE_LEVEL_GAS ) + { + OptixTraversableHandle handle = optixGetExceptionInvalidTraversable(); + printf("(%4i,%4i,%4i) error: unsupported single GAS traversable graph %p\n", index.x,index.y,index.z, (void*)handle); + dumpTlist = true; + } + else if( ( exceptionCode <= OPTIX_EXCEPTION_CODE_INVALID_VALUE_ARGUMENT_0 ) && ( exceptionCode >= OPTIX_EXCEPTION_CODE_INVALID_VALUE_ARGUMENT_2 ) ) + { + printf("(%4i,%4i,%4i) error: invalid value for argument %i\n", index.x,index.y,index.z, -(exceptionCode - OPTIX_EXCEPTION_CODE_INVALID_VALUE_ARGUMENT_0) ); + } + else if( exceptionCode == OPTIX_EXCEPTION_CODE_UNSUPPORTED_DATA_ACCESS ) + { + printf("(%4i,%4i,%4i) error: unsupported random data access\n", index.x,index.y,index.z); + } + else if( exceptionCode == OPTIX_EXCEPTION_CODE_PAYLOAD_TYPE_MISMATCH ) + { + printf("(%4i,%4i,%4i) error: payload type mismatch between program and optixTrace call\n", index.x,index.y,index.z); + } + else if( exceptionCode >= 0 ) + { + dumpTlist = true; + printf( "(%4i,%4i,%4i) error: user exception with error code %i\n" + " call location: %s\n", index.x, index.y, index.z, exceptionCode, optixGetExceptionLineInfo() ); + } + else 
+ { + printf("(%4i,%4i,%4i) error: unknown exception with error code %i\n", index.x,index.y,index.z, exceptionCode); + } + + if( dumpTlist ) + optixDumpTransformList(); + } + +} // namespace optix_impl + +#endif diff --git a/include/internal/optix_7_device_impl_transformations.h b/include/internal/optix_7_device_impl_transformations.h new file mode 100644 index 0000000..c2cc69a --- /dev/null +++ b/include/internal/optix_7_device_impl_transformations.h @@ -0,0 +1,424 @@ +/* +* Copyright (c) 2021 NVIDIA Corporation. All rights reserved. +* +* NVIDIA Corporation and its licensors retain all intellectual property and proprietary +* rights in and to this software, related documentation and any modifications thereto. +* Any use, reproduction, disclosure or distribution of this software and related +* documentation without an express license agreement from NVIDIA Corporation is strictly +* prohibited. +* +* TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* +* AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, +* INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +* PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY +* SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT +* LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF +* BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR +* INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF +* SUCH DAMAGES +*/ + +/** +* @file optix_7_device_impl_transformations.h +* @author NVIDIA Corporation +* @brief OptiX public API +* +* OptiX public API Reference - Device side implementation for transformation helper functions. +*/ + +#if !defined( __OPTIX_INCLUDE_INTERNAL_HEADERS__ ) +#error("optix_7_device_impl_transformations.h is an internal header file and must not be used directly. 
Please use optix_device.h or optix.h instead.")
+#endif
+
+#ifndef __optix_optix_7_device_impl_transformations_h__
+#define __optix_optix_7_device_impl_transformations_h__
+
+namespace optix_impl {
+
+// Returns the component-wise sum a + b.
+static __forceinline__ __device__ float4 optixAddFloat4( const float4& a, const float4& b )
+{
+    return make_float4( a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w );
+}
+
+// Returns a with every component multiplied by the scalar b.
+static __forceinline__ __device__ float4 optixMulFloat4( const float4& a, float b )
+{
+    return make_float4( a.x * b, a.y * b, a.z * b, a.w * b );
+}
+
+// Loads four consecutive 32-bit words starting at addr and returns them as a uint4.
+// The address is first converted with cvta.to.global and then read with a single
+// ld.global.v4.u32 instruction.
+static __forceinline__ __device__ uint4 optixLdg( unsigned long long addr )
+{
+    const uint4* ptr;
+    asm volatile( "cvta.to.global.u64 %0, %1;" : "=l"( ptr ) : "l"( addr ) );
+    uint4 ret;
+    asm volatile( "ld.global.v4.u32 {%0,%1,%2,%3}, [%4];"
+                  : "=r"( ret.x ), "=r"( ret.y ), "=r"( ret.z ), "=r"( ret.w )
+                  : "l"( ptr ) );
+    return ret;
+}
+
+// Returns a copy of *ptr that is assembled from 16-byte chunks read with optixLdg.
+// The copy always proceeds in whole 16-byte steps, so this presumably expects ptr to be
+// 16-byte aligned and sizeof( T ) to be a multiple of 16 - confirm for any new type T.
+template <typename T>
+static __forceinline__ __device__ T optixLoadReadOnlyAlign16( const T* ptr )
+{
+    T v;
+    for( int ofs = 0; ofs < sizeof( T ); ofs += 16 )
+        *(uint4*)( (char*)&v + ofs ) = optixLdg( (unsigned long long)( (char*)ptr + ofs ) );
+    return v;
+}
+
+// Multiplies the row vector vec with the 3x4 matrix with rows m0, m1, and m2
+// vec.w is not multiplied with anything; it is added unchanged to the w component of the result.
+static __forceinline__ __device__ float4 optixMultiplyRowMatrix( const float4 vec, const float4 m0, const float4 m1, const float4 m2 )
+{
+    float4 result;
+
+    result.x = vec.x * m0.x + vec.y * m1.x + vec.z * m2.x;
+    result.y = vec.x * m0.y + vec.y * m1.y + vec.z * m2.y;
+    result.z = vec.x * m0.z + vec.y * m1.z + vec.z * m2.z;
+    result.w = vec.x * m0.w + vec.y * m1.w + vec.z * m2.w + vec.w;
+
+    return result;
+}
+
+// Converts the SRT transformation srt into a 3x4 matrix with rows m0, m1, and m2
+// The rotation is built from the quaternion ( srt.qx, srt.qy, srt.qz, srt.qw ), then combined with the
+// remaining srt fields ( sx, sy, sz, a, b, c, pvx, pvy, pvz, tx, ty, tz ) column by column.
+static __forceinline__ __device__ void optixGetMatrixFromSrt( float4& m0, float4& m1, float4& m2, const OptixSRTData& srt )
+{
+    const float4 q = {srt.qx, srt.qy, srt.qz, srt.qw};
+
+    // normalize
+    // The scale is 1 / |q|^2 (no square root): every product below pairs one component of q with one
+    // component of nq, which equals the product of two components of the unit quaternion.
+    const float inv_sql = 1.f / ( srt.qx * srt.qx + srt.qy * srt.qy + srt.qz * srt.qz + srt.qw * srt.qw );
+    const float4 nq = optixMulFloat4( q, inv_sql );
+
+    const float sqw = q.w * nq.w;
+    const float sqx = q.x * nq.x;
+    const float sqy = q.y * nq.y;
+    const float sqz = q.z * nq.z;
+
+    const float xy = q.x * nq.y;
+    const float zw = q.z * nq.w;
+    const float xz = q.x * nq.z;
+    const float yw = q.y * nq.w;
+    const float yz = q.y * nq.z;
+    const float xw = q.x * nq.w;
+
+    // Pure rotation part.
+    m0.x = ( sqx - sqy - sqz + sqw );
+    m0.y = 2.0f * ( xy - zw );
+    m0.z = 2.0f * ( xz + yw );
+
+    m1.x = 2.0f * ( xy + zw );
+    m1.y = ( -sqx + sqy - sqz + sqw );
+    m1.z = 2.0f * ( yz - xw );
+
+    m2.x = 2.0f * ( xz - yw );
+    m2.y = 2.0f * ( yz + xw );
+    m2.z = ( -sqx - sqy + sqz + sqw );
+
+    // The columns are finalized from right to left ( w, z, y, x ): each assignment reads the still
+    // unmodified rotation entries of the columns to its left, so the order must not be changed.
+    m0.w = m0.x * srt.pvx + m0.y * srt.pvy + m0.z * srt.pvz + srt.tx;
+    m1.w = m1.x * srt.pvx + m1.y * srt.pvy + m1.z * srt.pvz + srt.ty;
+    m2.w = m2.x * srt.pvx + m2.y * srt.pvy + m2.z * srt.pvz + srt.tz;
+
+    m0.z = m0.x * srt.b + m0.y * srt.c + m0.z * srt.sz;
+    m1.z = m1.x * srt.b + m1.y * srt.c + m1.z * srt.sz;
+    m2.z = m2.x * srt.b + m2.y * srt.c + m2.z * srt.sz;
+
+    m0.y = m0.x * srt.a + m0.y * srt.sy;
+    m1.y = m1.x * srt.a + m1.y * srt.sy;
+    m2.y = m2.x * srt.a + m2.y * srt.sy;
+
+    m0.x = m0.x * srt.sx;
+    m1.x = m1.x * srt.sx;
+    m2.x = m2.x * srt.sx;
+}
+
+// Inverts a 3x4 matrix in place
+// The 3x3 part is inverted via its adjugate and determinant; the new w column is the negated product
+// of that inverse with the old w column. There is no guard against a singular matrix: a zero
+// determinant leads to a division by zero and non-finite results.
+static __forceinline__ __device__ void optixInvertMatrix( float4& m0, float4& m1, float4& m2 )
+{
+    const float det3 =
+        m0.x * ( m1.y * m2.z - m1.z * m2.y ) - m0.y * ( m1.x * m2.z - m1.z * m2.x ) + m0.z * ( m1.x * m2.y - m1.y * m2.x );
+
+    const float inv_det3 = 1.0f / det3;
+
+    float inv3[3][3];
+    inv3[0][0] = inv_det3 * ( m1.y * m2.z - m2.y * m1.z );
+    inv3[0][1] = inv_det3 * ( m0.z * m2.y - m2.z * m0.y );
+    inv3[0][2] = inv_det3 * ( m0.y * m1.z - m1.y * m0.z );
+
+    inv3[1][0] = inv_det3 * ( m1.z * m2.x - m2.z * m1.x );
+    inv3[1][1] = inv_det3 * ( m0.x * m2.z - m2.x * m0.z );
+    inv3[1][2] = inv_det3 * ( m0.z * m1.x - m1.z * m0.x );
+
+    inv3[2][0] = inv_det3 * ( m1.x * m2.y - m2.x * m1.y );
+    inv3[2][1] = inv_det3 * ( m0.y * m2.x - m2.y * m0.x );
+    inv3[2][2] = inv_det3 * ( m0.x * m1.y - m1.x * m0.y );
+
+    // Keep the old w column: m0.w, m1.w and m2.w are overwritten one after the other below.
+    const float b[3] = {m0.w, m1.w, m2.w};
+
+    m0.x = inv3[0][0];
+    m0.y = inv3[0][1];
+    m0.z = inv3[0][2];
+    m0.w = -inv3[0][0] * b[0] - inv3[0][1] * b[1] - inv3[0][2] * b[2];
+
+    m1.x = inv3[1][0];
+    m1.y = inv3[1][1];
+    m1.z = inv3[1][2];
+    m1.w = -inv3[1][0] * b[0] - inv3[1][1] * b[1] - inv3[1][2] * b[2];
+
+    m2.x = inv3[2][0];
+    m2.y = inv3[2][1];
+    m2.z = inv3[2][2];
+    m2.w = -inv3[2][0] * b[0] - inv3[2][1] * b[1] - inv3[2][2] * b[2];
+}
+
+// Loads the matrix key stored in matrix[0..2] into m0, m1, m2. If t1 > 0 the result is blended with
+// the following key in matrix[3..5] as ( 1 - t1 ) * key + t1 * nextKey.
+// For t1 <= 0 the following key is never read.
+static __forceinline__ __device__ void optixLoadInterpolatedMatrixKey( float4& m0, float4& m1, float4& m2, const float4* matrix, const float t1 )
+{
+    m0 = optixLoadReadOnlyAlign16( &matrix[0] );
+    m1 = optixLoadReadOnlyAlign16( &matrix[1] );
+    m2 = optixLoadReadOnlyAlign16( &matrix[2] );
+
+    // The conditional prevents concurrent loads leading to spills
+    if( t1 > 0.0f )
+    {
+        const float t0 = 1.0f - t1;
+        m0 = optixAddFloat4( optixMulFloat4( m0, t0 ), optixMulFloat4( optixLoadReadOnlyAlign16( &matrix[3] ), t1 ) );
+        m1 = optixAddFloat4( optixMulFloat4( m1, t0 ), optixMulFloat4( optixLoadReadOnlyAlign16( &matrix[4] ), t1 ) );
+        m2 = optixAddFloat4( optixMulFloat4( m2, t0 ), optixMulFloat4( optixLoadReadOnlyAlign16( &matrix[5] ), t1 ) );
+    }
+}
+
+// Loads the SRT key stored in srt[0..3] into srt0..srt3. If t1 > 0 the result is blended with the
+// following key in srt[4..7] as ( 1 - t1 ) * key + t1 * nextKey, and the four components
+// srt2.y, srt2.z, srt2.w, srt3.x are rescaled to unit length afterwards.
+// NOTE(review): those four floats are presumably the quaternion of OptixSRTData - confirm against
+// the struct layout.
+// For t1 <= 0 the following key is never read and no rescaling takes place.
+static __forceinline__ __device__ void optixLoadInterpolatedSrtKey( float4& srt0,
+                                                                    float4& srt1,
+                                                                    float4& srt2,
+                                                                    float4& srt3,
+                                                                    const float4* srt,
+                                                                    const float t1 )
+{
+    srt0 = optixLoadReadOnlyAlign16( &srt[0] );
+    srt1 = optixLoadReadOnlyAlign16( &srt[1] );
+    srt2 = optixLoadReadOnlyAlign16( &srt[2] );
+    srt3 = optixLoadReadOnlyAlign16( &srt[3] );
+
+    // The conditional prevents concurrent loads leading to spills
+    if( t1 > 0.0f )
+    {
+        const float t0 = 1.0f - t1;
+        srt0 = optixAddFloat4( optixMulFloat4( srt0, t0 ), optixMulFloat4( optixLoadReadOnlyAlign16( &srt[4] ), t1 ) );
+        srt1 = optixAddFloat4( optixMulFloat4( srt1, t0 ), optixMulFloat4( optixLoadReadOnlyAlign16( &srt[5] ), t1 ) );
+        srt2 = optixAddFloat4( optixMulFloat4( srt2, t0 ), optixMulFloat4( optixLoadReadOnlyAlign16( &srt[6] ), t1 ) );
+        srt3 = optixAddFloat4( optixMulFloat4( srt3, t0 ), optixMulFloat4( optixLoadReadOnlyAlign16( &srt[7] ), t1 ) );
+
+        float inv_length = 1.f / sqrt( srt2.y * srt2.y + srt2.z * srt2.z + srt2.w * srt2.w + srt3.x * srt3.x );
+        srt2.y *= inv_length;
+        srt2.z *= inv_length;
+        srt2.w *= inv_length;
+        srt3.x *= inv_length;
+    }
+}
+
+// Maps the global time globalt to a motion key index and a position inside that key interval.
+// globalt is rescaled from [ options.timeBegin, options.timeEnd ] to [ 0, numKeys - 1 ] and clamped to
+// that range; key receives the integer part and localt the fractional part.
+// At or beyond timeEnd this yields key == numKeys - 1 with localt == 0.
+static __forceinline__ __device__ void optixResolveMotionKey( float& localt, int& key, const OptixMotionOptions& options, const float globalt )
+{
+    const float timeBegin = options.timeBegin;
+    const float timeEnd = options.timeEnd;
+    const float numIntervals = (float)( options.numKeys - 1 );
+
+    // No need to check the motion flags. If data originates from a valid transform list handle, then globalt is in
+    // range, or vanish flags are not set.
+
+    const float time = max( 0.f, min( numIntervals, ( globalt - timeBegin ) * numIntervals / ( timeEnd - timeBegin ) ) );
+    const float fltKey = floorf( time );
+
+    localt = time - fltKey;
+    key = (int)fltKey;
+}
+
+// Returns the interpolated transformation matrix for a particular matrix motion transformation and point in time.
+// trf0, trf1, trf2 receive the three rows of the result.
+static __forceinline__ __device__ void optixGetInterpolatedTransformation( float4& trf0,
+                                                                           float4& trf1,
+                                                                           float4& trf2,
+                                                                           const OptixMatrixMotionTransform* transformData,
+                                                                           const float time )
+{
+    // Compute key and intra key time
+    float keyTime;
+    int key;
+    optixResolveMotionKey( keyTime, key, optixLoadReadOnlyAlign16( transformData ).motionOptions, time );
+
+    // Get pointer to left key
+    const float4* transform = (const float4*)( &transformData->transform[key][0] );
+
+    // Load and interpolate matrix keys
+    optixLoadInterpolatedMatrixKey( trf0, trf1, trf2, transform, keyTime );
+}
+
+// Returns the interpolated transformation matrix for a particular SRT motion transformation and point in time.
+// trf0, trf1, trf2 receive the three rows of the result. The motion key and the position inside the
+// key interval are resolved from transformData's motionOptions, the SRT key is loaded (and blended
+// with the following key where needed) and finally converted into a matrix.
+static __forceinline__ __device__ void optixGetInterpolatedTransformation( float4& trf0,
+                                                                           float4& trf1,
+                                                                           float4& trf2,
+                                                                           const OptixSRTMotionTransform* transformData,
+                                                                           const float time )
+{
+    // Compute key and intra key time
+    float keyTime;
+    int key;
+    optixResolveMotionKey( keyTime, key, optixLoadReadOnlyAlign16( transformData ).motionOptions, time );
+
+    // Get pointer to left key
+    // Each SRT key is accessed as four consecutive float4 values by the loader below.
+    const float4* dataPtr = reinterpret_cast<const float4*>( &transformData->srtData[key] );
+
+    // Load and interpolate SRT keys
+    float4 data[4];
+    optixLoadInterpolatedSrtKey( data[0], data[1], data[2], data[3], dataPtr, keyTime );
+
+    // Repack the 16 loaded floats, in order, into an OptixSRTData value.
+    OptixSRTData srt = {data[0].x, data[0].y, data[0].z, data[0].w, data[1].x, data[1].y, data[1].z, data[1].w,
+                        data[2].x, data[2].y, data[2].z, data[2].w, data[3].x, data[3].y, data[3].z, data[3].w};
+
+    // Convert SRT into a matrix
+    optixGetMatrixFromSrt( trf0, trf1, trf2, srt );
+}
+
+// Returns the interpolated transformation matrix for a particular traversable handle and point in time.
+// trf0, trf1, trf2 receive the three rows of the matrix. objectToWorld selects the direction:
+//  - matrix / SRT motion transforms: the key data is interpolated at the given time and used as is for
+//    object-to-world, or inverted for world-to-object;
+//  - instances and static transforms: the stored matrix for the requested direction is loaded;
+//  - any other transform type: the identity matrix is returned.
+static __forceinline__ __device__ void optixGetInterpolatedTransformationFromHandle( float4& trf0,
+                                                                                     float4& trf1,
+                                                                                     float4& trf2,
+                                                                                     const OptixTraversableHandle handle,
+                                                                                     const float time,
+                                                                                     const bool objectToWorld )
+{
+    const OptixTransformType kind = optixGetTransformTypeFromHandle( handle );
+
+    const bool isMatrixMotion = ( kind == OPTIX_TRANSFORM_TYPE_MATRIX_MOTION_TRANSFORM );
+    const bool isSrtMotion = ( kind == OPTIX_TRANSFORM_TYPE_SRT_MOTION_TRANSFORM );
+
+    if( isMatrixMotion || isSrtMotion )
+    {
+        if( isMatrixMotion )
+        {
+            const OptixMatrixMotionTransform* motion = optixGetMatrixMotionTransformFromHandle( handle );
+            optixGetInterpolatedTransformation( trf0, trf1, trf2, motion, time );
+        }
+        else
+        {
+            const OptixSRTMotionTransform* motion = optixGetSRTMotionTransformFromHandle( handle );
+            optixGetInterpolatedTransformation( trf0, trf1, trf2, motion, time );
+        }
+
+        // The interpolated matrix serves the object-to-world direction; invert it for the other one.
+        if( !objectToWorld )
+            optixInvertMatrix( trf0, trf1, trf2 );
+        return;
+    }
+
+    const bool isInstance = ( kind == OPTIX_TRANSFORM_TYPE_INSTANCE );
+    const bool isStatic = ( kind == OPTIX_TRANSFORM_TYPE_STATIC_TRANSFORM );
+
+    if( isInstance || isStatic )
+    {
+        // Both directions are stored explicitly, so only the matching row pointer has to be picked.
+        const float4* rows;
+
+        if( isInstance )
+        {
+            if( objectToWorld )
+                rows = optixGetInstanceTransformFromHandle( handle );
+            else
+                rows = optixGetInstanceInverseTransformFromHandle( handle );
+        }
+        else
+        {
+            const OptixStaticTransform* staticTransform = optixGetStaticTransformFromHandle( handle );
+            if( objectToWorld )
+                rows = (const float4*)( staticTransform->transform );
+            else
+                rows = (const float4*)( staticTransform->invTransform );
+        }
+
+        trf0 = optixLoadReadOnlyAlign16( &rows[0] );
+        trf1 = optixLoadReadOnlyAlign16( &rows[1] );
+        trf2 = optixLoadReadOnlyAlign16( &rows[2] );
+        return;
+    }
+
+    // Neither a motion transform nor an instance / static transform: identity.
+    trf0 = make_float4( 1.0f, 0.0f, 0.0f, 0.0f );
+    trf1 = make_float4( 0.0f, 1.0f, 0.0f, 0.0f );
+    trf2 = make_float4( 0.0f, 0.0f, 1.0f, 0.0f );
+}
+
+// Returns the world-to-object transformation matrix resulting from the current transform stack and current ray time.
+// The transform list is walked from index 0 upwards; every entry is fetched in its world-to-object
+// form and multiplied from the left onto the rows accumulated so far.
+// m0, m1, m2 are left untouched when the transform list is empty.
+static __forceinline__ __device__ void optixGetWorldToObjectTransformMatrix( float4& m0, float4& m1, float4& m2 )
+{
+    const unsigned int listSize = optixGetTransformListSize();
+    const float rayTime = optixGetRayTime();
+
+#pragma unroll 1
+    for( unsigned int idx = 0; idx < listSize; ++idx )
+    {
+        const OptixTraversableHandle entry = optixGetTransformListHandle( idx );
+
+        float4 row0, row1, row2;
+        optixGetInterpolatedTransformationFromHandle( row0, row1, row2, entry, rayTime, /*objectToWorld*/ false );
+
+        if( idx != 0 )
+        {
+            // m := trf * m
+            // Snapshot the accumulated rows first, m0..m2 are overwritten one by one.
+            const float4 acc0 = m0, acc1 = m1, acc2 = m2;
+            m0 = optixMultiplyRowMatrix( row0, acc0, acc1, acc2 );
+            m1 = optixMultiplyRowMatrix( row1, acc0, acc1, acc2 );
+            m2 = optixMultiplyRowMatrix( row2, acc0, acc1, acc2 );
+        }
+        else
+        {
+            // First entry: nothing accumulated yet, take the rows as they are.
+            m0 = row0;
+            m1 = row1;
+            m2 = row2;
+        }
+    }
+}
+
+// Computes the object-to-world matrix for the current transform list at the current ray time.
+// The transform list is walked from the last index down to 0; every entry is fetched in its
+// object-to-world form and multiplied from the left onto the rows accumulated so far.
+// m0, m1, m2 are left untouched when the transform list is empty.
+static __forceinline__ __device__ void optixGetObjectToWorldTransformMatrix( float4& m0, float4& m1, float4& m2 )
+{
+    const int listSize = optixGetTransformListSize();
+    const float rayTime = optixGetRayTime();
+    const int lastIdx = listSize - 1;
+
+#pragma unroll 1
+    for( int idx = lastIdx; idx >= 0; --idx )
+    {
+        const OptixTraversableHandle entry = optixGetTransformListHandle( idx );
+
+        float4 row0, row1, row2;
+        optixGetInterpolatedTransformationFromHandle( row0, row1, row2, entry, rayTime, /*objectToWorld*/ true );
+
+        if( idx != lastIdx )
+        {
+            // m := trf * m
+            // Snapshot the accumulated rows first, m0..m2 are overwritten one by one.
+            const float4 acc0 = m0, acc1 = m1, acc2 = m2;
+            m0 = optixMultiplyRowMatrix( row0, acc0, acc1, acc2 );
+            m1 = optixMultiplyRowMatrix( row1, acc0, acc1, acc2 );
+            m2 = optixMultiplyRowMatrix( row2, acc0, acc1, acc2 );
+        }
+        else
+        {
+            // First visited entry: nothing accumulated yet, take the rows as they are.
+            m0 = row0;
+            m1 = row1;
+            m2 = row2;
+        }
+    }
+}
+
+// Multiplies the 3x4 matrix with rows m0, m1, m2 with the point p.
+// Each row yields one output component; the w entry of a row is added as the translation term.
+static __forceinline__ __device__ float3 optixTransformPoint( const float4& m0, const float4& m1, const float4& m2, const float3& p )
+{
+    const float x = m0.x * p.x + m0.y * p.y + m0.z * p.z + m0.w;
+    const float y = m1.x * p.x + m1.y * p.y + m1.z * p.z + m1.w;
+    const float z = m2.x * p.x + m2.y * p.y + m2.z * p.z + m2.w;
+    return make_float3( x, y, z );
+}
+
+// Applies only the 3x3 linear part of the 3x4 matrix with rows m0, m1, m2 to the vector v;
+// the w entries (translation) of the rows are ignored.
+static __forceinline__ __device__ float3 optixTransformVector( const float4& m0, const float4& m1, const float4& m2, const float3& v )
+{
+    const float x = m0.x * v.x + m0.y * v.y + m0.z * v.z;
+    const float y = m1.x * v.x + m1.y * v.y + m1.z * v.z;
+    const float z = m2.x * v.x + m2.y * v.y + m2.z * v.z;
+    return make_float3( x, y, z );
+}
+
+// Applies the transpose of the 3x3 linear part of the 3x4 matrix with rows m0, m1, m2 to the normal n
+// (output component i combines column i of the rows).
+// The matrix passed in is expected to be the inverse of the actual transformation matrix.
+static __forceinline__ __device__ float3 optixTransformNormal( const float4& m0, const float4& m1, const float4& m2, const float3& n )
+{
+    const float x = m0.x * n.x + m1.x * n.y + m2.x * n.z;
+    const float y = m0.y * n.x + m1.y * n.y + m2.y * n.z;
+    const float z = m0.z * n.x + m1.z * n.y + m2.z * n.z;
+    return make_float3( x, y, z );
+}
+
+} // namespace optix_impl
+
+#endif
diff --git a/include/optix.h b/include/optix.h
new file mode 100644
index 0000000..58c5334
--- /dev/null
+++ b/include/optix.h
@@ -0,0 +1,47 @@
+
+/*
+ * Copyright (c) 2021 NVIDIA Corporation. All rights reserved.
+ *
+ * NVIDIA Corporation and its licensors retain all intellectual property and proprietary
+ * rights in and to this software, related documentation and any modifications thereto.
+ * Any use, reproduction, disclosure or distribution of this software and related
+ * documentation without an express license agreement from NVIDIA Corporation is strictly
+ * prohibited.
+ * + * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* + * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, + * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY + * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT + * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF + * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR + * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGES + */ + +/// @file +/// @author NVIDIA Corporation +/// @brief OptiX public API header +/// +/// Includes the host api if compiling host code, includes the cuda api if compiling device code. +/// For the math library routines include optix_math.h + +#ifndef __optix_optix_h__ +#define __optix_optix_h__ + +/// The OptiX version. +/// +/// - major = OPTIX_VERSION/10000 +/// - minor = (OPTIX_VERSION%10000)/100 +/// - micro = OPTIX_VERSION%100 +#define OPTIX_VERSION 70500 + + +#ifdef __CUDACC__ +#include "optix_device.h" +#else +#include "optix_host.h" +#endif + + +#endif // __optix_optix_h__ diff --git a/include/optix_7_device.h b/include/optix_7_device.h new file mode 100644 index 0000000..aa45068 --- /dev/null +++ b/include/optix_7_device.h @@ -0,0 +1,1008 @@ +/* +* Copyright (c) 2021 NVIDIA Corporation. All rights reserved. +* +* NVIDIA Corporation and its licensors retain all intellectual property and proprietary +* rights in and to this software, related documentation and any modifications thereto. +* Any use, reproduction, disclosure or distribution of this software and related +* documentation without an express license agreement from NVIDIA Corporation is strictly +* prohibited. 
+* +* TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* +* AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, +* INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +* PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY +* SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT +* LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF +* BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR +* INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF +* SUCH DAMAGES +*/ + +/// @file +/// @author NVIDIA Corporation +/// @brief OptiX public API header +/// +/// OptiX public API Reference - Device API declarations + +#if !defined( __OPTIX_INCLUDE_INTERNAL_HEADERS__ ) +#error("optix_7_device.h is an internal header file and must not be used directly. Please use optix_device.h or optix.h instead.") +#endif + + +#ifndef __optix_optix_7_device_h__ +#define __optix_optix_7_device_h__ + +#if defined( __cplusplus ) && ( __cplusplus < 201103L ) && !defined( _WIN32 ) +#error Device code for OptiX requires at least C++11. Consider adding "--std c++11" to the nvcc command-line. +#endif + +#include "optix_7_types.h" + +/// \defgroup optix_device_api Device API +/// \brief OptiX Device API + +/** \addtogroup optix_device_api +@{ +*/ + +/// Initiates a ray tracing query starting with the given traversable. 
+///
+/// \tparam Payload           types of the payload values, passed by reference as a parameter pack
+/// \param[in] handle         the traversable the query starts with
+/// \param[in] rayOrigin
+/// \param[in] rayDirection
+/// \param[in] tmin
+/// \param[in] tmax
+/// \param[in] rayTime
+/// \param[in] visibilityMask really only 8 bits
+/// \param[in] rayFlags       really only 8 bits, combination of OptixRayFlags
+/// \param[in] SBToffset      really only 8 bits
+/// \param[in] SBTstride      really only 8 bits
+/// \param[in] missSBTIndex   specifies the miss program invoked on a miss
+/// \param[out] payload       up to 32 unsigned int values that hold the payload
+template <typename... Payload>
+static __forceinline__ __device__ void optixTrace( OptixTraversableHandle handle,
+                                                   float3                 rayOrigin,
+                                                   float3                 rayDirection,
+                                                   float                  tmin,
+                                                   float                  tmax,
+                                                   float                  rayTime,
+                                                   OptixVisibilityMask    visibilityMask,
+                                                   unsigned int           rayFlags,
+                                                   unsigned int           SBToffset,
+                                                   unsigned int           SBTstride,
+                                                   unsigned int           missSBTIndex,
+                                                   Payload&...            payload );
+
+/// Initiates a ray tracing query starting with the given traversable.
+///
+/// \tparam Payload           types of the payload values, passed by reference as a parameter pack
+/// \param[in] type           payload type of this trace call, see OptixPayloadTypeID
+/// \param[in] handle         the traversable the query starts with
+/// \param[in] rayOrigin
+/// \param[in] rayDirection
+/// \param[in] tmin
+/// \param[in] tmax
+/// \param[in] rayTime
+/// \param[in] visibilityMask really only 8 bits
+/// \param[in] rayFlags       really only 8 bits, combination of OptixRayFlags
+/// \param[in] SBToffset      really only 8 bits
+/// \param[in] SBTstride      really only 8 bits
+/// \param[in] missSBTIndex   specifies the miss program invoked on a miss
+/// \param[out] payload       up to 32 unsigned int values that hold the payload
+template <typename... Payload>
+static __forceinline__ __device__ void optixTrace( OptixPayloadTypeID     type,
+                                                   OptixTraversableHandle handle,
+                                                   float3                 rayOrigin,
+                                                   float3                 rayDirection,
+                                                   float                  tmin,
+                                                   float                  tmax,
+                                                   float                  rayTime,
+                                                   OptixVisibilityMask    visibilityMask,
+                                                   unsigned int           rayFlags,
+                                                   unsigned int           SBToffset,
+                                                   unsigned int           SBTstride,
+                                                   unsigned int           missSBTIndex,
+                                                   Payload&...            payload );
+
+/// Writes the 32-bit payload value at slot 0.
+static __forceinline__ __device__ void optixSetPayload_0( unsigned int p ); +/// Writes the 32-bit payload value at slot 1. +static __forceinline__ __device__ void optixSetPayload_1( unsigned int p ); +/// Writes the 32-bit payload value at slot 2. +static __forceinline__ __device__ void optixSetPayload_2( unsigned int p ); +/// Writes the 32-bit payload value at slot 3. +static __forceinline__ __device__ void optixSetPayload_3( unsigned int p ); +/// Writes the 32-bit payload value at slot 4. +static __forceinline__ __device__ void optixSetPayload_4( unsigned int p ); +/// Writes the 32-bit payload value at slot 5. +static __forceinline__ __device__ void optixSetPayload_5( unsigned int p ); +/// Writes the 32-bit payload value at slot 6. +static __forceinline__ __device__ void optixSetPayload_6( unsigned int p ); +/// Writes the 32-bit payload value at slot 7. +static __forceinline__ __device__ void optixSetPayload_7( unsigned int p ); + +/// Writes the 32-bit payload value at slot 8. +static __forceinline__ __device__ void optixSetPayload_8( unsigned int p ); +/// Writes the 32-bit payload value at slot 9. +static __forceinline__ __device__ void optixSetPayload_9( unsigned int p ); +/// Writes the 32-bit payload value at slot 10. +static __forceinline__ __device__ void optixSetPayload_10( unsigned int p ); +/// Writes the 32-bit payload value at slot 11. +static __forceinline__ __device__ void optixSetPayload_11( unsigned int p ); +/// Writes the 32-bit payload value at slot 12. +static __forceinline__ __device__ void optixSetPayload_12( unsigned int p ); +/// Writes the 32-bit payload value at slot 13. +static __forceinline__ __device__ void optixSetPayload_13( unsigned int p ); +/// Writes the 32-bit payload value at slot 14. +static __forceinline__ __device__ void optixSetPayload_14( unsigned int p ); +/// Writes the 32-bit payload value at slot 15. 
+static __forceinline__ __device__ void optixSetPayload_15( unsigned int p ); +/// Writes the 32-bit payload value at slot 16. +static __forceinline__ __device__ void optixSetPayload_16( unsigned int p ); +/// Writes the 32-bit payload value at slot 17. +static __forceinline__ __device__ void optixSetPayload_17( unsigned int p ); +/// Writes the 32-bit payload value at slot 18. +static __forceinline__ __device__ void optixSetPayload_18( unsigned int p ); +/// Writes the 32-bit payload value at slot 19. +static __forceinline__ __device__ void optixSetPayload_19( unsigned int p ); +/// Writes the 32-bit payload value at slot 20. +static __forceinline__ __device__ void optixSetPayload_20( unsigned int p ); +/// Writes the 32-bit payload value at slot 21. +static __forceinline__ __device__ void optixSetPayload_21( unsigned int p ); +/// Writes the 32-bit payload value at slot 22. +static __forceinline__ __device__ void optixSetPayload_22( unsigned int p ); +/// Writes the 32-bit payload value at slot 23. +static __forceinline__ __device__ void optixSetPayload_23( unsigned int p ); +/// Writes the 32-bit payload value at slot 24. +static __forceinline__ __device__ void optixSetPayload_24( unsigned int p ); +/// Writes the 32-bit payload value at slot 25. +static __forceinline__ __device__ void optixSetPayload_25( unsigned int p ); +/// Writes the 32-bit payload value at slot 26. +static __forceinline__ __device__ void optixSetPayload_26( unsigned int p ); +/// Writes the 32-bit payload value at slot 27. +static __forceinline__ __device__ void optixSetPayload_27( unsigned int p ); +/// Writes the 32-bit payload value at slot 28. +static __forceinline__ __device__ void optixSetPayload_28( unsigned int p ); +/// Writes the 32-bit payload value at slot 29. +static __forceinline__ __device__ void optixSetPayload_29( unsigned int p ); +/// Writes the 32-bit payload value at slot 30. 
+static __forceinline__ __device__ void optixSetPayload_30( unsigned int p ); +/// Writes the 32-bit payload value at slot 31. +static __forceinline__ __device__ void optixSetPayload_31( unsigned int p ); + +/// Reads the 32-bit payload value at slot 0. +static __forceinline__ __device__ unsigned int optixGetPayload_0(); +/// Reads the 32-bit payload value at slot 1. +static __forceinline__ __device__ unsigned int optixGetPayload_1(); +/// Reads the 32-bit payload value at slot 2. +static __forceinline__ __device__ unsigned int optixGetPayload_2(); +/// Reads the 32-bit payload value at slot 3. +static __forceinline__ __device__ unsigned int optixGetPayload_3(); +/// Reads the 32-bit payload value at slot 4. +static __forceinline__ __device__ unsigned int optixGetPayload_4(); +/// Reads the 32-bit payload value at slot 5. +static __forceinline__ __device__ unsigned int optixGetPayload_5(); +/// Reads the 32-bit payload value at slot 6. +static __forceinline__ __device__ unsigned int optixGetPayload_6(); +/// Reads the 32-bit payload value at slot 7. +static __forceinline__ __device__ unsigned int optixGetPayload_7(); + +/// Reads the 32-bit payload value at slot 8. +static __forceinline__ __device__ unsigned int optixGetPayload_8(); +/// Reads the 32-bit payload value at slot 9. +static __forceinline__ __device__ unsigned int optixGetPayload_9(); +/// Reads the 32-bit payload value at slot 10. +static __forceinline__ __device__ unsigned int optixGetPayload_10(); +/// Reads the 32-bit payload value at slot 11. +static __forceinline__ __device__ unsigned int optixGetPayload_11(); +/// Reads the 32-bit payload value at slot 12. +static __forceinline__ __device__ unsigned int optixGetPayload_12(); +/// Reads the 32-bit payload value at slot 13. +static __forceinline__ __device__ unsigned int optixGetPayload_13(); +/// Reads the 32-bit payload value at slot 14. 
+static __forceinline__ __device__ unsigned int optixGetPayload_14(); +/// Reads the 32-bit payload value at slot 15. +static __forceinline__ __device__ unsigned int optixGetPayload_15(); +/// Reads the 32-bit payload value at slot 16. +static __forceinline__ __device__ unsigned int optixGetPayload_16(); +/// Reads the 32-bit payload value at slot 17. +static __forceinline__ __device__ unsigned int optixGetPayload_17(); +/// Reads the 32-bit payload value at slot 18. +static __forceinline__ __device__ unsigned int optixGetPayload_18(); +/// Reads the 32-bit payload value at slot 19. +static __forceinline__ __device__ unsigned int optixGetPayload_19(); +/// Reads the 32-bit payload value at slot 20. +static __forceinline__ __device__ unsigned int optixGetPayload_20(); +/// Reads the 32-bit payload value at slot 21. +static __forceinline__ __device__ unsigned int optixGetPayload_21(); +/// Reads the 32-bit payload value at slot 22. +static __forceinline__ __device__ unsigned int optixGetPayload_22(); +/// Reads the 32-bit payload value at slot 23. +static __forceinline__ __device__ unsigned int optixGetPayload_23(); +/// Reads the 32-bit payload value at slot 24. +static __forceinline__ __device__ unsigned int optixGetPayload_24(); +/// Reads the 32-bit payload value at slot 25. +static __forceinline__ __device__ unsigned int optixGetPayload_25(); +/// Reads the 32-bit payload value at slot 26. +static __forceinline__ __device__ unsigned int optixGetPayload_26(); +/// Reads the 32-bit payload value at slot 27. +static __forceinline__ __device__ unsigned int optixGetPayload_27(); +/// Reads the 32-bit payload value at slot 28. +static __forceinline__ __device__ unsigned int optixGetPayload_28(); +/// Reads the 32-bit payload value at slot 29. +static __forceinline__ __device__ unsigned int optixGetPayload_29(); +/// Reads the 32-bit payload value at slot 30. 
+static __forceinline__ __device__ unsigned int optixGetPayload_30(); +/// Reads the 32-bit payload value at slot 31. +static __forceinline__ __device__ unsigned int optixGetPayload_31(); + +/// Specify the supported payload types for a program. +/// +/// The supported types are specified as a bitwise combination of payload types. (See OptixPayloadTypeID) +/// May only be called once per program. +/// Must be called at the top of the program. +/// Only available in IS, AH, CH, MS +static __forceinline__ __device__ void optixSetPayloadTypes( unsigned int typeMask ); + +/// Returns an undefined value. +static __forceinline__ __device__ unsigned int optixUndefinedValue(); + +/// Returns the rayOrigin passed into optixTrace. +/// +/// May be more expensive to call in IS and AH than their object space counterparts, +/// so effort should be made to use the object space ray in those programs. +/// Only available in IS, AH, CH, MS +static __forceinline__ __device__ float3 optixGetWorldRayOrigin(); + +/// Returns the rayDirection passed into optixTrace. +/// +/// May be more expensive to call in IS and AH than their object space counterparts, +/// so effort should be made to use the object space ray in those programs. +/// Only available in IS, AH, CH, MS +static __forceinline__ __device__ float3 optixGetWorldRayDirection(); + +/// Returns the current object space ray origin based on the current transform stack. +/// +/// Only available in IS and AH. +static __forceinline__ __device__ float3 optixGetObjectRayOrigin(); + +/// Returns the current object space ray direction based on the current transform stack. +/// +/// Only available in IS and AH. +static __forceinline__ __device__ float3 optixGetObjectRayDirection(); + +/// Returns the tmin passed into optixTrace. 
+/// +/// Only available in IS, AH, CH, MS +static __forceinline__ __device__ float optixGetRayTmin(); + +/// In IS and CH returns the current smallest reported hitT or the tmax passed into optixTrace if no hit has been reported +/// In AH returns the hitT value as passed in to optixReportIntersection +/// In MS returns the tmax passed into optixTrace +/// Only available in IS, AH, CH, MS +static __forceinline__ __device__ float optixGetRayTmax(); + +/// Returns the rayTime passed into optixTrace. +/// +/// Will return 0 if motion is disabled. +/// Only available in IS, AH, CH, MS +static __forceinline__ __device__ float optixGetRayTime(); + +/// Returns the rayFlags passed into optixTrace +/// +/// Only available in IS, AH, CH, MS +static __forceinline__ __device__ unsigned int optixGetRayFlags(); + +/// Returns the visibilityMask passed into optixTrace +/// +/// Only available in IS, AH, CH, MS +static __forceinline__ __device__ unsigned int optixGetRayVisibilityMask(); + +/// Return the traversable handle of a given instance in an Instance +/// Acceleration Structure (IAS) +static __forceinline__ __device__ OptixTraversableHandle optixGetInstanceTraversableFromIAS( OptixTraversableHandle ias, unsigned int instIdx ); + +/// Return the object space triangle vertex positions of a given triangle in a Geometry +/// Acceleration Structure (GAS) at a given motion time. +/// To access vertex data, the GAS must be built using the flag OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS. +/// +/// If motion is disabled via OptixPipelineCompileOptions::usesMotionBlur, or the GAS does not contain motion, the +/// time parameter is ignored. +static __forceinline__ __device__ void optixGetTriangleVertexData( OptixTraversableHandle gas, unsigned int primIdx, unsigned int sbtGASIndex, float time, float3 data[3]); + +/// Return the object space curve control vertex data of a linear curve in a Geometry +/// Acceleration Structure (GAS) at a given motion time. 
+/// To access vertex data, the GAS must be built using the flag OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS. +/// +/// data[i] = {x,y,z,w} with {x,y,z} the position and w the radius of control vertex i. +/// If motion is disabled via OptixPipelineCompileOptions::usesMotionBlur, or the GAS does not contain motion, the +/// time parameter is ignored. +static __forceinline__ __device__ void optixGetLinearCurveVertexData( OptixTraversableHandle gas, unsigned int primIdx, unsigned int sbtGASIndex, float time, float4 data[2] ); + +/// Return the object space curve control vertex data of a quadratic BSpline curve in a Geometry +/// Acceleration Structure (GAS) at a given motion time. +/// To access vertex data, the GAS must be built using the flag OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS. +/// +/// data[i] = {x,y,z,w} with {x,y,z} the position and w the radius of control vertex i. +/// If motion is disabled via OptixPipelineCompileOptions::usesMotionBlur, or the GAS does not contain motion, the +/// time parameter is ignored. +static __forceinline__ __device__ void optixGetQuadraticBSplineVertexData( OptixTraversableHandle gas, unsigned int primIdx, unsigned int sbtGASIndex, float time, float4 data[3] ); + +/// Return the object space curve control vertex data of a cubic BSpline curve in a Geometry +/// Acceleration Structure (GAS) at a given motion time. +/// To access vertex data, the GAS must be built using the flag OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS. +/// +/// data[i] = {x,y,z,w} with {x,y,z} the position and w the radius of control vertex i. +/// If motion is disabled via OptixPipelineCompileOptions::usesMotionBlur, or the GAS does not contain motion, the +/// time parameter is ignored. 
+static __forceinline__ __device__ void optixGetCubicBSplineVertexData( OptixTraversableHandle gas, unsigned int primIdx, unsigned int sbtGASIndex, float time, float4 data[4] ); + +/// Return the object space curve control vertex data of a CatmullRom spline curve in a Geometry +/// Acceleration Structure (GAS) at a given motion time. +/// To access vertex data, the GAS must be built using the flag OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS. +/// +/// data[i] = {x,y,z,w} with {x,y,z} the position and w the radius of control vertex i. +/// If motion is disabled via OptixPipelineCompileOptions::usesMotionBlur, or the GAS does not contain motion, the +/// time parameter is ignored. +static __forceinline__ __device__ void optixGetCatmullRomVertexData( OptixTraversableHandle gas, unsigned int primIdx, unsigned int sbtGASIndex, float time, float4 data[4] ); + +/// Return the object space sphere data, center point and radius, in a Geometry Acceleration Structure (GAS) at a given motion time. +/// To access sphere data, the GAS must be built using the flag OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS. +/// +/// data[0] = {x,y,z,w} with {x,y,z} the position of the sphere center and w the radius. +/// If motion is disabled via OptixPipelineCompileOptions::usesMotionBlur, or the GAS does not contain motion, the +/// time parameter is ignored. +static __forceinline__ __device__ void optixGetSphereData( OptixTraversableHandle gas, unsigned int primIdx, unsigned int sbtGASIndex, float time, float4 data[1] ); + +/// Returns the traversable handle for the Geometry Acceleration Structure (GAS) containing +/// the current hit. May be called from IS, AH and CH. 
+static __forceinline__ __device__ OptixTraversableHandle optixGetGASTraversableHandle(); + +/// Returns the motion begin time of a GAS (see OptixMotionOptions) +static __forceinline__ __device__ float optixGetGASMotionTimeBegin( OptixTraversableHandle gas ); + +/// Returns the motion end time of a GAS (see OptixMotionOptions) +static __forceinline__ __device__ float optixGetGASMotionTimeEnd( OptixTraversableHandle gas ); + +/// Returns the number of motion steps of a GAS (see OptixMotionOptions) +static __forceinline__ __device__ unsigned int optixGetGASMotionStepCount( OptixTraversableHandle gas ); + +/// Returns the world-to-object transformation matrix resulting from the current active transformation list. +/// +/// The cost of this function may be proportional to the size of the transformation list. +static __forceinline__ __device__ void optixGetWorldToObjectTransformMatrix( float m[12] ); + +/// Returns the object-to-world transformation matrix resulting from the current active transformation list. +/// +/// The cost of this function may be proportional to the size of the transformation list. +static __forceinline__ __device__ void optixGetObjectToWorldTransformMatrix( float m[12] ); + +/// Transforms the point using world-to-object transformation matrix resulting from the current active transformation +/// list. +/// +/// The cost of this function may be proportional to the size of the transformation list. +static __forceinline__ __device__ float3 optixTransformPointFromWorldToObjectSpace( float3 point ); + +/// Transforms the vector using world-to-object transformation matrix resulting from the current active transformation +/// list. +/// +/// The cost of this function may be proportional to the size of the transformation list. 
+static __forceinline__ __device__ float3 optixTransformVectorFromWorldToObjectSpace( float3 vec ); + +/// Transforms the normal using world-to-object transformation matrix resulting from the current active transformation +/// list. +/// +/// The cost of this function may be proportional to the size of the transformation list. +static __forceinline__ __device__ float3 optixTransformNormalFromWorldToObjectSpace( float3 normal ); + +/// Transforms the point using object-to-world transformation matrix resulting from the current active transformation +/// list. +/// +/// The cost of this function may be proportional to the size of the transformation list. +static __forceinline__ __device__ float3 optixTransformPointFromObjectToWorldSpace( float3 point ); + +/// Transforms the vector using object-to-world transformation matrix resulting from the current active transformation +/// list. +/// +/// The cost of this function may be proportional to the size of the transformation list. +static __forceinline__ __device__ float3 optixTransformVectorFromObjectToWorldSpace( float3 vec ); + +/// Transforms the normal using object-to-world transformation matrix resulting from the current active transformation +/// list. +/// +/// The cost of this function may be proportional to the size of the transformation list. +static __forceinline__ __device__ float3 optixTransformNormalFromObjectToWorldSpace( float3 normal ); + +/// Returns the number of transforms on the current transform list. +/// +/// Only available in IS, AH, CH, EX +static __forceinline__ __device__ unsigned int optixGetTransformListSize(); + +/// Returns the traversable handle for a transform on the current transform list. +/// +/// Only available in IS, AH, CH, EX +static __forceinline__ __device__ OptixTraversableHandle optixGetTransformListHandle( unsigned int index ); + + +/// Returns the transform type of a traversable handle from a transform list. 
+static __forceinline__ __device__ OptixTransformType optixGetTransformTypeFromHandle( OptixTraversableHandle handle ); + +/// Returns a pointer to a OptixStaticTransform from its traversable handle. +/// +/// Returns 0 if the traversable is not of type OPTIX_TRANSFORM_TYPE_STATIC_TRANSFORM. +static __forceinline__ __device__ const OptixStaticTransform* optixGetStaticTransformFromHandle( OptixTraversableHandle handle ); + +/// Returns a pointer to a OptixSRTMotionTransform from its traversable handle. +/// +/// Returns 0 if the traversable is not of type OPTIX_TRANSFORM_TYPE_SRT_MOTION_TRANSFORM. +static __forceinline__ __device__ const OptixSRTMotionTransform* optixGetSRTMotionTransformFromHandle( OptixTraversableHandle handle ); + +/// Returns a pointer to a OptixMatrixMotionTransform from its traversable handle. +/// +/// Returns 0 if the traversable is not of type OPTIX_TRANSFORM_TYPE_MATRIX_MOTION_TRANSFORM. +static __forceinline__ __device__ const OptixMatrixMotionTransform* optixGetMatrixMotionTransformFromHandle( OptixTraversableHandle handle ); + +/// Returns instanceId from an OptixInstance traversable. +/// +/// Returns 0 if the traversable handle does not reference an OptixInstance. +static __forceinline__ __device__ unsigned int optixGetInstanceIdFromHandle( OptixTraversableHandle handle ); + +/// Returns child traversable handle from an OptixInstance traversable. +/// +/// Returns 0 if the traversable handle does not reference an OptixInstance. +static __forceinline__ __device__ OptixTraversableHandle optixGetInstanceChildFromHandle( OptixTraversableHandle handle ); + +/// Returns object-to-world transform from an OptixInstance traversable. +/// +/// Returns 0 if the traversable handle does not reference an OptixInstance. +static __forceinline__ __device__ const float4* optixGetInstanceTransformFromHandle( OptixTraversableHandle handle ); + +/// Returns world-to-object transform from an OptixInstance traversable. 
+/// +/// Returns 0 if the traversable handle does not reference an OptixInstance. +static __forceinline__ __device__ const float4* optixGetInstanceInverseTransformFromHandle( OptixTraversableHandle handle ); + +/// Reports an intersection (overload without attributes). +/// +/// If optixGetRayTmin() <= hitT <= optixGetRayTmax(), the any hit program associated with this intersection program (via the SBT entry) is called. +/// The AH program can do one of three things: +/// 1. call optixIgnoreIntersection - no hit is recorded, optixReportIntersection returns false +/// 2. call optixTerminateRay - hit is recorded, optixReportIntersection does not return, no further traversal occurs, +/// and the associated closest hit program is called +/// 3. neither - hit is recorded, optixReportIntersection returns true +/// hitKind - Only the 7 least significant bits should be written [0..127]. Any values above 127 are reserved for built in intersection. The value can be queried with optixGetHitKind() in AH and CH. +/// +/// The attributes specified with a0..a7 are available in the AH and CH programs. +/// Note that the attributes available in the CH program correspond to the closest recorded intersection. +/// The number of attributes in registers and memory can be configured in the pipeline. +/// +/// \param[in] hitT +/// \param[in] hitKind +static __forceinline__ __device__ bool optixReportIntersection( float hitT, unsigned int hitKind ); + +/// Reports an intersection (overload with 1 attribute register). +/// +/// \see #optixReportIntersection(float,unsigned int) +static __forceinline__ __device__ bool optixReportIntersection( float hitT, unsigned int hitKind, unsigned int a0 ); + +/// Reports an intersection (overload with 2 attribute registers). 
+/// +/// \see #optixReportIntersection(float,unsigned int) +static __forceinline__ __device__ bool optixReportIntersection( float hitT, unsigned int hitKind, unsigned int a0, unsigned int a1 ); + +/// Reports an intersection (overload with 3 attribute registers). +/// +/// \see #optixReportIntersection(float,unsigned int) +static __forceinline__ __device__ bool optixReportIntersection( float hitT, unsigned int hitKind, unsigned int a0, unsigned int a1, unsigned int a2 ); + +/// Reports an intersection (overload with 4 attribute registers). +/// +/// \see #optixReportIntersection(float,unsigned int) +static __forceinline__ __device__ bool optixReportIntersection( float hitT, + unsigned int hitKind, + unsigned int a0, + unsigned int a1, + unsigned int a2, + unsigned int a3 ); + +/// Reports an intersection (overload with 5 attribute registers). +/// +/// \see #optixReportIntersection(float,unsigned int) +static __forceinline__ __device__ bool optixReportIntersection( float hitT, + unsigned int hitKind, + unsigned int a0, + unsigned int a1, + unsigned int a2, + unsigned int a3, + unsigned int a4 ); + +/// Reports an intersection (overload with 6 attribute registers). +/// +/// \see #optixReportIntersection(float,unsigned int) +static __forceinline__ __device__ bool optixReportIntersection( float hitT, + unsigned int hitKind, + unsigned int a0, + unsigned int a1, + unsigned int a2, + unsigned int a3, + unsigned int a4, + unsigned int a5 ); + +/// Reports an intersection (overload with 7 attribute registers). +/// +/// \see #optixReportIntersection(float,unsigned int) +static __forceinline__ __device__ bool optixReportIntersection( float hitT, + unsigned int hitKind, + unsigned int a0, + unsigned int a1, + unsigned int a2, + unsigned int a3, + unsigned int a4, + unsigned int a5, + unsigned int a6 ); + +/// Reports an intersection (overload with 8 attribute registers). 
+/// +/// \see #optixReportIntersection(float,unsigned int) +static __forceinline__ __device__ bool optixReportIntersection( float hitT, + unsigned int hitKind, + unsigned int a0, + unsigned int a1, + unsigned int a2, + unsigned int a3, + unsigned int a4, + unsigned int a5, + unsigned int a6, + unsigned int a7 ); + +/// Returns the attribute at slot 0. +static __forceinline__ __device__ unsigned int optixGetAttribute_0(); +/// Returns the attribute at slot 1. +static __forceinline__ __device__ unsigned int optixGetAttribute_1(); +/// Returns the attribute at slot 2. +static __forceinline__ __device__ unsigned int optixGetAttribute_2(); +/// Returns the attribute at slot 3. +static __forceinline__ __device__ unsigned int optixGetAttribute_3(); +/// Returns the attribute at slot 4. +static __forceinline__ __device__ unsigned int optixGetAttribute_4(); +/// Returns the attribute at slot 5. +static __forceinline__ __device__ unsigned int optixGetAttribute_5(); +/// Returns the attribute at slot 6. +static __forceinline__ __device__ unsigned int optixGetAttribute_6(); +/// Returns the attribute at slot 7. +static __forceinline__ __device__ unsigned int optixGetAttribute_7(); + +/// Records the hit, stops traversal, and proceeds to CH. +/// +/// Available only in AH. +static __forceinline__ __device__ void optixTerminateRay(); + +/// Discards the hit, and returns control to the calling optixReportIntersection or built-in intersection routine. +/// +/// Available only in AH. +static __forceinline__ __device__ void optixIgnoreIntersection(); + + +/// For a given OptixBuildInputTriangleArray the number of primitives is defined as +/// "(OptixBuildInputTriangleArray::indexBuffer == 0) ? OptixBuildInputTriangleArray::numVertices/3 : +/// OptixBuildInputTriangleArray::numIndexTriplets;". +/// For a given OptixBuildInputCustomPrimitiveArray the number of primitives is defined as +/// numAabbs. 
+/// +/// The primitive index returns the index into the array of primitives +/// plus the primitiveIndexOffset. +/// +/// In IS and AH this corresponds to the currently intersected primitive. +/// In CH this corresponds to the primitive index of the closest intersected primitive. +static __forceinline__ __device__ unsigned int optixGetPrimitiveIndex(); + +/// Returns the Sbt GAS index of the primitive associated with the current intersection. +/// +/// In IS and AH this corresponds to the currently intersected primitive. +/// In CH this corresponds to the Sbt GAS index of the closest intersected primitive. +/// In EX with exception code OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_HIT_SBT corresponds to the sbt index within the hit GAS. Returns zero for all other exceptions. +static __forceinline__ __device__ unsigned int optixGetSbtGASIndex(); + + +/// Returns the OptixInstance::instanceId of the instance within the top level acceleration structure associated with the current intersection. +/// +/// When building an acceleration structure using OptixBuildInputInstanceArray each OptixInstance has a user supplied instanceId. +/// OptixInstance objects reference another acceleration structure. During traversal the acceleration structures are visited top down. +/// In the IS and AH programs the OptixInstance::instanceId corresponding to the most recently visited OptixInstance is returned when calling optixGetInstanceId(). +/// In CH optixGetInstanceId() returns the OptixInstance::instanceId when the hit was recorded with optixReportIntersection. +/// In the case where there is no OptixInstance visited, optixGetInstanceId returns ~0u +static __forceinline__ __device__ unsigned int optixGetInstanceId(); + +/// Returns the zero-based index of the instance within its instance acceleration structure associated with the current intersection. 
+/// +/// In the IS and AH programs the index corresponding to the most recently visited OptixInstance is returned when calling optixGetInstanceIndex(). +/// In CH optixGetInstanceIndex() returns the index when the hit was recorded with optixReportIntersection. +/// In the case where there is no OptixInstance visited, optixGetInstanceIndex returns 0 +static __forceinline__ __device__ unsigned int optixGetInstanceIndex(); + +/// Returns the 8 bit hit kind associated with the current hit. +/// +/// Use optixGetPrimitiveType() to interpret the hit kind. +/// For custom intersections (primitive type OPTIX_PRIMITIVE_TYPE_CUSTOM), +/// this is the 7-bit hitKind passed to optixReportIntersection(). +/// Hit kinds greater than 127 are reserved for built-in primitives. +/// +/// Available only in AH and CH. +static __forceinline__ __device__ unsigned int optixGetHitKind(); + +/// Function interpreting the result of #optixGetHitKind(). +static __forceinline__ __device__ OptixPrimitiveType optixGetPrimitiveType( unsigned int hitKind ); + +/// Function interpreting the result of #optixGetHitKind(). +static __forceinline__ __device__ bool optixIsFrontFaceHit( unsigned int hitKind ); + +/// Function interpreting the result of #optixGetHitKind(). +static __forceinline__ __device__ bool optixIsBackFaceHit( unsigned int hitKind ); + +/// Function interpreting the hit kind associated with the current optixReportIntersection. +static __forceinline__ __device__ OptixPrimitiveType optixGetPrimitiveType(); + +/// Function interpreting the hit kind associated with the current optixReportIntersection. +static __forceinline__ __device__ bool optixIsFrontFaceHit(); + +/// Function interpreting the hit kind associated with the current optixReportIntersection. +static __forceinline__ __device__ bool optixIsBackFaceHit(); + +/// Convenience function interpreting the result of #optixGetHitKind(). 
+static __forceinline__ __device__ bool optixIsTriangleHit(); + +/// Convenience function interpreting the result of #optixGetHitKind(). +static __forceinline__ __device__ bool optixIsTriangleFrontFaceHit(); + +/// Convenience function interpreting the result of #optixGetHitKind(). +static __forceinline__ __device__ bool optixIsTriangleBackFaceHit(); + +/// Convenience function that returns the first two attributes as floats. +/// +/// When using OptixBuildInputTriangleArray objects, during intersection the barycentric +/// coordinates are stored into the first two attribute registers. +static __forceinline__ __device__ float2 optixGetTriangleBarycentrics(); + +/// Convenience function that returns the curve parameter. +/// +/// When using OptixBuildInputCurveArray objects, during intersection the curve parameter +/// is stored into the first attribute register. +static __forceinline__ __device__ float optixGetCurveParameter(); + +/// Available in any program, it returns the current launch index within the launch dimensions specified by optixLaunch on the host. +/// +/// The raygen program is typically only launched once per launch index. +static __forceinline__ __device__ uint3 optixGetLaunchIndex(); + +/// Available in any program, it returns the dimensions of the current launch specified by optixLaunch on the host. +static __forceinline__ __device__ uint3 optixGetLaunchDimensions(); + +/// Returns the generic memory space pointer to the data region (past the header) of the currently active SBT record corresponding to the current program. +static __forceinline__ __device__ CUdeviceptr optixGetSbtDataPointer(); + +/// Throws a user exception with the given exception code (overload without exception details). +/// +/// The exception code must be in the range from 0 to 2^30 - 1. Up to 8 optional exception details can be passed. They +/// can be queried in the EX program using optixGetExceptionDetail_0() to ..._7(). 
+/// +/// The exception details must not be used to encode pointers to the stack since the current stack is not preserved in +/// the EX program. +/// +/// Not available in EX. +/// +/// \param[in] exceptionCode The exception code to be thrown. +static __forceinline__ __device__ void optixThrowException( int exceptionCode ); + +/// Throws a user exception with the given exception code (overload with 1 exception detail). +/// +/// \see #optixThrowException(int) +static __forceinline__ __device__ void optixThrowException( int exceptionCode, unsigned int exceptionDetail0 ); + +/// Throws a user exception with the given exception code (overload with 2 exception details). +/// +/// \see #optixThrowException(int) +static __forceinline__ __device__ void optixThrowException( int exceptionCode, + unsigned int exceptionDetail0, + unsigned int exceptionDetail1 ); + +/// Throws a user exception with the given exception code (overload with 3 exception details). +/// +/// \see #optixThrowException(int) +static __forceinline__ __device__ void optixThrowException( int exceptionCode, + unsigned int exceptionDetail0, + unsigned int exceptionDetail1, + unsigned int exceptionDetail2 ); + +/// Throws a user exception with the given exception code (overload with 4 exception details). +/// +/// \see #optixThrowException(int) +static __forceinline__ __device__ void optixThrowException( int exceptionCode, + unsigned int exceptionDetail0, + unsigned int exceptionDetail1, + unsigned int exceptionDetail2, + unsigned int exceptionDetail3 ); + +/// Throws a user exception with the given exception code (overload with 5 exception details). 
+/// +/// \see #optixThrowException(int) +static __forceinline__ __device__ void optixThrowException( int exceptionCode, + unsigned int exceptionDetail0, + unsigned int exceptionDetail1, + unsigned int exceptionDetail2, + unsigned int exceptionDetail3, + unsigned int exceptionDetail4 ); + +/// Throws a user exception with the given exception code (overload with 6 exception details). +/// +/// \see #optixThrowException(int) +static __forceinline__ __device__ void optixThrowException( int exceptionCode, + unsigned int exceptionDetail0, + unsigned int exceptionDetail1, + unsigned int exceptionDetail2, + unsigned int exceptionDetail3, + unsigned int exceptionDetail4, + unsigned int exceptionDetail5 ); + +/// Throws a user exception with the given exception code (overload with 7 exception details). +/// +/// \see #optixThrowException(int) +static __forceinline__ __device__ void optixThrowException( int exceptionCode, + unsigned int exceptionDetail0, + unsigned int exceptionDetail1, + unsigned int exceptionDetail2, + unsigned int exceptionDetail3, + unsigned int exceptionDetail4, + unsigned int exceptionDetail5, + unsigned int exceptionDetail6 ); + +/// Throws a user exception with the given exception code (overload with 8 exception details). +/// +/// \see #optixThrowException(int) +static __forceinline__ __device__ void optixThrowException( int exceptionCode, + unsigned int exceptionDetail0, + unsigned int exceptionDetail1, + unsigned int exceptionDetail2, + unsigned int exceptionDetail3, + unsigned int exceptionDetail4, + unsigned int exceptionDetail5, + unsigned int exceptionDetail6, + unsigned int exceptionDetail7 ); + +/// Returns the exception code. +/// +/// Only available in EX. +static __forceinline__ __device__ int optixGetExceptionCode(); + +/// Returns the 32-bit exception detail at slot 0. 
+/// +/// The behavior is undefined if the exception is not a user exception, or the used overload #optixThrowException() did +/// not provide the queried exception detail. +/// +/// Only available in EX. +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_0(); + +/// Returns the 32-bit exception detail at slot 1. +/// +/// \see #optixGetExceptionDetail_0() +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_1(); + +/// Returns the 32-bit exception detail at slot 2. +/// +/// \see #optixGetExceptionDetail_0() +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_2(); + +/// Returns the 32-bit exception detail at slot 3. +/// +/// \see #optixGetExceptionDetail_0() +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_3(); + +/// Returns the 32-bit exception detail at slot 4. +/// +/// \see #optixGetExceptionDetail_0() +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_4(); + +/// Returns the 32-bit exception detail at slot 5. +/// +/// \see #optixGetExceptionDetail_0() +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_5(); + +/// Returns the 32-bit exception detail at slot 6. +/// +/// \see #optixGetExceptionDetail_0() +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_6(); + +/// Returns the 32-bit exception detail at slot 7. +/// +/// \see #optixGetExceptionDetail_0() +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_7(); + +/// Returns the invalid traversable handle for exceptions with exception code OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_TRAVERSABLE. +/// +/// Returns zero for all other exception codes. +/// +/// Only available in EX. +static __forceinline__ __device__ OptixTraversableHandle optixGetExceptionInvalidTraversable(); + +/// Returns the invalid sbt offset for exceptions with exception code OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_MISS_SBT and OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_HIT_SBT. 
+/// +/// Returns zero for all other exception codes. +/// +/// Only available in EX. +static __forceinline__ __device__ int optixGetExceptionInvalidSbtOffset(); + +/// Returns the invalid ray for exceptions with exception code OPTIX_EXCEPTION_CODE_INVALID_RAY. +/// Exceptions of type OPTIX_EXCEPTION_CODE_INVALID_RAY are thrown when one or more values that were +/// passed into optixTrace are either inf or nan. +/// +/// OptixInvalidRayExceptionDetails::rayTime will always be 0 if OptixPipelineCompileOptions::usesMotionBlur is 0. +/// Values in the returned struct are all zero for all other exception codes. +/// +/// Only available in EX. +static __forceinline__ __device__ OptixInvalidRayExceptionDetails optixGetExceptionInvalidRay(); + +/// Returns information about an exception with code OPTIX_EXCEPTION_CODE_CALLABLE_PARAMETER_MISMATCH. +/// +/// Exceptions of type OPTIX_EXCEPTION_CODE_CALLABLE_PARAMETER_MISMATCH are thrown when the number of +/// arguments that were passed into a call to optixDirectCall or optixContinuationCall does not match +/// the number of parameters of the callable that is called. +/// Note that the parameters are packed by OptiX into individual 32 bit values, so the number of +/// expected and passed values may not correspond to the number of arguments passed into optixDirectCall +/// or optixContinuationCall. +/// +/// Values in the returned struct are all zero for all other exception codes. +/// +/// Only available in EX. +static __forceinline__ __device__ OptixParameterMismatchExceptionDetails optixGetExceptionParameterMismatch(); + +/// Returns a string that includes information about the source location that caused the current exception. +/// +/// The source location is only available for exceptions of type OPTIX_EXCEPTION_CODE_CALLABLE_PARAMETER_MISMATCH, +/// OPTIX_EXCEPTION_CODE_UNSUPPORTED_PRIMITIVE_TYPE, OPTIX_EXCEPTION_CODE_INVALID_RAY, and for user exceptions. 
+/// Line information needs to be present in the input PTX and OptixModuleCompileOptions::debugLevel +/// may not be set to OPTIX_COMPILE_DEBUG_LEVEL_NONE. +/// +/// Returns a NULL pointer if no line information is available. +/// +/// Only available in EX. +static __forceinline__ __device__ char* optixGetExceptionLineInfo(); + +/// Creates a call to the direct callable program at the specified SBT entry. +/// +/// This will call the program that was specified in the OptixProgramGroupCallables::entryFunctionNameDC in the +/// module specified by OptixProgramGroupCallables::moduleDC. +/// The address of the SBT entry is calculated by OptixShaderBindingTable::callablesRecordBase + ( OptixShaderBindingTable::callablesRecordStrideInBytes * sbtIndex ). +/// +/// Behavior is undefined if there is no direct callable program at the specified SBT entry. +/// +/// Behavior is undefined if the number of arguments that are being passed in does not match the number of +/// parameters expected by the program that is called. In that case an exception of type OPTIX_EXCEPTION_CODE_CALLABLE_PARAMETER_MISMATCH +/// will be thrown if OPTIX_EXCEPTION_FLAG_DEBUG was specified for the OptixPipelineCompileOptions::exceptionFlags. +/// +/// \param[in] sbtIndex The offset of the SBT entry of the direct callable program to call relative to OptixShaderBindingTable::callablesRecordBase. +/// \param[in] args The arguments to pass to the direct callable program. +template +static __forceinline__ __device__ ReturnT optixDirectCall( unsigned int sbtIndex, ArgTypes... args ); + + +/// Creates a call to the continuation callable program at the specified SBT entry. +/// +/// This will call the program that was specified in the OptixProgramGroupCallables::entryFunctionNameCC in the +/// module specified by OptixProgramGroupCallables::moduleCC. 
+/// The address of the SBT entry is calculated by OptixShaderBindingTable::callablesRecordBase + ( OptixShaderBindingTable::callablesRecordStrideInBytes * sbtIndex ). +/// As opposed to direct callable programs, continuation callable programs are allowed to call optixTrace recursively. +/// +/// Behavior is undefined if there is no continuation callable program at the specified SBT entry. +/// +/// Behavior is undefined if the number of arguments that are being passed in does not match the number of +/// parameters expected by the program that is called. In that case an exception of type OPTIX_EXCEPTION_CODE_CALLABLE_PARAMETER_MISMATCH +/// will be thrown if OPTIX_EXCEPTION_FLAG_DEBUG was specified for the OptixPipelineCompileOptions::exceptionFlags. +/// +/// \param[in] sbtIndex The offset of the SBT entry of the continuation callable program to call relative to OptixShaderBindingTable::callablesRecordBase. +/// \param[in] args The arguments to pass to the continuation callable program. +template +static __forceinline__ __device__ ReturnT optixContinuationCall( unsigned int sbtIndex, ArgTypes... args ); + + +/// optixTexFootprint2D calculates the footprint of a corresponding 2D texture fetch (non-mipmapped). +/// +/// On Turing and subsequent architectures, a texture footprint instruction allows user programs to +/// determine the set of texels that would be accessed by an equivalent filtered texture lookup. +/// +/// \param[in] tex CUDA texture object (cast to 64-bit integer) +/// \param[in] texInfo Texture info packed into 32-bit integer, described below. +/// \param[in] x Texture coordinate +/// \param[in] y Texture coordinate +/// \param[out] singleMipLevel Result indicating whether the footprint spans only a single miplevel. 
+/// +/// The texture info argument is a packed 32-bit integer with the following layout: +/// +/// texInfo[31:29] = reserved (3 bits) +/// texInfo[28:24] = miplevel count (5 bits) +/// texInfo[23:20] = log2 of tile width (4 bits) +/// texInfo[19:16] = log2 of tile height (4 bits) +/// texInfo[15:10] = reserved (6 bits) +/// texInfo[9:8] = horizontal wrap mode (2 bits) (CUaddress_mode) +/// texInfo[7:6] = vertical wrap mode (2 bits) (CUaddress_mode) +/// texInfo[5] = mipmap filter mode (1 bit) (CUfilter_mode) +/// texInfo[4:0] = maximum anisotropy (5 bits) +/// +/// Returns a 16-byte structure (as a uint4) that stores the footprint of a texture request at a +/// particular "granularity", which has the following layout: +/// +/// struct Texture2DFootprint +/// { +/// unsigned long long mask; +/// unsigned int tileY : 12; +/// unsigned int reserved1 : 4; +/// unsigned int dx : 3; +/// unsigned int dy : 3; +/// unsigned int reserved2 : 2; +/// unsigned int granularity : 4; +/// unsigned int reserved3 : 4; +/// unsigned int tileX : 12; +/// unsigned int level : 4; +/// unsigned int reserved4 : 16; +/// }; +/// +/// The granularity indicates the size of texel groups that are represented by an 8x8 bitmask. For +/// example, a granularity of 12 indicates texel groups that are 128x64 texels in size. In a +/// footprint call, The returned granularity will either be the actual granularity of the result, or +/// 0 if the footprint call was able to honor the requested granularity (the usual case). +/// +/// level is the mip level of the returned footprint. Two footprint calls are needed to get the +/// complete footprint when a texture call spans multiple mip levels. +/// +/// mask is an 8x8 bitmask of texel groups that are covered, or partially covered, by the footprint. +/// tileX and tileY give the starting position of the mask in 8x8 texel-group blocks. For example, +/// suppose a granularity of 12 (128x64 texels), and tileX=3 and tileY=4. 
In this case, bit 0 of the +/// mask (the low order bit) corresponds to texel group coordinates (3*8, 4*8), and texel +/// coordinates (3*8*128, 4*8*64), within the specified mip level. +/// +/// If nonzero, dx and dy specify a "toroidal rotation" of the bitmask. Toroidal rotation of a +/// coordinate in the mask simply means that its value is reduced by 8. Continuing the example from +/// above, if dx=0 and dy=0 the mask covers texel groups (3*8, 4*8) to (3*8+7, 4*8+7) inclusive. +/// If, on the other hand, dx=2, the rightmost 2 columns in the mask have their x coordinates +/// reduced by 8, and similarly for dy. +/// +/// See the OptiX SDK for sample code that illustrates how to unpack the result. +static __forceinline__ __device__ uint4 optixTexFootprint2D( unsigned long long tex, unsigned int texInfo, float x, float y, unsigned int* singleMipLevel ); + +/// optixTexFootprint2DLod calculates the footprint of a corresponding 2D texture fetch (tex2DLod) +/// \param[in] tex CUDA texture object (cast to 64-bit integer) +/// \param[in] texInfo Texture info packed into 32-bit integer, described below. +/// \param[in] x Texture coordinate +/// \param[in] y Texture coordinate +/// \param[in] level Level of detail (lod) +/// \param[in] coarse Requests footprint from coarse miplevel, when the footprint spans two levels. +/// \param[out] singleMipLevel Result indicating whether the footprint spans only a single miplevel. +/// \see #optixTexFootprint2D(unsigned long long,unsigned int,float,float,unsigned int*) +static __forceinline__ __device__ uint4 +optixTexFootprint2DLod( unsigned long long tex, unsigned int texInfo, float x, float y, float level, bool coarse, unsigned int* singleMipLevel ); + +/// optixTexFootprint2DGrad calculates the footprint of a corresponding 2D texture fetch (tex2DGrad) +/// \param[in] tex CUDA texture object (cast to 64-bit integer) +/// \param[in] texInfo Texture info packed into 32-bit integer, described below. 
+/// \param[in] x Texture coordinate +/// \param[in] y Texture coordinate +/// \param[in] dPdx_x Derivative of x coordinate, which determines level of detail. +/// \param[in] dPdx_y Derivative of x coordinate, which determines level of detail. +/// \param[in] dPdy_x Derivative of y coordinate, which determines level of detail. +/// \param[in] dPdy_y Derivative of y coordinate, which determines level of detail. +/// \param[in] coarse Requests footprint from coarse miplevel, when the footprint spans two levels. +/// \param[out] singleMipLevel Result indicating whether the footprint spans only a single miplevel. +/// \see #optixTexFootprint2D(unsigned long long,unsigned int,float,float,unsigned int*) +static __forceinline__ __device__ uint4 optixTexFootprint2DGrad( unsigned long long tex, + unsigned int texInfo, + float x, + float y, + float dPdx_x, + float dPdx_y, + float dPdy_x, + float dPdy_y, + bool coarse, + unsigned int* singleMipLevel ); + +/*@}*/ // end group optix_device_api + +#include "internal/optix_7_device_impl.h" + +#endif // __optix_optix_7_device_h__ diff --git a/include/optix_7_host.h b/include/optix_7_host.h new file mode 100644 index 0000000..1ade4ac --- /dev/null +++ b/include/optix_7_host.h @@ -0,0 +1,902 @@ +/* + * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and proprietary + * rights in and to this software, related documentation and any modifications thereto. + * Any use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation is strictly + * prohibited. + * + * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* + * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, + * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE. 
IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY + * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT + * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF + * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR + * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGES + */ + +/// @file +/// @author NVIDIA Corporation +/// @brief OptiX public API header +/// +/// OptiX host include file -- includes the host api if compiling host code. +/// For the math library routines include optix_math.h + +#if !defined( __OPTIX_INCLUDE_INTERNAL_HEADERS__ ) +#error("optix_7_host.h is an internal header file and must not be used directly. Please use optix_host.h or optix.h instead.") +#endif + +#ifndef __optix_optix_7_host_h__ +#define __optix_optix_7_host_h__ + +#include "optix_7_types.h" +#if !defined( OPTIX_DONT_INCLUDE_CUDA ) +// If OPTIX_DONT_INCLUDE_CUDA is defined, cuda driver types must be defined through other +// means before including optix headers. +#include +#endif + + + +#ifdef __cplusplus +extern "C" { +#endif + +/// \defgroup optix_host_api Host API +/// \brief OptiX Host API + +/// \defgroup optix_host_api_error_handling Error handling +/// \ingroup optix_host_api +//@{ + +/// Returns a string containing the name of an error code in the enum. +/// +/// Output is a string representation of the enum. For example "OPTIX_SUCCESS" for +/// OPTIX_SUCCESS and "OPTIX_ERROR_INVALID_VALUE" for OPTIX_ERROR_INVALID_VALUE. +/// +/// If the error code is not recognized, "Unrecognized OptixResult code" is returned. +/// +/// \param[in] result OptixResult enum to generate string name for +/// +/// \see #optixGetErrorString +const char* optixGetErrorName( OptixResult result ); + +/// Returns the description string for an error code. +/// +/// Output is a string description of the enum. 
For example "Success" for +/// OPTIX_SUCCESS and "Invalid value" for OPTIX_ERROR_INVALID_VALUE. +/// +/// If the error code is not recognized, "Unrecognized OptixResult code" is returned. +/// +/// \param[in] result OptixResult enum to generate string description for +/// +/// \see #optixGetErrorName +const char* optixGetErrorString( OptixResult result ); + +//@} +/// \defgroup optix_host_api_device_context Device context +/// \ingroup optix_host_api +//@{ + +/// Create a device context associated with the CUDA context specified with 'fromContext'. +/// +/// If zero is specified for 'fromContext', OptiX will use the current CUDA context. The +/// CUDA context should be initialized before calling optixDeviceContextCreate. +/// +/// \param[in] fromContext +/// \param[in] options +/// \param[out] context +/// \return +/// - OPTIX_ERROR_CUDA_NOT_INITIALIZED +/// If using zero for 'fromContext' and CUDA has not been initialized yet on the calling +/// thread. +/// - OPTIX_ERROR_CUDA_ERROR +/// CUDA operation failed. +/// - OPTIX_ERROR_HOST_OUT_OF_MEMORY +/// Heap allocation failed. +/// - OPTIX_ERROR_INTERNAL_ERROR +/// Internal error +OptixResult optixDeviceContextCreate( CUcontext fromContext, const OptixDeviceContextOptions* options, OptixDeviceContext* context ); + +/// Destroys all CPU and GPU state associated with the device. +/// +/// It will attempt to block on CUDA streams that have launch work outstanding. +/// +/// Any API objects, such as OptixModule and OptixPipeline, not already destroyed will be +/// destroyed. +/// +/// Thread safety: A device context must not be destroyed while it is still in use by concurrent API calls in other threads. +OptixResult optixDeviceContextDestroy( OptixDeviceContext context ); + +/// Query properties of a device context. 
+/// +/// \param[in] context the device context to query the property for +/// \param[in] property the property to query +/// \param[out] value pointer to the returned +/// \param[in] sizeInBytes size of output +OptixResult optixDeviceContextGetProperty( OptixDeviceContext context, OptixDeviceProperty property, void* value, size_t sizeInBytes ); + +/// Sets the current log callback method. +/// +/// See #OptixLogCallback for more details. +/// +/// Thread safety: It is guaranteed that the callback itself (callbackFunction and callbackData) are updated atomically. +/// It is not guaranteed that the callback itself (callbackFunction and callbackData) and the callbackLevel are updated +/// atomically. It is unspecified when concurrent API calls using the same context start to make use of the new +/// callback method. +/// +/// \param[in] context the device context +/// \param[in] callbackFunction the callback function to call +/// \param[in] callbackData pointer to data passed to callback function while invoking it +/// \param[in] callbackLevel callback level +OptixResult optixDeviceContextSetLogCallback( OptixDeviceContext context, + OptixLogCallback callbackFunction, + void* callbackData, + unsigned int callbackLevel ); + +/// Enables or disables the disk cache. +/// +/// If caching was previously disabled, enabling it will attempt to initialize +/// the disk cache database using the currently configured cache location. An +/// error will be returned if initialization fails. +/// +/// Note that no in-memory cache is used, so no caching behavior will be observed if the disk cache +/// is disabled. +/// +/// The cache can be disabled by setting the environment variable OPTIX_CACHE_MAXSIZE=0. +/// The environment variable takes precedence over this setting. +/// See #optixDeviceContextSetCacheDatabaseSizes for additional information. 
+/// +/// Note that the disk cache can be disabled by the environment variable, but it cannot be enabled +/// via the environment if it is disabled via the API. +/// +/// \param[in] context the device context +/// \param[in] enabled 1 to enable, 0 to disable +OptixResult optixDeviceContextSetCacheEnabled( OptixDeviceContext context, + int enabled ); + +/// Sets the location of the disk cache. +/// +/// The location is specified by a directory. This directory should not be used for other purposes +/// and will be created if it does not exist. An error will be returned if it is not possible to +/// create the disk cache at the specified location for any reason (e.g., the path is invalid or +/// the directory is not writable). Caching will be disabled if the disk cache cannot be +/// initialized in the new location. If caching is disabled, no error will be returned until caching +/// is enabled. If the disk cache is located on a network file share, behavior is undefined. +/// +/// The location of the disk cache can be overridden with the environment variable OPTIX_CACHE_PATH. +/// The environment variable takes precedence over this setting. +/// +/// The default location depends on the operating system: +/// - Windows: %LOCALAPPDATA%\\NVIDIA\\OptixCache +/// - Linux: /var/tmp/OptixCache_\<username\> (or /tmp/OptixCache_\<username\> if the first choice is not usable), +/// the underscore and username suffix are omitted if the username cannot be obtained +/// - MacOS X: /Library/Application Support/NVIDIA/OptixCache +/// +/// \param[in] context the device context +/// \param[in] location directory of disk cache +OptixResult optixDeviceContextSetCacheLocation( OptixDeviceContext context, const char* location ); + +/// Sets the low and high water marks for disk cache garbage collection. 
+/// +/// Garbage collection is triggered when a new entry is written to the cache and +/// the current cache data size plus the size of the cache entry that is about +/// to be inserted exceeds the high water mark. Garbage collection proceeds until +/// the size reaches the low water mark. Garbage collection will always free enough +/// space to insert the new entry without exceeding the low water mark. Setting +/// either limit to zero will disable garbage collection. An error will be returned +/// if both limits are non-zero and the high water mark is smaller than the low water mark. +/// +/// Note that garbage collection is performed only on writes to the disk cache. No garbage +/// collection is triggered on disk cache initialization or immediately when calling this function, +/// but on subsequent inserting of data into the database. +/// +/// If the size of a compiled module exceeds the value configured for the high water +/// mark and garbage collection is enabled, the module will not be added to the cache +/// and a warning will be added to the log. +/// +/// The high water mark can be overridden with the environment variable OPTIX_CACHE_MAXSIZE. +/// The environment variable takes precedence over the function parameters. The low water mark +/// will be set to half the value of OPTIX_CACHE_MAXSIZE. Setting OPTIX_CACHE_MAXSIZE to 0 will +/// disable the disk cache, but will not alter the contents of the cache. Negative and non-integer +/// values will be ignored. +/// +/// \param[in] context the device context +/// \param[in] lowWaterMark the low water mark +/// \param[in] highWaterMark the high water mark +OptixResult optixDeviceContextSetCacheDatabaseSizes( OptixDeviceContext context, size_t lowWaterMark, size_t highWaterMark ); + +/// Indicates whether the disk cache is enabled or disabled. 
+/// +/// \param[in] context the device context +/// \param[out] enabled 1 if enabled, 0 if disabled +OptixResult optixDeviceContextGetCacheEnabled( OptixDeviceContext context, int* enabled ); +/// Returns the location of the disk cache. If the cache has been disabled by setting the environment +/// variable OPTIX_CACHE_MAXSIZE=0, this function will return an empty string. +/// +/// \param[in] context the device context +/// \param[out] location directory of disk cache, null terminated if locationSize > 0 +/// \param[in] locationSize locationSize +OptixResult optixDeviceContextGetCacheLocation( OptixDeviceContext context, char* location, size_t locationSize ); + +/// Returns the low and high water marks for disk cache garbage collection. If the cache has been disabled by +/// setting the environment variable OPTIX_CACHE_MAXSIZE=0, this function will return 0 for the low and high +/// water marks. +/// +/// \param[in] context the device context +/// \param[out] lowWaterMark the low water mark +/// \param[out] highWaterMark the high water mark +OptixResult optixDeviceContextGetCacheDatabaseSizes( OptixDeviceContext context, size_t* lowWaterMark, size_t* highWaterMark ); + +//@} +/// \defgroup optix_host_api_pipelines Pipelines +/// \ingroup optix_host_api +//@{ + +/// logString is an optional buffer that contains compiler feedback and errors. This +/// information is also passed to the context logger (if enabled), however it may be +/// difficult to correlate output to the logger to specific API invocations when using +/// multiple threads. The output to logString will only contain feedback for this specific +/// invocation of this API call. +/// +/// logStringSize as input should be a pointer to the number of bytes backing logString. +/// Upon return it contains the length of the log message (including the null terminator) +/// which may be greater than the input value. In this case, the log message will be +/// truncated to fit into logString. 
+/// +/// If logString or logStringSize are NULL, no output is written to logString. If +/// logStringSize points to a value that is zero, no output is written. This does not +/// affect output to the context logger if enabled. +/// +/// \param[in] context +/// \param[in] pipelineCompileOptions +/// \param[in] pipelineLinkOptions +/// \param[in] programGroups array of ProgramGroup objects +/// \param[in] numProgramGroups number of ProgramGroup objects +/// \param[out] logString Information will be written to this string. If logStringSize > 0 logString will be null terminated. +/// \param[in,out] logStringSize +/// \param[out] pipeline +OptixResult optixPipelineCreate( OptixDeviceContext context, + const OptixPipelineCompileOptions* pipelineCompileOptions, + const OptixPipelineLinkOptions* pipelineLinkOptions, + const OptixProgramGroup* programGroups, + unsigned int numProgramGroups, + char* logString, + size_t* logStringSize, + OptixPipeline* pipeline ); + +/// Thread safety: A pipeline must not be destroyed while it is still in use by concurrent API calls in other threads. +OptixResult optixPipelineDestroy( OptixPipeline pipeline ); + +/// Sets the stack sizes for a pipeline. +/// +/// Users are encouraged to see the programming guide and the implementations of the helper functions +/// to understand how to construct the stack sizes based on their particular needs. +/// +/// If this method is not used, an internal default implementation is used. The default implementation is correct (but +/// not necessarily optimal) as long as the maximum depth of call trees of CC and DC programs is at most 2 and no motion transforms are used. +/// +/// The maxTraversableGraphDepth corresponds to the maximal number of traversables visited when calling trace. +/// Every acceleration structure and motion transform count as one level of traversal. 
+/// E.g., for a simple IAS (instance acceleration structure) -> GAS (geometry acceleration structure) +/// traversal graph, the maxTraversableGraphDepth is two. +/// For IAS -> MT (motion transform) -> GAS, the maxTraversableGraphDepth is three. +/// Note that it does not matter whether a IAS or GAS has motion or not, it always counts as one. +/// Launching optix with exceptions turned on (see #OPTIX_EXCEPTION_FLAG_TRACE_DEPTH) will throw an exception +/// if the specified maxTraversableGraphDepth is too small. +/// +/// \param[in] pipeline The pipeline to configure the stack size for. +/// \param[in] directCallableStackSizeFromTraversal The direct stack size requirement for direct callables invoked from IS or AH. +/// \param[in] directCallableStackSizeFromState The direct stack size requirement for direct callables invoked from RG, MS, or CH. +/// \param[in] continuationStackSize The continuation stack requirement. +/// \param[in] maxTraversableGraphDepth The maximum depth of a traversable graph passed to trace. +OptixResult optixPipelineSetStackSize( OptixPipeline pipeline, + unsigned int directCallableStackSizeFromTraversal, + unsigned int directCallableStackSizeFromState, + unsigned int continuationStackSize, + unsigned int maxTraversableGraphDepth ); + +//@} +/// \defgroup optix_host_api_modules Modules +/// \ingroup optix_host_api +//@{ + +/// logString is an optional buffer that contains compiler feedback and errors. This +/// information is also passed to the context logger (if enabled), however it may be +/// difficult to correlate output to the logger to specific API invocations when using +/// multiple threads. The output to logString will only contain feedback for this specific +/// invocation of this API call. +/// +/// logStringSize as input should be a pointer to the number of bytes backing logString. +/// Upon return it contains the length of the log message (including the null terminator) +/// which may be greater than the input value. 
In this case, the log message will be +/// truncated to fit into logString. +/// +/// If logString or logStringSize are NULL, no output is written to logString. If +/// logStringSize points to a value that is zero, no output is written. This does not +/// affect output to the context logger if enabled. +/// +/// \param[in] context +/// \param[in] moduleCompileOptions +/// \param[in] pipelineCompileOptions All modules in a pipeline need to use the same values for the pipeline compile options. +/// \param[in] PTX Pointer to the PTX input string. +/// \param[in] PTXsize Parsing proceeds up to PTXsize characters, or the first NUL byte, whichever occurs first. +/// \param[out] logString Information will be written to this string. If logStringSize > 0 logString will be null terminated. +/// \param[in,out] logStringSize +/// \param[out] module +/// +/// \return OPTIX_ERROR_INVALID_VALUE - context is 0, moduleCompileOptions is 0, pipelineCompileOptions is 0, PTX is 0, module is 0. +OptixResult optixModuleCreateFromPTX( OptixDeviceContext context, + const OptixModuleCompileOptions* moduleCompileOptions, + const OptixPipelineCompileOptions* pipelineCompileOptions, + const char* PTX, + size_t PTXsize, + char* logString, + size_t* logStringSize, + OptixModule* module ); + +/// This function is designed to do just enough work to create the OptixTask return +/// parameter and is expected to be fast enough run without needing parallel execution. A +/// single thread could generate all the OptixTask objects for further processing in a +/// work pool. +/// +/// Options are similar to #optixModuleCreateFromPTX(), aside from the return parameter, +/// firstTask. +/// +/// The memory used to hold the PTX should be live until all tasks are finished. +/// +/// It is illegal to call #optixModuleDestroy() if any OptixTask objects are currently +/// being executed. In that case OPTIX_ERROR_ILLEGAL_DURING_TASK_EXECUTE will be returned. 
+/// +/// If an invocation of optixTaskExecute fails, the OptixModule will be marked as +/// OPTIX_MODULE_COMPILE_STATE_IMPENDING_FAILURE if there are outstanding tasks or +/// OPTIX_MODULE_COMPILE_STATE_FAILURE if there are no outstanding tasks. Subsequent calls +/// to #optixTaskExecute() may execute additional work to collect compilation errors +/// generated from the input. Currently executing tasks will not necessarily be terminated +/// immediately but at the next opportunity. + +/// Logging will continue to be directed to the logger installed with the +/// OptixDeviceContext. If logString is provided to #optixModuleCreateFromPTXWithTasks(), +/// it will contain all the compiler feedback from all executed tasks. The lifetime of the +/// memory pointed to by logString should extend from calling +/// #optixModuleCreateFromPTXWithTasks() to when the compilation state is either +/// OPTIX_MODULE_COMPILE_STATE_FAILURE or OPTIX_MODULE_COMPILE_STATE_COMPLETED. OptiX will +/// not write to the logString outside of execution of +/// #optixModuleCreateFromPTXWithTasks() or #optixTaskExecute(). If the compilation state +/// is OPTIX_MODULE_COMPILE_STATE_IMPENDING_FAILURE and no further execution of +/// #optixTaskExecute() is performed the logString may be reclaimed by the application +/// before calling #optixModuleDestroy(). The contents of logString will contain output +/// from currently completed tasks. + +/// All OptixTask objects associated with a given OptixModule will be cleaned up when +/// #optixModuleDestroy() is called regardless of whether the compilation was successful +/// or not. If the compilation state is OPTIX_MODULE_COMPILE_STATE_IMPENDING_FAILURE, any +/// unstarted OptixTask objects do not need to be executed though there is no harm doing +/// so. 
+/// +/// \see #optixModuleCreateFromPTX +OptixResult optixModuleCreateFromPTXWithTasks( OptixDeviceContext context, + const OptixModuleCompileOptions* moduleCompileOptions, + const OptixPipelineCompileOptions* pipelineCompileOptions, + const char* PTX, + size_t PTXsize, + char* logString, + size_t* logStringSize, + OptixModule* module, + OptixTask* firstTask ); + +/// When creating a module with tasks, the current state of the module can be queried +/// using this function. +/// +/// Thread safety: Safe to call from any thread until optixModuleDestroy is called. +/// +/// \see #optixModuleCreateFromPTXWithTasks +OptixResult optixModuleGetCompilationState( OptixModule module, OptixModuleCompileState* state ); + +/// Call for OptixModule objects created with optixModuleCreateFromPTX and optixModuleDeserialize. +/// +/// Modules must not be destroyed while they are still used by any program group. +/// +/// Thread safety: A module must not be destroyed while it is still in use by concurrent API calls in other threads. +OptixResult optixModuleDestroy( OptixModule module ); + +/// Returns a module containing the intersection program for the built-in primitive type specified +/// by the builtinISOptions. This module must be used as the moduleIS for the OptixProgramGroupHitgroup +/// in any SBT record for that primitive type. (The entryFunctionNameIS should be null.) +OptixResult optixBuiltinISModuleGet( OptixDeviceContext context, + const OptixModuleCompileOptions* moduleCompileOptions, + const OptixPipelineCompileOptions* pipelineCompileOptions, + const OptixBuiltinISOptions* builtinISOptions, + OptixModule* builtinModule ); + +//@} +/// \defgroup optix_host_api_tasks Tasks +/// \ingroup optix_host_api +//@{ + +/// Each OptixTask should be executed with #optixTaskExecute(). If additional parallel +/// work is found, new OptixTask objects will be returned in additionalTasks along with +/// the number of additional tasks in numAdditionalTasksCreated. 
The parameter +/// additionalTasks should point to a user allocated array of minimum size +/// maxNumAdditionalTasks. OptiX can generate up to maxNumAdditionalTasks additional tasks. +/// +/// Each task can be executed in parallel and in any order. +/// +/// Thread safety: Safe to call from any thread until #optixModuleDestroy() is called for +/// any associated task. +/// +/// \see #optixModuleCreateFromPTXWithTasks +/// +/// \param[in] task the OptixTask to execute +/// \param[in] additionalTasks pointer to array of OptixTask objects to be filled in +/// \param[in] maxNumAdditionalTasks maximum number of additional OptixTask objects +/// \param[out] numAdditionalTasksCreated number of OptixTask objects created by OptiX and written into #additionalTasks +OptixResult optixTaskExecute( OptixTask task, OptixTask* additionalTasks, unsigned int maxNumAdditionalTasks, unsigned int* numAdditionalTasksCreated ); + +//@} +/// \defgroup optix_host_api_program_groups Program groups +/// \ingroup optix_host_api +//@{ + +/// Returns the stack sizes for the given program group. +/// +/// \param[in] programGroup the program group +/// \param[out] stackSizes the corresponding stack sizes +OptixResult optixProgramGroupGetStackSize( OptixProgramGroup programGroup, OptixStackSizes* stackSizes ); + +/// logString is an optional buffer that contains compiler feedback and errors. This +/// information is also passed to the context logger (if enabled), however it may be +/// difficult to correlate output to the logger to specific API invocations when using +/// multiple threads. The output to logString will only contain feedback for this specific +/// invocation of this API call. +/// +/// logStringSize as input should be a pointer to the number of bytes backing logString. +/// Upon return it contains the length of the log message (including the null terminator) +/// which may be greater than the input value. In this case, the log message will be +/// truncated to fit into logString. 
+/// +/// If logString or logStringSize are NULL, no output is written to logString. If +/// logStringSize points to a value that is zero, no output is written. This does not +/// affect output to the context logger if enabled. +/// +/// Creates numProgramGroups OptixProgramGroup objects from the specified +/// OptixProgramGroupDesc array. The size of the arrays must match. +/// +/// \param[in] context +/// \param[in] programDescriptions N * OptixProgramGroupDesc +/// \param[in] numProgramGroups N +/// \param[in] options +/// \param[out] logString Information will be written to this string. If logStringSize > 0 logString will be null terminated. +/// \param[in,out] logStringSize +/// \param[out] programGroups +OptixResult optixProgramGroupCreate( OptixDeviceContext context, + const OptixProgramGroupDesc* programDescriptions, + unsigned int numProgramGroups, + const OptixProgramGroupOptions* options, + char* logString, + size_t* logStringSize, + OptixProgramGroup* programGroups ); + +/// Thread safety: A program group must not be destroyed while it is still in use by concurrent API calls in other threads. +OptixResult optixProgramGroupDestroy( OptixProgramGroup programGroup ); + +//@} +/// \defgroup optix_host_api_launches Launches +/// \ingroup optix_host_api +//@{ + +/// Where the magic happens. +/// +/// The stream and pipeline must belong to the same device context. Multiple launches +/// may be issued in parallel from multiple threads to different streams. +/// +/// pipelineParamsSize number of bytes are copied from the device memory pointed to by +/// pipelineParams before launch. It is an error if pipelineParamsSize is greater than the +/// size of the variable declared in modules and identified by +/// OptixPipelineCompileOptions::pipelineLaunchParamsVariableName. 
If the launch params +/// variable was optimized out or not found in the modules linked to the pipeline then +/// the pipelineParams and pipelineParamsSize parameters are ignored. 
+/// +/// sbt points to the shader binding table, which defines shader +/// groupings and their resources. See the SBT spec. +/// +/// \param[in] pipeline +/// \param[in] stream +/// \param[in] pipelineParams +/// \param[in] pipelineParamsSize +/// \param[in] sbt +/// \param[in] width number of elements to compute +/// \param[in] height number of elements to compute +/// \param[in] depth number of elements to compute +/// +/// Thread safety: In the current implementation concurrent launches to the same pipeline are not +/// supported. Concurrent launches require separate OptixPipeline objects. +OptixResult optixLaunch( OptixPipeline pipeline, + CUstream stream, + CUdeviceptr pipelineParams, + size_t pipelineParamsSize, + const OptixShaderBindingTable* sbt, + unsigned int width, + unsigned int height, + unsigned int depth ); + +/// \param[in] programGroup the program group containing the program(s) +/// \param[out] sbtRecordHeaderHostPointer the result sbt record header +OptixResult optixSbtRecordPackHeader( OptixProgramGroup programGroup, void* sbtRecordHeaderHostPointer ); + +//@} +/// \defgroup optix_host_api_acceleration_structures Acceleration structures +/// \ingroup optix_host_api +//@{ + +/// \param[in] context +/// \param[in] accelOptions options for the accel build +/// \param[in] buildInputs an array of OptixBuildInput objects +/// \param[in] numBuildInputs number of elements in buildInputs (must be at least 1) +/// \param[out] bufferSizes fills in buffer sizes +OptixResult optixAccelComputeMemoryUsage( OptixDeviceContext context, + const OptixAccelBuildOptions* accelOptions, + const OptixBuildInput* buildInputs, + unsigned int numBuildInputs, + OptixAccelBufferSizes* bufferSizes ); + +/// \param[in] context +/// \param[in] stream +/// \param[in] accelOptions accel options +/// \param[in] buildInputs an array of OptixBuildInput objects +/// \param[in] numBuildInputs must be >= 1 for GAS, and == 1 for IAS +/// \param[in] tempBuffer must be a multiple of 
OPTIX_ACCEL_BUFFER_BYTE_ALIGNMENT +/// \param[in] tempBufferSizeInBytes +/// \param[in] outputBuffer must be a multiple of OPTIX_ACCEL_BUFFER_BYTE_ALIGNMENT +/// \param[in] outputBufferSizeInBytes +/// \param[out] outputHandle +/// \param[out] emittedProperties types of requested properties and output buffers +/// \param[in] numEmittedProperties number of post-build properties to populate (may be zero) +OptixResult optixAccelBuild( OptixDeviceContext context, + CUstream stream, + const OptixAccelBuildOptions* accelOptions, + const OptixBuildInput* buildInputs, + unsigned int numBuildInputs, + CUdeviceptr tempBuffer, + size_t tempBufferSizeInBytes, + CUdeviceptr outputBuffer, + size_t outputBufferSizeInBytes, + OptixTraversableHandle* outputHandle, + const OptixAccelEmitDesc* emittedProperties, + unsigned int numEmittedProperties ); + +/// Obtain relocation information, stored in OptixAccelRelocationInfo, for a given context +/// and acceleration structure's traversable handle. +/// +/// The relocation information can be passed to optixAccelCheckRelocationCompatibility to +/// determine if an acceleration structure, referenced by 'handle', can be relocated to a +/// different device's memory space (see #optixAccelCheckRelocationCompatibility). +/// +/// When used with optixAccelRelocate, it provides data necessary for doing the relocation. +/// +/// If the acceleration structure data associated with 'handle' is copied multiple times, +/// the same OptixAccelRelocationInfo can also be used on all copies. +/// +/// \param[in] context +/// \param[in] handle +/// \param[out] info +/// \return OPTIX_ERROR_INVALID_VALUE will be returned for traversable handles that are not from +/// acceleration structure builds. 
+OptixResult optixAccelGetRelocationInfo( OptixDeviceContext context, OptixTraversableHandle handle, OptixAccelRelocationInfo* info ); + +/// Checks if an acceleration structure built using another OptixDeviceContext (that was +/// used to fill in 'info') is compatible with the OptixDeviceContext specified in the +/// 'context' parameter. +/// +/// Any device is always compatible with itself. +/// +/// \param[in] context +/// \param[in] info +/// \param[out] compatible If OPTIX_SUCCESS is returned 'compatible' will have the value of either: +/// - 0: This context is not compatible with acceleration structure data associated with 'info'. +/// - 1: This context is compatible. +OptixResult optixAccelCheckRelocationCompatibility( OptixDeviceContext context, const OptixAccelRelocationInfo* info, int* compatible ); + +/// optixAccelRelocate is called to update the acceleration structure after it has been +/// relocated. Relocation is necessary when the acceleration structure's location in device +/// memory has changed. optixAccelRelocate does not copy the memory. This function only +/// operates on the relocated memory whose new location is specified by 'targetAccel'. +/// optixAccelRelocate also returns the new OptixTraversableHandle associated with +/// 'targetAccel'. The original memory (source) is not required to be valid, only the +/// OptixAccelRelocationInfo. +/// +/// Before copying the data and calling optixAccelRelocate, +/// optixAccelCheckRelocationCompatibility should be called to ensure the copy will be +/// compatible with the destination device context. +/// +/// The memory pointed to by 'targetAccel' should be allocated with the same size as the +/// source acceleration. Similar to the 'outputBuffer' used in optixAccelBuild, this +/// pointer must be a multiple of OPTIX_ACCEL_BUFFER_BYTE_ALIGNMENT. +/// +/// The memory in 'targetAccel' must be allocated as long as the accel is in use. 
+/// +/// When relocating an accel that contains instances, 'instanceTraversableHandles' and +/// 'numInstanceTraversableHandles' should be supplied. These are the traversable handles +/// of the instances. These can be used when also relocating the instances. No updates to +/// the bounds are performed. Use optixAccelBuild to update the bounds. +/// 'instanceTraversableHandles' and 'numInstanceTraversableHandles' may be zero when +/// relocating bottom level accel (i.e. an accel with no instances). +/// +/// \param[in] context +/// \param[in] stream +/// \param[in] info +/// \param[in] instanceTraversableHandles +/// \param[in] numInstanceTraversableHandles +/// \param[in] targetAccel +/// \param[in] targetAccelSizeInBytes +/// \param[out] targetHandle +OptixResult optixAccelRelocate( OptixDeviceContext context, + CUstream stream, + const OptixAccelRelocationInfo* info, + CUdeviceptr instanceTraversableHandles, + size_t numInstanceTraversableHandles, + CUdeviceptr targetAccel, + size_t targetAccelSizeInBytes, + OptixTraversableHandle* targetHandle ); + +/// After building an acceleration structure, it can be copied in a compacted form to reduce +/// memory. In order to be compacted, OPTIX_BUILD_FLAG_ALLOW_COMPACTION must be supplied in +/// OptixAccelBuildOptions::buildFlags passed to optixAccelBuild. +/// +/// 'outputBuffer' is the pointer to where the compacted acceleration structure will be +/// written. This pointer must be a multiple of OPTIX_ACCEL_BUFFER_BYTE_ALIGNMENT. +/// +/// The size of the memory specified in 'outputBufferSizeInBytes' should be at least the +/// value computed using the OPTIX_PROPERTY_TYPE_COMPACTED_SIZE that was reported during +/// optixAccelBuild. 
+/// +/// \param[in] context +/// \param[in] stream +/// \param[in] inputHandle +/// \param[in] outputBuffer +/// \param[in] outputBufferSizeInBytes +/// \param[out] outputHandle +OptixResult optixAccelCompact( OptixDeviceContext context, + CUstream stream, + OptixTraversableHandle inputHandle, + CUdeviceptr outputBuffer, + size_t outputBufferSizeInBytes, + OptixTraversableHandle* outputHandle ); + +/// \param[in] onDevice +/// \param[in] pointer pointer to traversable allocated in OptixDeviceContext. This pointer must be a multiple of OPTIX_TRANSFORM_BYTE_ALIGNMENT +/// \param[in] traversableType Type of OptixTraversableHandle to create +/// \param[out] traversableHandle traversable handle. traversableHandle must be in host memory +OptixResult optixConvertPointerToTraversableHandle( OptixDeviceContext onDevice, + CUdeviceptr pointer, + OptixTraversableType traversableType, + OptixTraversableHandle* traversableHandle ); + + + + +//@} +/// \defgroup optix_host_api_denoiser Denoiser +/// \ingroup optix_host_api +//@{ + +/// Creates a denoiser object with the given options, using built-in inference models +/// +/// 'modelKind' selects the model used for inference. +/// Inference for the built-in models can be guided (giving hints to improve image quality) with +/// albedo and normal vector images in the guide layer (see 'optixDenoiserInvoke'). +/// Use of these images must be enabled in 'OptixDenoiserOptions'. +/// +/// \param[in] context +/// \param[in] modelKind +/// \param[in] options +/// \param[out] denoiser +OptixResult optixDenoiserCreate( OptixDeviceContext context, + OptixDenoiserModelKind modelKind, + const OptixDenoiserOptions* options, + OptixDenoiser* denoiser ); + +/// Creates a denoiser object with the given options, using a provided inference model +/// +/// 'userData' and 'userDataSizeInBytes' provide a user model for inference. 
+/// The memory passed in userData will be accessed only during the invocation of this function and +/// can be freed after it returns. +/// The user model must export only one weight set which determines both the model kind and the +/// required set of guide images. +/// +/// \param[in] context +/// \param[in] userData +/// \param[in] userDataSizeInBytes +/// \param[out] denoiser +OptixResult optixDenoiserCreateWithUserModel( OptixDeviceContext context, + const void* userData, size_t userDataSizeInBytes, OptixDenoiser* denoiser ); + +/// Destroys the denoiser object and any associated host resources. +OptixResult optixDenoiserDestroy( OptixDenoiser denoiser ); + +/// Computes the GPU memory resources required to execute the denoiser. +/// +/// Memory for state and scratch buffers must be allocated with the sizes in 'returnSizes' and scratch memory +/// passed to optixDenoiserSetup, optixDenoiserInvoke, +/// optixDenoiserComputeIntensity and optixDenoiserComputeAverageColor. +/// For tiled denoising an overlap area ('overlapWindowSizeInPixels') must be added to each tile on all sides +/// which increases the amount of +/// memory needed to denoise a tile. In case of tiling use withOverlapScratchSizeInBytes for scratch memory size. +/// If only full resolution images are denoised, withoutOverlapScratchSizeInBytes can be used which is always +/// smaller than withOverlapScratchSizeInBytes. +/// +/// 'outputWidth' and 'outputHeight' is the dimension of the image to be denoised (without overlap in case tiling +/// is being used). +/// 'outputWidth' and 'outputHeight' must be greater than or equal to the dimensions passed to optixDenoiserSetup. 
+/// +/// \param[in] denoiser +/// \param[in] outputWidth +/// \param[in] outputHeight +/// \param[out] returnSizes +OptixResult optixDenoiserComputeMemoryResources( const OptixDenoiser denoiser, + unsigned int outputWidth, + unsigned int outputHeight, + OptixDenoiserSizes* returnSizes ); + +/// Initializes the state required by the denoiser. +/// +/// 'inputWidth' and 'inputHeight' must include overlap on both sides of the image if tiling is being used. The overlap is +/// returned by #optixDenoiserComputeMemoryResources. +/// For subsequent calls to #optixDenoiserInvoke 'inputWidth' and 'inputHeight' are the maximum dimensions +/// of the input layers. Dimensions of the input layers passed to #optixDenoiserInvoke may be different in each +/// invocation however they always must be smaller than 'inputWidth' and 'inputHeight' passed to #optixDenoiserSetup. +/// +/// \param[in] denoiser +/// \param[in] stream +/// \param[in] inputWidth +/// \param[in] inputHeight +/// \param[in] denoiserState +/// \param[in] denoiserStateSizeInBytes +/// \param[in] scratch +/// \param[in] scratchSizeInBytes +OptixResult optixDenoiserSetup( OptixDenoiser denoiser, + CUstream stream, + unsigned int inputWidth, + unsigned int inputHeight, + CUdeviceptr denoiserState, + size_t denoiserStateSizeInBytes, + CUdeviceptr scratch, + size_t scratchSizeInBytes ); + +/// Invokes denoiser on a set of input data and produces at least one output image. +/// State memory must be available during the execution of the +/// denoiser (or until optixDenoiserSetup is called with a new state memory pointer). +/// Scratch memory passed is used only for the duration of this function. +/// Scratch and state memory sizes must have a size greater than or equal to the sizes as returned by +/// optixDenoiserComputeMemoryResources. +/// +/// 'inputOffsetX' and 'inputOffsetY' are pixel offsets in the 'inputLayers' image +/// specifying the beginning of the image without overlap. 
When denoising an entire image without tiling +/// there is no overlap and 'inputOffsetX' and 'inputOffsetY' must be zero. When denoising a tile which is +/// adjacent to one of the four sides of the entire image the corresponding offsets must also be zero since +/// there is no overlap at the side adjacent to the image border. +/// +/// 'guideLayer' provides additional information to the denoiser. When providing albedo and normal vector +/// guide images, the corresponding fields in the 'OptixDenoiserOptions' must be +/// enabled, see #optixDenoiserCreate. +/// 'guideLayer' must not be null. If a guide image in 'OptixDenoiserOptions' is not enabled, the +/// corresponding image in 'OptixDenoiserGuideLayer' is ignored. +/// +/// If OPTIX_DENOISER_MODEL_KIND_TEMPORAL or OPTIX_DENOISER_MODEL_KIND_TEMPORAL_AOV is selected, a 2d flow +/// image must be given in 'OptixDenoiserGuideLayer'. +/// It describes for each pixel the flow from the previous to the current frame (a 2d vector in pixel space). +/// The denoised beauty/AOV of the previous frame must be given in 'previousOutput'. +/// If this image is not available in the first frame of a sequence, the noisy beauty/AOV from the first frame +/// and zero flow vectors could be given as a substitute. +/// For non-temporal model kinds the flow image in 'OptixDenoiserGuideLayer' is ignored. +/// 'previousOutput' and +/// 'output' may refer to the same buffer, i.e. 'previousOutput' is first read by this function and later +/// overwritten with the denoised result. 'output' can be passed as 'previousOutput' to the next frame. +/// In other model kinds (not temporal) 'previousOutput' is ignored. +/// +/// The beauty layer must be given as the first entry in 'layers'. +/// In AOV type model kinds (OPTIX_DENOISER_MODEL_KIND_AOV or in user defined models implementing +/// kernel-prediction) additional layers for the AOV images can be given. 
+/// In each layer the noisy input image is given in 'input', the denoised output is written into the +/// 'output' image. input and output images may refer to the same buffer, with the restriction that +/// the pixel formats must be identical for input and output when the blend mode is selected (see +/// #OptixDenoiserParams). +/// +/// If OPTIX_DENOISER_MODEL_KIND_TEMPORAL or OPTIX_DENOISER_MODEL_KIND_TEMPORAL_AOV is selected, the denoised +/// image from the previous frame must be given in 'previousOutput' in the layer. 'previousOutput' and +/// 'output' may refer to the same buffer, i.e. 'previousOutput' is first read by this function and later +/// overwritten with the denoised result. 'output' can be passed as 'previousOutput' to the next frame. +/// In other model kinds (not temporal) 'previousOutput' is ignored. +/// +/// If OPTIX_DENOISER_MODEL_KIND_TEMPORAL or OPTIX_DENOISER_MODEL_KIND_TEMPORAL_AOV is selected, the +/// normal vector guide image must be given as 3d vectors in camera space. In the other models only +/// the x and y channels are used and other channels are ignored. +/// +/// \param[in] denoiser +/// \param[in] stream +/// \param[in] params +/// \param[in] denoiserState +/// \param[in] denoiserStateSizeInBytes +/// \param[in] guideLayer +/// \param[in] layers +/// \param[in] numLayers +/// \param[in] inputOffsetX +/// \param[in] inputOffsetY +/// \param[in] scratch +/// \param[in] scratchSizeInBytes +OptixResult optixDenoiserInvoke( OptixDenoiser denoiser, + CUstream stream, + const OptixDenoiserParams* params, + CUdeviceptr denoiserState, + size_t denoiserStateSizeInBytes, + const OptixDenoiserGuideLayer* guideLayer, + const OptixDenoiserLayer* layers, + unsigned int numLayers, + unsigned int inputOffsetX, + unsigned int inputOffsetY, + CUdeviceptr scratch, + size_t scratchSizeInBytes ); + +/// Computes the logarithmic average intensity of the given image. 
The returned value 'outputIntensity' +/// is multiplied with the RGB values of the input image/tile in optixDenoiserInvoke if given in the parameter +/// OptixDenoiserParams::hdrIntensity (otherwise 'hdrIntensity' must be a null pointer). This is useful for +/// denoising HDR images which are very dark or bright. +/// When denoising tiles the intensity of the entire image should be computed, i.e. not per tile to get +/// consistent results. +/// +/// For each RGB pixel in the inputImage the intensity is calculated and summed if it is greater than 1e-8f: +/// intensity = log(r * 0.212586f + g * 0.715170f + b * 0.072200f). +/// The function returns 0.18 / exp(sum of intensities / number of summed pixels). +/// More details could be found in the Reinhard tonemapping paper: +/// http://www.cmap.polytechnique.fr/~peyre/cours/x2005signal/hdr_photographic.pdf +/// +/// The size of scratch memory required can be queried with #optixDenoiserComputeMemoryResources. +/// +/// data type unsigned char is not supported for 'inputImage', it must be 3 or 4 component half/float. +/// +/// \param[in] denoiser +/// \param[in] stream +/// \param[in] inputImage +/// \param[out] outputIntensity single float +/// \param[in] scratch +/// \param[in] scratchSizeInBytes +OptixResult optixDenoiserComputeIntensity( OptixDenoiser denoiser, + CUstream stream, + const OptixImage2D* inputImage, + CUdeviceptr outputIntensity, + CUdeviceptr scratch, + size_t scratchSizeInBytes ); + +/// Compute average logarithmic for each of the first three channels for the given image. +/// When denoising tiles the intensity of the entire image should be computed, i.e. not per tile to get +/// consistent results. +/// +/// The size of scratch memory required can be queried with #optixDenoiserComputeMemoryResources. +/// +/// data type unsigned char is not supported for 'inputImage', it must be 3 or 4 component half/float. 
+/// +/// \param[in] denoiser +/// \param[in] stream +/// \param[in] inputImage +/// \param[out] outputAverageColor three floats +/// \param[in] scratch +/// \param[in] scratchSizeInBytes +OptixResult optixDenoiserComputeAverageColor( OptixDenoiser denoiser, + CUstream stream, + const OptixImage2D* inputImage, + CUdeviceptr outputAverageColor, + CUdeviceptr scratch, + size_t scratchSizeInBytes ); + +//@} + +#ifdef __cplusplus +} +#endif + +#include "optix_function_table.h" + +#endif // __optix_optix_7_host_h__ diff --git a/include/optix_7_types.h b/include/optix_7_types.h new file mode 100644 index 0000000..58985e4 --- /dev/null +++ b/include/optix_7_types.h @@ -0,0 +1,1986 @@ + +/* + * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and proprietary + * rights in and to this software, related documentation and any modifications thereto. + * Any use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation is strictly + * prohibited. + * + * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* + * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, + * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE. 
IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY + * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT + * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF + * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR + * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGES + */ + +/// @file +/// @author NVIDIA Corporation +/// @brief OptiX public API header +/// +/// OptiX types include file -- defines types and enums used by the API. +/// For the math library routines include optix_math.h + +#if !defined( __OPTIX_INCLUDE_INTERNAL_HEADERS__ ) +#error("optix_7_types.h is an internal header file and must not be used directly. Please use optix_types.h, optix_host.h, optix_device.h or optix.h instead.") +#endif + +#ifndef __optix_optix_7_types_h__ +#define __optix_optix_7_types_h__ + +#if !defined(__CUDACC_RTC__) +#include /* for size_t */ +#endif + + + +/// \defgroup optix_types Types +/// \brief OptiX Types + +/** \addtogroup optix_types +@{ +*/ + +// This typedef should match the one in cuda.h in order to avoid compilation errors. 
+#if defined(_WIN64) || defined(__LP64__) +/// CUDA device pointer +typedef unsigned long long CUdeviceptr; +#else +/// CUDA device pointer +typedef unsigned int CUdeviceptr; +#endif + +/// Opaque type representing a device context +typedef struct OptixDeviceContext_t* OptixDeviceContext; + +/// Opaque type representing a module +typedef struct OptixModule_t* OptixModule; + +/// Opaque type representing a program group +typedef struct OptixProgramGroup_t* OptixProgramGroup; + +/// Opaque type representing a pipeline +typedef struct OptixPipeline_t* OptixPipeline; + +/// Opaque type representing a denoiser instance +typedef struct OptixDenoiser_t* OptixDenoiser; + +/// Opaque type representing a work task +typedef struct OptixTask_t* OptixTask; + +/// Traversable handle +typedef unsigned long long OptixTraversableHandle; + +/// Visibility mask +typedef unsigned int OptixVisibilityMask; + +/// Size of the SBT record headers. +#define OPTIX_SBT_RECORD_HEADER_SIZE ( (size_t)32 ) + +/// Alignment requirement for device pointers in OptixShaderBindingTable. +#define OPTIX_SBT_RECORD_ALIGNMENT 16ull + +/// Alignment requirement for output and temporary buffers for acceleration structures. +#define OPTIX_ACCEL_BUFFER_BYTE_ALIGNMENT 128ull + +/// Alignment requirement for OptixBuildInputInstanceArray::instances. +#define OPTIX_INSTANCE_BYTE_ALIGNMENT 16ull + +/// Alignment requirement for OptixBuildInputCustomPrimitiveArray::aabbBuffers +#define OPTIX_AABB_BUFFER_BYTE_ALIGNMENT 8ull + +/// Alignment requirement for OptixBuildInputTriangleArray::preTransform +#define OPTIX_GEOMETRY_TRANSFORM_BYTE_ALIGNMENT 16ull + +/// Alignment requirement for OptixStaticTransform, OptixMatrixMotionTransform, OptixSRTMotionTransform. +#define OPTIX_TRANSFORM_BYTE_ALIGNMENT 64ull + +/// Maximum number of registers allowed. Defaults to no explicit limit. +#define OPTIX_COMPILE_DEFAULT_MAX_REGISTER_COUNT 0 + +/// Maximum number of payload types allowed.
+#define OPTIX_COMPILE_DEFAULT_MAX_PAYLOAD_TYPE_COUNT 8 + +/// Maximum number of payload values allowed. +#define OPTIX_COMPILE_DEFAULT_MAX_PAYLOAD_VALUE_COUNT 32 + + +/// Result codes returned from API functions +/// +/// All host side API functions return OptixResult with the exception of optixGetErrorName +/// and optixGetErrorString. When successful OPTIX_SUCCESS is returned. All return codes +/// except for OPTIX_SUCCESS should be assumed to be errors as opposed to a warning. +/// +/// \see #optixGetErrorName(), #optixGetErrorString() +typedef enum OptixResult +{ + OPTIX_SUCCESS = 0, + OPTIX_ERROR_INVALID_VALUE = 7001, + OPTIX_ERROR_HOST_OUT_OF_MEMORY = 7002, + OPTIX_ERROR_INVALID_OPERATION = 7003, + OPTIX_ERROR_FILE_IO_ERROR = 7004, + OPTIX_ERROR_INVALID_FILE_FORMAT = 7005, + OPTIX_ERROR_DISK_CACHE_INVALID_PATH = 7010, + OPTIX_ERROR_DISK_CACHE_PERMISSION_ERROR = 7011, + OPTIX_ERROR_DISK_CACHE_DATABASE_ERROR = 7012, + OPTIX_ERROR_DISK_CACHE_INVALID_DATA = 7013, + OPTIX_ERROR_LAUNCH_FAILURE = 7050, + OPTIX_ERROR_INVALID_DEVICE_CONTEXT = 7051, + OPTIX_ERROR_CUDA_NOT_INITIALIZED = 7052, + OPTIX_ERROR_VALIDATION_FAILURE = 7053, + OPTIX_ERROR_INVALID_PTX = 7200, + OPTIX_ERROR_INVALID_LAUNCH_PARAMETER = 7201, + OPTIX_ERROR_INVALID_PAYLOAD_ACCESS = 7202, + OPTIX_ERROR_INVALID_ATTRIBUTE_ACCESS = 7203, + OPTIX_ERROR_INVALID_FUNCTION_USE = 7204, + OPTIX_ERROR_INVALID_FUNCTION_ARGUMENTS = 7205, + OPTIX_ERROR_PIPELINE_OUT_OF_CONSTANT_MEMORY = 7250, + OPTIX_ERROR_PIPELINE_LINK_ERROR = 7251, + OPTIX_ERROR_ILLEGAL_DURING_TASK_EXECUTE = 7270, + OPTIX_ERROR_INTERNAL_COMPILER_ERROR = 7299, + OPTIX_ERROR_DENOISER_MODEL_NOT_SET = 7300, + OPTIX_ERROR_DENOISER_NOT_INITIALIZED = 7301, + OPTIX_ERROR_ACCEL_NOT_COMPATIBLE = 7400, + OPTIX_ERROR_PAYLOAD_TYPE_MISMATCH = 7500, + OPTIX_ERROR_PAYLOAD_TYPE_RESOLUTION_FAILED = 7501, + OPTIX_ERROR_PAYLOAD_TYPE_ID_INVALID = 7502, + OPTIX_ERROR_NOT_SUPPORTED = 7800, + OPTIX_ERROR_UNSUPPORTED_ABI_VERSION = 7801, + 
OPTIX_ERROR_FUNCTION_TABLE_SIZE_MISMATCH = 7802, + OPTIX_ERROR_INVALID_ENTRY_FUNCTION_OPTIONS = 7803, + OPTIX_ERROR_LIBRARY_NOT_FOUND = 7804, + OPTIX_ERROR_ENTRY_SYMBOL_NOT_FOUND = 7805, + OPTIX_ERROR_LIBRARY_UNLOAD_FAILURE = 7806, + OPTIX_ERROR_DEVICE_OUT_OF_MEMORY = 7807, + OPTIX_ERROR_CUDA_ERROR = 7900, + OPTIX_ERROR_INTERNAL_ERROR = 7990, + OPTIX_ERROR_UNKNOWN = 7999, +} OptixResult; + +/// Parameters used for #optixDeviceContextGetProperty() +/// +/// \see #optixDeviceContextGetProperty() +typedef enum OptixDeviceProperty +{ + /// Maximum value for OptixPipelineLinkOptions::maxTraceDepth. sizeof( unsigned int ) + OPTIX_DEVICE_PROPERTY_LIMIT_MAX_TRACE_DEPTH = 0x2001, + + /// Maximum value to pass into optixPipelineSetStackSize for parameter + /// maxTraversableGraphDepth. sizeof( unsigned int ) + OPTIX_DEVICE_PROPERTY_LIMIT_MAX_TRAVERSABLE_GRAPH_DEPTH = 0x2002, + + /// The maximum number of primitives (over all build inputs) as input to a single + /// Geometry Acceleration Structure (GAS). sizeof( unsigned int ) + OPTIX_DEVICE_PROPERTY_LIMIT_MAX_PRIMITIVES_PER_GAS = 0x2003, + + /// The maximum number of instances (over all build inputs) as input to a single + /// Instance Acceleration Structure (IAS). sizeof( unsigned int ) + OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCES_PER_IAS = 0x2004, + + /// The RT core version supported by the device (0 for no support, 10 for version + /// 1.0). sizeof( unsigned int ) + OPTIX_DEVICE_PROPERTY_RTCORE_VERSION = 0x2005, + + /// The maximum value for #OptixInstance::instanceId. sizeof( unsigned int ) + OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_ID = 0x2006, + + /// The number of bits available for the #OptixInstance::visibilityMask. + /// Higher bits must be set to zero. sizeof( unsigned int ) + OPTIX_DEVICE_PROPERTY_LIMIT_NUM_BITS_INSTANCE_VISIBILITY_MASK = 0x2007, + + /// The maximum number of SBT records (summed over all build inputs) for a single + /// Geometry Acceleration Structure (GAS).
sizeof( unsigned int ) + OPTIX_DEVICE_PROPERTY_LIMIT_MAX_SBT_RECORDS_PER_GAS = 0x2008, + + /// The maximum value for #OptixInstance::sbtOffset. sizeof( unsigned int ) + OPTIX_DEVICE_PROPERTY_LIMIT_MAX_SBT_OFFSET = 0x2009, +} OptixDeviceProperty; + +/// Type of the callback function used for log messages. +/// +/// \param[in] level The log level indicates the severity of the message. See below for +/// possible values. +/// \param[in] tag A terse message category description (e.g., 'SCENE STAT'). +/// \param[in] message Null terminated log message (without newline at the end). +/// \param[in] cbdata Callback data that was provided with the callback pointer. +/// +/// It is the user's responsibility to ensure thread safety within this function. +/// +/// The following log levels are defined. +/// +/// 0 disable Setting the callback level will disable all messages. The callback +/// function will not be called in this case. +/// 1 fatal A non-recoverable error. The context and/or OptiX itself might no longer +/// be in a usable state. +/// 2 error A recoverable error, e.g., when passing invalid call parameters. +/// 3 warning Hints that OptiX might not behave exactly as requested by the user or +/// may perform slower than expected. +/// 4 print Status or progress messages. +/// +/// Higher levels might occur. +/// +/// \see #optixDeviceContextSetLogCallback(), #OptixDeviceContextOptions +typedef void ( *OptixLogCallback )( unsigned int level, const char* tag, const char* message, void* cbdata ); + +/// Validation mode settings. +/// +/// When enabled, certain device code utilities will be enabled to provide as good debug and +/// error checking facilities as possible.
+/// +/// +/// \see #optixDeviceContextCreate() +typedef enum OptixDeviceContextValidationMode +{ + OPTIX_DEVICE_CONTEXT_VALIDATION_MODE_OFF = 0, + OPTIX_DEVICE_CONTEXT_VALIDATION_MODE_ALL = 0xFFFFFFFF +} OptixDeviceContextValidationMode; + +/// Parameters used for #optixDeviceContextCreate() +/// +/// \see #optixDeviceContextCreate() +typedef struct OptixDeviceContextOptions +{ + /// Function pointer used when OptiX wishes to generate messages + OptixLogCallback logCallbackFunction; + /// Pointer stored and passed to logCallbackFunction when a message is generated + void* logCallbackData; + /// Maximum callback level to generate message for (see #OptixLogCallback) + int logCallbackLevel; + /// Validation mode of context. + OptixDeviceContextValidationMode validationMode; +} OptixDeviceContextOptions; + +/// Flags used by #OptixBuildInputTriangleArray::flags +/// and #OptixBuildInput::flag +/// and #OptixBuildInputCustomPrimitiveArray::flags +typedef enum OptixGeometryFlags +{ + /// No flags set + OPTIX_GEOMETRY_FLAG_NONE = 0, + + /// Disables the invocation of the anyhit program. + /// Can be overridden by OPTIX_INSTANCE_FLAG_ENFORCE_ANYHIT and OPTIX_RAY_FLAG_ENFORCE_ANYHIT. + OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT = 1u << 0, + + /// If set, an intersection with the primitive will trigger one and only one + /// invocation of the anyhit program. Otherwise, the anyhit program may be invoked + /// more than once. + OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL = 1u << 1, + + /// Prevent triangles from getting culled due to their orientation. + /// Effectively ignores ray flags + /// OPTIX_RAY_FLAG_CULL_BACK_FACING_TRIANGLES and OPTIX_RAY_FLAG_CULL_FRONT_FACING_TRIANGLES. + OPTIX_GEOMETRY_FLAG_DISABLE_TRIANGLE_FACE_CULLING = 1u << 2, + +} OptixGeometryFlags; + +/// Legacy type: A subset of the hit kinds for built-in primitive intersections. +/// It is preferred to use optixGetPrimitiveType(), together with +/// optixIsFrontFaceHit() or optixIsBackFaceHit(). 
+/// +/// \see #optixGetHitKind() +typedef enum OptixHitKind +{ + /// Ray hit the triangle on the front face + OPTIX_HIT_KIND_TRIANGLE_FRONT_FACE = 0xFE, + /// Ray hit the triangle on the back face + OPTIX_HIT_KIND_TRIANGLE_BACK_FACE = 0xFF +} OptixHitKind; + +/// Format of indices used in #OptixBuildInputTriangleArray::indexFormat. +typedef enum OptixIndicesFormat +{ + /// No indices, this format must only be used in combination with triangle soups, i.e., numIndexTriplets must be zero + OPTIX_INDICES_FORMAT_NONE = 0, + /// Three shorts + OPTIX_INDICES_FORMAT_UNSIGNED_SHORT3 = 0x2102, + /// Three ints + OPTIX_INDICES_FORMAT_UNSIGNED_INT3 = 0x2103 +} OptixIndicesFormat; + +/// Format of vertices used in #OptixBuildInputTriangleArray::vertexFormat. +typedef enum OptixVertexFormat +{ + OPTIX_VERTEX_FORMAT_NONE = 0, ///< No vertices + OPTIX_VERTEX_FORMAT_FLOAT3 = 0x2121, ///< Vertices are represented by three floats + OPTIX_VERTEX_FORMAT_FLOAT2 = 0x2122, ///< Vertices are represented by two floats + OPTIX_VERTEX_FORMAT_HALF3 = 0x2123, ///< Vertices are represented by three halfs + OPTIX_VERTEX_FORMAT_HALF2 = 0x2124, ///< Vertices are represented by two halfs + OPTIX_VERTEX_FORMAT_SNORM16_3 = 0x2125, + OPTIX_VERTEX_FORMAT_SNORM16_2 = 0x2126 +} OptixVertexFormat; + +/// Format of transform used in #OptixBuildInputTriangleArray::transformFormat. +typedef enum OptixTransformFormat +{ + OPTIX_TRANSFORM_FORMAT_NONE = 0, ///< no transform, default for zero initialization + OPTIX_TRANSFORM_FORMAT_MATRIX_FLOAT12 = 0x21E1, ///< 3x4 row major affine matrix +} OptixTransformFormat; + + +/// Triangle inputs +/// +/// \see #OptixBuildInput::triangleArray +typedef struct OptixBuildInputTriangleArray +{ + /// Points to host array of device pointers, one per motion step. Host array size must match the number of + /// motion keys as set in #OptixMotionOptions (or an array of size 1 if OptixMotionOptions::numKeys is set + /// to 0 or 1).
Each per motion key device pointer must point to an array of vertices of the + /// triangles in the format as described by vertexFormat. The minimum alignment must match the natural + /// alignment of the type as specified in the vertexFormat, i.e., for OPTIX_VERTEX_FORMAT_FLOATX 4-byte, + /// for all others a 2-byte alignment. However, a 16-byte stride (and buffer alignment) is recommended for + /// vertices of format OPTIX_VERTEX_FORMAT_FLOAT3 for GAS build performance. + const CUdeviceptr* vertexBuffers; + + /// Number of vertices in each buffer in OptixBuildInputTriangleArray::vertexBuffers. + unsigned int numVertices; + + /// \see #OptixVertexFormat + OptixVertexFormat vertexFormat; + + /// Stride between vertices. If set to zero, vertices are assumed to be tightly + /// packed and stride is inferred from vertexFormat. + unsigned int vertexStrideInBytes; + + /// Optional pointer to array of 16 or 32-bit int triplets, one triplet per triangle. + /// The minimum alignment must match the natural alignment of the type as specified in the indexFormat, i.e., + /// for OPTIX_INDICES_FORMAT_UNSIGNED_INT3 4-byte and for OPTIX_INDICES_FORMAT_UNSIGNED_SHORT3 a 2-byte alignment. + CUdeviceptr indexBuffer; + + /// Size of array in OptixBuildInputTriangleArray::indexBuffer. For build, needs to be zero if indexBuffer is \c nullptr. + unsigned int numIndexTriplets; + + /// \see #OptixIndicesFormat + OptixIndicesFormat indexFormat; + + /// Stride between triplets of indices. If set to zero, indices are assumed to be tightly + /// packed and stride is inferred from indexFormat. + unsigned int indexStrideInBytes; + + /// Optional pointer to array of floats + /// representing a 3x4 row major affine + /// transformation matrix.
This pointer must be a multiple of OPTIX_GEOMETRY_TRANSFORM_BYTE_ALIGNMENT + CUdeviceptr preTransform; + + /// Array of flags, to specify flags per sbt record, + /// combinations of OptixGeometryFlags describing the + /// primitive behavior, size must match numSbtRecords + const unsigned int* flags; + + /// Number of sbt records available to the sbt index offset override. + unsigned int numSbtRecords; + + /// Device pointer to per-primitive local sbt index offset buffer. May be NULL. + /// Every entry must be in range [0,numSbtRecords-1]. + /// Size needs to be the number of primitives. + CUdeviceptr sbtIndexOffsetBuffer; + + /// Size of type of the sbt index offset. Needs to be 0, 1, 2 or 4 (8, 16 or 32 bit). + unsigned int sbtIndexOffsetSizeInBytes; + + /// Stride between the index offsets. If set to zero, the offsets are assumed to be tightly + /// packed and the stride matches the size of the type (sbtIndexOffsetSizeInBytes). + unsigned int sbtIndexOffsetStrideInBytes; + + /// Primitive index bias, applied in optixGetPrimitiveIndex(). + /// Sum of primitiveIndexOffset and number of triangles must not overflow 32bits. + unsigned int primitiveIndexOffset; + + /// \see #OptixTransformFormat + OptixTransformFormat transformFormat; + + +} OptixBuildInputTriangleArray; + +/// Builtin primitive types +/// +typedef enum OptixPrimitiveType +{ + /// Custom primitive. + OPTIX_PRIMITIVE_TYPE_CUSTOM = 0x2500, + /// B-spline curve of degree 2 with circular cross-section. + OPTIX_PRIMITIVE_TYPE_ROUND_QUADRATIC_BSPLINE = 0x2501, + /// B-spline curve of degree 3 with circular cross-section. + OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE = 0x2502, + /// Piecewise linear curve with circular cross-section. + OPTIX_PRIMITIVE_TYPE_ROUND_LINEAR = 0x2503, + /// CatmullRom curve with circular cross-section. + OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM = 0x2504, + OPTIX_PRIMITIVE_TYPE_SPHERE = 0x2506, + /// Triangle. 
+ OPTIX_PRIMITIVE_TYPE_TRIANGLE = 0x2531, +} OptixPrimitiveType; + +/// Builtin flags may be bitwise combined. +/// +/// \see #OptixPipelineCompileOptions::usesPrimitiveTypeFlags +typedef enum OptixPrimitiveTypeFlags +{ + /// Custom primitive. + OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM = 1 << 0, + /// B-spline curve of degree 2 with circular cross-section. + OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_QUADRATIC_BSPLINE = 1 << 1, + /// B-spline curve of degree 3 with circular cross-section. + OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE = 1 << 2, + /// Piecewise linear curve with circular cross-section. + OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_LINEAR = 1 << 3, + /// CatmullRom curve with circular cross-section. + OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CATMULLROM = 1 << 4, + OPTIX_PRIMITIVE_TYPE_FLAGS_SPHERE = 1 << 6, + /// Triangle. + OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE = 1 << 31, +} OptixPrimitiveTypeFlags; + +/// Curve end cap types, for non-linear curves +/// +typedef enum OptixCurveEndcapFlags +{ + /// Default end caps. Round end caps for linear, no end caps for quadratic/cubic. + OPTIX_CURVE_ENDCAP_DEFAULT = 0, + /// Flat end caps at both ends of quadratic/cubic curve segments. Not valid for linear. + OPTIX_CURVE_ENDCAP_ON = 1 << 0, +} OptixCurveEndcapFlags; + +/// Curve inputs +/// +/// A curve is a swept surface defined by a 3D spline curve and a varying width (radius). A curve (or "strand") of +/// degree d (3=cubic, 2=quadratic, 1=linear) is represented by N > d vertices and N width values, and comprises N - d segments. +/// Each segment is defined by d+1 consecutive vertices. Each curve may have a different number of vertices. +/// +/// OptiX describes the curve array as a list of curve segments. The primitive id is the segment number. +/// It is the user's responsibility to maintain a mapping between curves and curve segments. 
+/// Each index buffer entry i = indexBuffer[primid] specifies the start of a curve segment, +/// represented by d+1 consecutive vertices in the vertex buffer, +/// and d+1 consecutive widths in the width buffer. Width is interpolated the same +/// way vertices are interpolated, that is, using the curve basis. +/// +/// Each curves build input has only one SBT record. +/// To create curves with different materials in the same BVH, use multiple build inputs. +/// +/// \see #OptixBuildInput::curveArray +typedef struct OptixBuildInputCurveArray +{ + /// Curve degree and basis + /// \see #OptixPrimitiveType + OptixPrimitiveType curveType; + /// Number of primitives. Each primitive is a polynomial curve segment. + unsigned int numPrimitives; + + /// Pointer to host array of device pointers, one per motion step. Host array size must match number of + /// motion keys as set in #OptixMotionOptions (or an array of size 1 if OptixMotionOptions::numKeys is set + /// to 1). Each per-motion-key device pointer must point to an array of floats (the vertices of the + /// curves). + const CUdeviceptr* vertexBuffers; + /// Number of vertices in each buffer in vertexBuffers. + unsigned int numVertices; + /// Stride between vertices. If set to zero, vertices are assumed to be tightly + /// packed and stride is sizeof( float3 ). + unsigned int vertexStrideInBytes; + + /// Parallel to vertexBuffers: a device pointer per motion step, each with numVertices float values, + /// specifying the curve width (radius) corresponding to each vertex. + const CUdeviceptr* widthBuffers; + /// Stride between widths. If set to zero, widths are assumed to be tightly + /// packed and stride is sizeof( float ). + unsigned int widthStrideInBytes; + + /// Reserved for future use. + const CUdeviceptr* normalBuffers; + /// Reserved for future use. + unsigned int normalStrideInBytes; + + /// Device pointer to array of unsigned ints, one per curve segment. 
+ /// This buffer is required (unlike for OptixBuildInputTriangleArray). + /// Each index is the start of degree+1 consecutive vertices in vertexBuffers, + /// and corresponding widths in widthBuffers and normals in normalBuffers. + /// These define a single segment. Size of array is numPrimitives. + CUdeviceptr indexBuffer; + /// Stride between indices. If set to zero, indices are assumed to be tightly + /// packed and stride is sizeof( unsigned int ). + unsigned int indexStrideInBytes; + + /// Combination of OptixGeometryFlags describing the + /// primitive behavior. + unsigned int flag; + + /// Primitive index bias, applied in optixGetPrimitiveIndex(). + /// Sum of primitiveIndexOffset and number of primitives must not overflow 32bits. + unsigned int primitiveIndexOffset; + + /// End cap flags, see OptixCurveEndcapFlags + unsigned int endcapFlags; +} OptixBuildInputCurveArray; + +/// Sphere inputs +/// +/// A sphere is defined by a center point and a radius. +/// Each center point is represented by a vertex in the vertex buffer. +/// There is either a single radius for all spheres, or the radii are represented by entries in the radius buffer. +/// +/// The vertex buffers and radius buffers point to a host array of device pointers, one per motion step. +/// Host array size must match the number of motion keys as set in #OptixMotionOptions (or an array of size 1 if OptixMotionOptions::numKeys is set +/// to 0 or 1). Each per motion key device pointer must point to an array of vertices corresponding to the center points of the spheres, or +/// an array of 1 or N radii. Format OPTIX_VERTEX_FORMAT_FLOAT3 is used for vertices, OPTIX_VERTEX_FORMAT_FLOAT for radii. +/// +/// \see #OptixBuildInput::sphereArray +typedef struct OptixBuildInputSphereArray +{ + /// Pointer to host array of device pointers, one per motion step. 
Host array size must match number of + /// motion keys as set in #OptixMotionOptions (or an array of size 1 if OptixMotionOptions::numKeys is set + /// to 1). Each per-motion-key device pointer must point to an array of floats (the center points of + /// the spheres). + const CUdeviceptr* vertexBuffers; + + /// Stride between vertices. If set to zero, vertices are assumed to be tightly + /// packed and stride is sizeof( float3 ). + unsigned int vertexStrideInBytes; + /// Number of vertices in each buffer in vertexBuffers. + unsigned int numVertices; + + /// Parallel to vertexBuffers: a device pointer per motion step, each with numRadii float values, + /// specifying the sphere radius corresponding to each vertex. + const CUdeviceptr* radiusBuffers; + /// Stride between radii. If set to zero, radii are assumed to be tightly + /// packed and stride is sizeof( float ). + unsigned int radiusStrideInBytes; + /// Boolean value indicating whether a single radius per radius buffer is used, + /// or the number of radii in radiusBuffers equals numVertices. + int singleRadius; + + /// Array of flags, to specify flags per sbt record, + /// combinations of OptixGeometryFlags describing the + /// primitive behavior, size must match numSbtRecords + const unsigned int* flags; + + /// Number of sbt records available to the sbt index offset override. + unsigned int numSbtRecords; + /// Device pointer to per-primitive local sbt index offset buffer. May be NULL. + /// Every entry must be in range [0,numSbtRecords-1]. + /// Size needs to be the number of primitives. + CUdeviceptr sbtIndexOffsetBuffer; + /// Size of type of the sbt index offset. Needs to be 0, 1, 2 or 4 (8, 16 or 32 bit). + unsigned int sbtIndexOffsetSizeInBytes; + /// Stride between the sbt index offsets. If set to zero, the offsets are assumed to be tightly + /// packed and the stride matches the size of the type (sbtIndexOffsetSizeInBytes).
+ unsigned int sbtIndexOffsetStrideInBytes; + + /// Primitive index bias, applied in optixGetPrimitiveIndex(). + /// Sum of primitiveIndexOffset and number of primitives must not overflow 32bits. + unsigned int primitiveIndexOffset; +} OptixBuildInputSphereArray; + +/// AABB inputs +typedef struct OptixAabb +{ + float minX; ///< Lower extent in X direction. + float minY; ///< Lower extent in Y direction. + float minZ; ///< Lower extent in Z direction. + float maxX; ///< Upper extent in X direction. + float maxY; ///< Upper extent in Y direction. + float maxZ; ///< Upper extent in Z direction. +} OptixAabb; + +/// Custom primitive inputs +/// +/// \see #OptixBuildInput::customPrimitiveArray +typedef struct OptixBuildInputCustomPrimitiveArray +{ + /// Points to host array of device pointers to AABBs (type OptixAabb), one per motion step. + /// Host array size must match number of motion keys as set in OptixMotionOptions (or an array of size 1 + /// if OptixMotionOptions::numKeys is set to 1). + /// Each device pointer must be a multiple of OPTIX_AABB_BUFFER_BYTE_ALIGNMENT. + const CUdeviceptr* aabbBuffers; + + /// Number of primitives in each buffer (i.e., per motion step) in + /// #OptixBuildInputCustomPrimitiveArray::aabbBuffers. + unsigned int numPrimitives; + + /// Stride between AABBs (per motion key). If set to zero, the aabbs are assumed to be tightly + /// packed and the stride is assumed to be sizeof( OptixAabb ). + /// If non-zero, the value must be a multiple of OPTIX_AABB_BUFFER_BYTE_ALIGNMENT. + unsigned int strideInBytes; + + /// Array of flags, to specify flags per sbt record, + /// combinations of OptixGeometryFlags describing the + /// primitive behavior, size must match numSbtRecords + const unsigned int* flags; + + /// Number of sbt records available to the sbt index offset override. + unsigned int numSbtRecords; + + /// Device pointer to per-primitive local sbt index offset buffer. May be NULL. 
+ /// Every entry must be in range [0,numSbtRecords-1]. + /// Size needs to be the number of primitives. + CUdeviceptr sbtIndexOffsetBuffer; + + /// Size of type of the sbt index offset. Needs to be 0, 1, 2 or 4 (8, 16 or 32 bit). + unsigned int sbtIndexOffsetSizeInBytes; + + /// Stride between the index offsets. If set to zero, the offsets are assumed to be tightly + /// packed and the stride matches the size of the type (sbtIndexOffsetSizeInBytes). + unsigned int sbtIndexOffsetStrideInBytes; + + /// Primitive index bias, applied in optixGetPrimitiveIndex(). + /// Sum of primitiveIndexOffset and number of primitives must not overflow 32bits. + unsigned int primitiveIndexOffset; +} OptixBuildInputCustomPrimitiveArray; + +/// Instance and instance pointer inputs +/// +/// \see #OptixBuildInput::instanceArray +typedef struct OptixBuildInputInstanceArray +{ + /// If OptixBuildInput::type is OPTIX_BUILD_INPUT_TYPE_INSTANCE_POINTERS instances and + /// aabbs should be interpreted as arrays of pointers instead of arrays of structs. + /// + /// This pointer must be a multiple of OPTIX_INSTANCE_BYTE_ALIGNMENT if + /// OptixBuildInput::type is OPTIX_BUILD_INPUT_TYPE_INSTANCES. The array elements must + /// be a multiple of OPTIX_INSTANCE_BYTE_ALIGNMENT if OptixBuildInput::type is + /// OPTIX_BUILD_INPUT_TYPE_INSTANCE_POINTERS. + CUdeviceptr instances; + + /// Number of elements in #OptixBuildInputInstanceArray::instances. + unsigned int numInstances; + +} OptixBuildInputInstanceArray; + +/// Enum to distinguish the different build input types. +/// +/// \see #OptixBuildInput::type +typedef enum OptixBuildInputType +{ + /// Triangle inputs. \see #OptixBuildInputTriangleArray + OPTIX_BUILD_INPUT_TYPE_TRIANGLES = 0x2141, + /// Custom primitive inputs. \see #OptixBuildInputCustomPrimitiveArray + OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES = 0x2142, + /// Instance inputs. \see #OptixBuildInputInstanceArray + OPTIX_BUILD_INPUT_TYPE_INSTANCES = 0x2143, + /// Instance pointer inputs.
\see #OptixBuildInputInstanceArray + OPTIX_BUILD_INPUT_TYPE_INSTANCE_POINTERS = 0x2144, + /// Curve inputs. \see #OptixBuildInputCurveArray + OPTIX_BUILD_INPUT_TYPE_CURVES = 0x2145, + /// Sphere inputs. \see #OptixBuildInputSphereArray + OPTIX_BUILD_INPUT_TYPE_SPHERES = 0x2146 +} OptixBuildInputType; + +/// Build inputs. +/// +/// All of them support motion and the size of the data arrays needs to match the number of motion steps +/// +/// \see #optixAccelComputeMemoryUsage(), #optixAccelBuild() +typedef struct OptixBuildInput +{ + /// The type of the build input. + OptixBuildInputType type; + + union + { + /// Triangle inputs. + OptixBuildInputTriangleArray triangleArray; + /// Curve inputs. + OptixBuildInputCurveArray curveArray; + /// Sphere inputs. + OptixBuildInputSphereArray sphereArray; + /// Custom primitive inputs. + OptixBuildInputCustomPrimitiveArray customPrimitiveArray; + /// Instance and instance pointer inputs. + OptixBuildInputInstanceArray instanceArray; + char pad[1024]; + }; +} OptixBuildInput; + +// Some 32-bit tools use this header. This static_assert fails for them because +// the default enum size is 4 bytes, rather than 8, under 32-bit compilers. +// This #ifndef allows them to disable the static assert. + +// TODO Define a static assert for C/pre-C++-11 +#if defined( __cplusplus ) && __cplusplus >= 201103L +static_assert( sizeof( OptixBuildInput ) == 8 + 1024, "OptixBuildInput has wrong size" ); +#endif + +/// Flags set on the #OptixInstance::flags. +/// +/// These can be or'ed together to combine multiple flags. +typedef enum OptixInstanceFlags +{ + /// No special flag set + OPTIX_INSTANCE_FLAG_NONE = 0, + + /// Prevent triangles from getting culled due to their orientation. + /// Effectively ignores ray flags + /// OPTIX_RAY_FLAG_CULL_BACK_FACING_TRIANGLES and OPTIX_RAY_FLAG_CULL_FRONT_FACING_TRIANGLES. + OPTIX_INSTANCE_FLAG_DISABLE_TRIANGLE_FACE_CULLING = 1u << 0, + + /// Flip triangle orientation. 
+ /// This affects front/backface culling as well as the reported face in case of a hit. + OPTIX_INSTANCE_FLAG_FLIP_TRIANGLE_FACING = 1u << 1, + + /// Disable anyhit programs for all geometries of the instance. + /// Can be overridden by OPTIX_RAY_FLAG_ENFORCE_ANYHIT. + /// This flag is mutually exclusive with OPTIX_INSTANCE_FLAG_ENFORCE_ANYHIT. + OPTIX_INSTANCE_FLAG_DISABLE_ANYHIT = 1u << 2, + + /// Enables anyhit programs for all geometries of the instance. + /// Overrides OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT + /// Can be overridden by OPTIX_RAY_FLAG_DISABLE_ANYHIT. + /// This flag is mutually exclusive with OPTIX_INSTANCE_FLAG_DISABLE_ANYHIT. + OPTIX_INSTANCE_FLAG_ENFORCE_ANYHIT = 1u << 3, + + + +} OptixInstanceFlags; + +/// Instances +/// +/// \see #OptixBuildInputInstanceArray::instances +typedef struct OptixInstance +{ + /// affine object-to-world transformation as 3x4 matrix in row-major layout + float transform[12]; + + /// Application supplied ID. The maximal ID can be queried using OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_ID. + unsigned int instanceId; + + /// SBT record offset. Will only be used for instances of geometry acceleration structure (GAS) objects. + /// Needs to be set to 0 for instances of instance acceleration structure (IAS) objects. The maximal SBT offset + /// can be queried using OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_SBT_OFFSET. + unsigned int sbtOffset; + + /// Visibility mask. If rayMask & instanceMask == 0 the instance is culled. The number of available bits can be + /// queried using OPTIX_DEVICE_PROPERTY_LIMIT_NUM_BITS_INSTANCE_VISIBILITY_MASK. + unsigned int visibilityMask; + + /// Any combination of OptixInstanceFlags is allowed. + unsigned int flags; + + /// Set with an OptixTraversableHandle. + OptixTraversableHandle traversableHandle; + + /// round up to 80-byte, to ensure 16-byte alignment + unsigned int pad[2]; +} OptixInstance; + +/// Builder Options +/// +/// Used for #OptixAccelBuildOptions::buildFlags. 
Can be or'ed together. +typedef enum OptixBuildFlags +{ + /// No special flags set. + OPTIX_BUILD_FLAG_NONE = 0, + + /// Allow updating the build with new vertex positions with subsequent calls to + /// optixAccelBuild. + OPTIX_BUILD_FLAG_ALLOW_UPDATE = 1u << 0, + + OPTIX_BUILD_FLAG_ALLOW_COMPACTION = 1u << 1, + + OPTIX_BUILD_FLAG_PREFER_FAST_TRACE = 1u << 2, + + OPTIX_BUILD_FLAG_PREFER_FAST_BUILD = 1u << 3, + + /// Allow random access to build input vertices + /// See optixGetTriangleVertexData + /// optixGetLinearCurveVertexData + /// optixGetQuadraticBSplineVertexData + /// optixGetCubicBSplineVertexData + /// optixGetCatmullRomVertexData + /// optixGetSphereData + OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS = 1u << 4, + + /// Allow random access to instances + /// See optixGetInstanceTraversableFromIAS + OPTIX_BUILD_FLAG_ALLOW_RANDOM_INSTANCE_ACCESS = 1u << 5, + +} OptixBuildFlags; + + + + +/// Enum to specify the acceleration build operation. +/// +/// Used in OptixAccelBuildOptions, which is then passed to optixAccelBuild and +/// optixAccelComputeMemoryUsage, this enum indicates whether to do a build or an update +/// of the acceleration structure. +/// +/// Acceleration structure updates utilize the same acceleration structure, but with +/// updated bounds. Updates are typically much faster than builds, however, large +/// perturbations can degrade the quality of the acceleration structure. +/// +/// \see #optixAccelComputeMemoryUsage(), #optixAccelBuild(), #OptixAccelBuildOptions +typedef enum OptixBuildOperation +{ + /// Perform a full build operation + OPTIX_BUILD_OPERATION_BUILD = 0x2161, + /// Perform an update using new bounds + OPTIX_BUILD_OPERATION_UPDATE = 0x2162, +} OptixBuildOperation; + +/// Enum to specify motion flags. +/// +/// \see #OptixMotionOptions::flags. 
+typedef enum OptixMotionFlags +{ + OPTIX_MOTION_FLAG_NONE = 0, + OPTIX_MOTION_FLAG_START_VANISH = 1u << 0, + OPTIX_MOTION_FLAG_END_VANISH = 1u << 1 +} OptixMotionFlags; + +/// Motion options +/// +/// \see #OptixAccelBuildOptions::motionOptions, #OptixMatrixMotionTransform::motionOptions, +/// #OptixSRTMotionTransform::motionOptions +typedef struct OptixMotionOptions +{ + /// If numKeys > 1, motion is enabled. timeBegin, + /// timeEnd and flags are all ignored when motion is disabled. + unsigned short numKeys; + + /// Combinations of #OptixMotionFlags + unsigned short flags; + + /// Point in time where motion starts. + float timeBegin; + + /// Point in time where motion ends. + float timeEnd; +} OptixMotionOptions; + +/// Build options for acceleration structures. +/// +/// \see #optixAccelComputeMemoryUsage(), #optixAccelBuild() +typedef struct OptixAccelBuildOptions +{ + /// Combinations of OptixBuildFlags + unsigned int buildFlags; + + /// If OPTIX_BUILD_OPERATION_UPDATE the output buffer is assumed to contain the result + /// of a full build with OPTIX_BUILD_FLAG_ALLOW_UPDATE set and using the same number of + /// primitives. It is updated incrementally to reflect the current position of the + /// primitives. + OptixBuildOperation operation; + + /// Options for motion. + OptixMotionOptions motionOptions; +} OptixAccelBuildOptions; + +/// Struct for querying builder allocation requirements. +/// +/// Once queried the sizes should be used to allocate device memory of at least these sizes. +/// +/// \see #optixAccelComputeMemoryUsage() +typedef struct OptixAccelBufferSizes +{ + /// The size in bytes required for the outputBuffer parameter to optixAccelBuild when + /// doing a build (OPTIX_BUILD_OPERATION_BUILD). + size_t outputSizeInBytes; + + /// The size in bytes required for the tempBuffer parameter to optixAccelBuild when + /// doing a build (OPTIX_BUILD_OPERATION_BUILD).
+ size_t tempSizeInBytes; + + /// The size in bytes required for the tempBuffer parameter to optixAccelBuild + /// when doing an update (OPTIX_BUILD_OPERATION_UPDATE). This value can be different + /// than tempSizeInBytes used for a full build. Only non-zero if + /// OPTIX_BUILD_FLAG_ALLOW_UPDATE flag is set in OptixAccelBuildOptions. + size_t tempUpdateSizeInBytes; +} OptixAccelBufferSizes; + +/// Properties which can be emitted during acceleration structure build. +/// +/// \see #OptixAccelEmitDesc::type. +typedef enum OptixAccelPropertyType +{ + /// Size of a compacted acceleration structure. The device pointer points to a uint64. + OPTIX_PROPERTY_TYPE_COMPACTED_SIZE = 0x2181, + + /// OptixAabb * numMotionSteps + OPTIX_PROPERTY_TYPE_AABBS = 0x2182, +} OptixAccelPropertyType; + +/// Specifies a type and output destination for emitted post-build properties. +/// +/// \see #optixAccelBuild() +typedef struct OptixAccelEmitDesc +{ + /// Output buffer for the properties + CUdeviceptr result; + + /// Requested property + OptixAccelPropertyType type; +} OptixAccelEmitDesc; + +/// Used to store information related to relocation of acceleration structures. +/// +/// \see #optixAccelGetRelocationInfo(), #optixAccelCheckRelocationCompatibility(), #optixAccelRelocate() +typedef struct OptixAccelRelocationInfo +{ + /// Opaque data, used internally, should not be modified + unsigned long long info[4]; +} OptixAccelRelocationInfo; + +/// Static transform +/// +/// The device address of instances of this type must be a multiple of OPTIX_TRANSFORM_BYTE_ALIGNMENT. 
+/// +/// \see #optixConvertPointerToTraversableHandle() +typedef struct OptixStaticTransform +{ + /// The traversable transformed by this transformation + OptixTraversableHandle child; + + /// Padding to make the transformations 16 byte aligned + unsigned int pad[2]; + + /// Affine object-to-world transformation as 3x4 matrix in row-major layout + float transform[12]; + + /// Affine world-to-object transformation as 3x4 matrix in row-major layout + /// Must be the inverse of the transform matrix + float invTransform[12]; +} OptixStaticTransform; + +/// Represents a matrix motion transformation. +/// +/// The device address of instances of this type must be a multiple of OPTIX_TRANSFORM_BYTE_ALIGNMENT. +/// +/// This struct, as defined here, handles only N=2 motion keys due to the fixed array length of its transform member. +/// The following example shows how to create instances for an arbitrary number N of motion keys: +/// +/// \code +/// float matrixData[N][12]; +/// ... // setup matrixData +/// +/// size_t transformSizeInBytes = sizeof( OptixMatrixMotionTransform ) + ( N-2 ) * 12 * sizeof( float ); +/// OptixMatrixMotionTransform* matrixMoptionTransform = (OptixMatrixMotionTransform*) malloc( transformSizeInBytes ); +/// memset( matrixMoptionTransform, 0, transformSizeInBytes ); +/// +/// ... // setup other members of matrixMoptionTransform +/// matrixMoptionTransform->motionOptions.numKeys = N; +/// memcpy( matrixMoptionTransform->transform, matrixData, N * 12 * sizeof( float ) ); +/// +/// ...
// copy matrixMoptionTransform to device memory +/// free( matrixMoptionTransform ) +/// \endcode +/// +/// \see #optixConvertPointerToTraversableHandle() +typedef struct OptixMatrixMotionTransform +{ + /// The traversable that is transformed by this transformation + OptixTraversableHandle child; + + /// The motion options for this transformation + OptixMotionOptions motionOptions; + + /// Padding to make the transformation 16 byte aligned + unsigned int pad[3]; + + /// Affine object-to-world transformation as 3x4 matrix in row-major layout + float transform[2][12]; +} OptixMatrixMotionTransform; + +/// Represents an SRT transformation. +/// +/// An SRT transformation can represent a smooth rotation with fewer motion keys than a matrix transformation. Each +/// motion key is constructed from elements taken from a matrix S, a quaternion R, and a translation T. +/// +/// The scaling matrix +/// \f$S = \begin{bmatrix} sx & a & b & pvx \\ 0 & sy & c & pvy \\ 0 & 0 & sz & pvz \end{bmatrix}\f$ +// [ sx a b pvx ] +// S = [ 0 sy c pvy ] +// [ 0 0 sz pvz ] +/// defines an affine transformation that can include scale, shear, and a translation. +/// The translation allows to define the pivot point for the subsequent rotation. +/// +/// The quaternion R = [ qx, qy, qz, qw ] describes a rotation with angular component qw = cos(theta/2) and other +/// components [ qx, qy, qz ] = sin(theta/2) * [ ax, ay, az ] where the axis [ ax, ay, az ] is normalized. +/// +/// The translation matrix +/// \f$T = \begin{bmatrix} 1 & 0 & 0 & tx \\ 0 & 1 & 0 & ty \\ 0 & 0 & 1 & tz \end{bmatrix}\f$ +// [ 1 0 0 tx ] +// T = [ 0 1 0 ty ] +// [ 0 0 1 tz ] +/// defines another translation that is applied after the rotation. Typically, this translation includes +/// the inverse translation from the matrix S to reverse the translation for the pivot point for R. +/// +/// To obtain the effective transformation at time t, the elements of the components of S, R, and T will be interpolated +/// linearly. 
The components are then multiplied to obtain the combined transformation C = T * R * S. The transformation +/// C is the effective object-to-world transformation at time t, and C^(-1) is the effective world-to-object +/// transformation at time t. +/// +/// \see #OptixSRTMotionTransform::srtData, #optixConvertPointerToTraversableHandle() +typedef struct OptixSRTData +{ + /// \name Parameters describing the SRT transformation + /// @{ + float sx, a, b, pvx, sy, c, pvy, sz, pvz, qx, qy, qz, qw, tx, ty, tz; + /// @} +} OptixSRTData; + +// TODO Define a static assert for C/pre-C++-11 +#if defined( __cplusplus ) && __cplusplus >= 201103L +static_assert( sizeof( OptixSRTData ) == 16 * 4, "OptixSRTData has wrong size" ); +#endif + +/// Represents an SRT motion transformation. +/// +/// The device address of instances of this type must be a multiple of OPTIX_TRANSFORM_BYTE_ALIGNMENT. +/// +/// This struct, as defined here, handles only N=2 motion keys due to the fixed array length of its srtData member. +/// The following example shows how to create instances for an arbitrary number N of motion keys: +/// +/// \code +/// OptixSRTData srtData[N]; +/// ... // setup srtData +/// +/// size_t transformSizeInBytes = sizeof( OptixSRTMotionTransform ) + ( N-2 ) * sizeof( OptixSRTData ); +/// OptixSRTMotionTransform* srtMotionTransform = (OptixSRTMotionTransform*) malloc( transformSizeInBytes ); +/// memset( srtMotionTransform, 0, transformSizeInBytes ); +/// +/// ... // setup other members of srtMotionTransform +/// srtMotionTransform->motionOptions.numKeys = N; +/// memcpy( srtMotionTransform->srtData, srtData, N * sizeof( OptixSRTData ) ); +/// +/// ...
// copy srtMotionTransform to device memory +/// free( srtMotionTransform ) +/// \endcode +/// +/// \see #optixConvertPointerToTraversableHandle() +typedef struct OptixSRTMotionTransform +{ + /// The traversable transformed by this transformation + OptixTraversableHandle child; + + /// The motion options for this transformation + OptixMotionOptions motionOptions; + + /// Padding to make the SRT data 16 byte aligned + unsigned int pad[3]; + + /// The actual SRT data describing the transformation + OptixSRTData srtData[2]; +} OptixSRTMotionTransform; + +// TODO Define a static assert for C/pre-C++-11 +#if defined( __cplusplus ) && __cplusplus >= 201103L +static_assert( sizeof( OptixSRTMotionTransform ) == 8 + 12 + 12 + 2 * 16 * 4, "OptixSRTMotionTransform has wrong size" ); +#endif + +/// Traversable Handles +/// +/// \see #optixConvertPointerToTraversableHandle() +typedef enum OptixTraversableType +{ + /// Static transforms. \see #OptixStaticTransform + OPTIX_TRAVERSABLE_TYPE_STATIC_TRANSFORM = 0x21C1, + /// Matrix motion transform. \see #OptixMatrixMotionTransform + OPTIX_TRAVERSABLE_TYPE_MATRIX_MOTION_TRANSFORM = 0x21C2, + /// SRT motion transform. \see #OptixSRTMotionTransform + OPTIX_TRAVERSABLE_TYPE_SRT_MOTION_TRANSFORM = 0x21C3, +} OptixTraversableType; + +/// Pixel formats used by the denoiser. 
+/// +/// \see #OptixImage2D::format +typedef enum OptixPixelFormat +{ + OPTIX_PIXEL_FORMAT_HALF2 = 0x2207, ///< two halfs, XY + OPTIX_PIXEL_FORMAT_HALF3 = 0x2201, ///< three halfs, RGB + OPTIX_PIXEL_FORMAT_HALF4 = 0x2202, ///< four halfs, RGBA + OPTIX_PIXEL_FORMAT_FLOAT2 = 0x2208, ///< two floats, XY + OPTIX_PIXEL_FORMAT_FLOAT3 = 0x2203, ///< three floats, RGB + OPTIX_PIXEL_FORMAT_FLOAT4 = 0x2204, ///< four floats, RGBA + OPTIX_PIXEL_FORMAT_UCHAR3 = 0x2205, ///< three unsigned chars, RGB + OPTIX_PIXEL_FORMAT_UCHAR4 = 0x2206, ///< four unsigned chars, RGBA + OPTIX_PIXEL_FORMAT_INTERNAL_GUIDE_LAYER = 0x2209, ///< internal format +} OptixPixelFormat; + +/// Image descriptor used by the denoiser. +/// +/// \see #optixDenoiserInvoke(), #optixDenoiserComputeIntensity() +typedef struct OptixImage2D +{ + /// Pointer to the actual pixel data. + CUdeviceptr data; + /// Width of the image (in pixels) + unsigned int width; + /// Height of the image (in pixels) + unsigned int height; + /// Stride between subsequent rows of the image (in bytes). + unsigned int rowStrideInBytes; + /// Stride between subsequent pixels of the image (in bytes). + /// If set to 0, dense packing (no gaps) is assumed. + /// For pixel format OPTIX_PIXEL_FORMAT_INTERNAL_GUIDE_LAYER it must be set to + /// at least OptixDenoiserSizes::internalGuideLayerPixelSizeInBytes. + unsigned int pixelStrideInBytes; + /// Pixel format. + OptixPixelFormat format; +} OptixImage2D; + +/// Model kind used by the denoiser. +/// +/// \see #optixDenoiserCreate +typedef enum OptixDenoiserModelKind +{ + /// Use the built-in model appropriate for low dynamic range input. + OPTIX_DENOISER_MODEL_KIND_LDR = 0x2322, + + /// Use the built-in model appropriate for high dynamic range input.
+ OPTIX_DENOISER_MODEL_KIND_HDR = 0x2323, + + /// Use the built-in model appropriate for high dynamic range input and support for AOVs + OPTIX_DENOISER_MODEL_KIND_AOV = 0x2324, + + /// Use the built-in model appropriate for high dynamic range input, temporally stable + OPTIX_DENOISER_MODEL_KIND_TEMPORAL = 0x2325, + + /// Use the built-in model appropriate for high dynamic range input and support for AOVs, temporally stable + OPTIX_DENOISER_MODEL_KIND_TEMPORAL_AOV = 0x2326, + + /// Use the built-in model appropriate for high dynamic range input and support for AOVs, upscaling 2x + OPTIX_DENOISER_MODEL_KIND_UPSCALE2X = 0x2327, + + /// Use the built-in model appropriate for high dynamic range input and support for AOVs, upscaling 2x, + /// temporally stable + OPTIX_DENOISER_MODEL_KIND_TEMPORAL_UPSCALE2X = 0x2328, +} OptixDenoiserModelKind; + +/// Options used by the denoiser +/// +/// \see #optixDenoiserCreate() +typedef struct OptixDenoiserOptions +{ + // if nonzero, albedo image must be given in OptixDenoiserGuideLayer + unsigned int guideAlbedo; + + // if nonzero, normal image must be given in OptixDenoiserGuideLayer + unsigned int guideNormal; +} OptixDenoiserOptions; + +/// Guide layer for the denoiser +/// +/// \see #optixDenoiserInvoke() +typedef struct OptixDenoiserGuideLayer +{ + // albedo/bsdf image + OptixImage2D albedo; + + // normal vector image (2d or 3d pixel format) + OptixImage2D normal; + + // 2d flow image, pixel flow from previous to current frame for each pixel + OptixImage2D flow; + + OptixImage2D previousOutputInternalGuideLayer; + OptixImage2D outputInternalGuideLayer; +} OptixDenoiserGuideLayer; + +/// Input/Output layers for the denoiser +/// +/// \see #optixDenoiserInvoke() +typedef struct OptixDenoiserLayer +{ + // input image (beauty or AOV) + OptixImage2D input; + + // denoised output image from previous frame if temporal model kind selected + OptixImage2D previousOutput; + + // denoised output for given input + OptixImage2D output; +} 
OptixDenoiserLayer; + +/// Various parameters used by the denoiser +/// +/// \see #optixDenoiserInvoke() +/// \see #optixDenoiserComputeIntensity() +/// \see #optixDenoiserComputeAverageColor() +typedef enum OptixDenoiserAlphaMode +{ + /// Copy alpha (if present) from input layer, no denoising. + OPTIX_DENOISER_ALPHA_MODE_COPY = 0, + + /// Denoise alpha separately. With AOV model kinds, treat alpha like an AOV. + OPTIX_DENOISER_ALPHA_MODE_ALPHA_AS_AOV = 1, + + /// With AOV model kinds, full denoise pass with alpha. + /// This is slower than OPTIX_DENOISER_ALPHA_MODE_ALPHA_AS_AOV. + OPTIX_DENOISER_ALPHA_MODE_FULL_DENOISE_PASS = 2 +} OptixDenoiserAlphaMode; +typedef struct OptixDenoiserParams +{ + /// alpha denoise mode + OptixDenoiserAlphaMode denoiseAlpha; + + /// average log intensity of input image (default null pointer). points to a single float. + /// with the default (null pointer) denoised results will not be optimal for very dark or + /// bright input images. + CUdeviceptr hdrIntensity; + + /// blend factor. + /// If set to 0 the output is 100% of the denoised input. If set to 1, the output is 100% of + /// the unmodified input. Values between 0 and 1 will linearly interpolate between the denoised + /// and unmodified input. + float blendFactor; + + /// this parameter is used when the OPTIX_DENOISER_MODEL_KIND_AOV model kind is set. + /// average log color of input image, separate for RGB channels (default null pointer). + /// points to three floats. with the default (null pointer) denoised results will not be + /// optimal. + CUdeviceptr hdrAverageColor; + + /// In temporal modes this parameter must be set to 1 if previous layers (e.g. + /// previousOutputInternalGuideLayer) contain valid data. This is the case in the + /// second and subsequent frames of a sequence (for example after a change of camera + /// angle). In the first frame of such a sequence this parameter must be set to 0. 
+ unsigned int temporalModeUsePreviousLayers; +} OptixDenoiserParams; + +/// Various sizes related to the denoiser. +/// +/// \see #optixDenoiserComputeMemoryResources() +typedef struct OptixDenoiserSizes +{ + /// Size of state memory passed to #optixDenoiserSetup, #optixDenoiserInvoke. + size_t stateSizeInBytes; + + /// Size of scratch memory passed to #optixDenoiserSetup, #optixDenoiserInvoke. + /// Overlap added to dimensions passed to #optixDenoiserComputeMemoryResources. + size_t withOverlapScratchSizeInBytes; + + /// Size of scratch memory passed to #optixDenoiserSetup, #optixDenoiserInvoke. + /// No overlap added. + size_t withoutOverlapScratchSizeInBytes; + + /// Overlap on all four tile sides. + unsigned int overlapWindowSizeInPixels; + + /// Size of scratch memory passed to #optixDenoiserComputeAverageColor. + /// The size is independent of the tile/image resolution. + size_t computeAverageColorSizeInBytes; + + /// Size of scratch memory passed to #optixDenoiserComputeIntensity. + /// The size is independent of the tile/image resolution. + size_t computeIntensitySizeInBytes; + + /// Number of bytes for each pixel in internal guide layers. + size_t internalGuideLayerPixelSizeInBytes; +} OptixDenoiserSizes; + +/// Ray flags passed to the device function #optixTrace(). These affect the behavior of +/// traversal per invocation. +/// +/// \see #optixTrace() +typedef enum OptixRayFlags +{ + /// No change from the behavior configured for the individual AS. + OPTIX_RAY_FLAG_NONE = 0u, + + /// Disables anyhit programs for the ray. + /// Overrides OPTIX_INSTANCE_FLAG_ENFORCE_ANYHIT. + /// This flag is mutually exclusive with OPTIX_RAY_FLAG_ENFORCE_ANYHIT, + /// OPTIX_RAY_FLAG_CULL_DISABLED_ANYHIT, OPTIX_RAY_FLAG_CULL_ENFORCED_ANYHIT. + OPTIX_RAY_FLAG_DISABLE_ANYHIT = 1u << 0, + + /// Forces anyhit program execution for the ray. + /// Overrides OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT as well as OPTIX_INSTANCE_FLAG_DISABLE_ANYHIT. 
+ /// This flag is mutually exclusive with OPTIX_RAY_FLAG_DISABLE_ANYHIT, + /// OPTIX_RAY_FLAG_CULL_DISABLED_ANYHIT, OPTIX_RAY_FLAG_CULL_ENFORCED_ANYHIT. + OPTIX_RAY_FLAG_ENFORCE_ANYHIT = 1u << 1, + + /// Terminates the ray after the first hit and executes + /// the closesthit program of that hit. + OPTIX_RAY_FLAG_TERMINATE_ON_FIRST_HIT = 1u << 2, + + /// Disables closesthit programs for the ray, but still executes miss program in case of a miss. + OPTIX_RAY_FLAG_DISABLE_CLOSESTHIT = 1u << 3, + + /// Do not intersect triangle back faces + /// (respects a possible face change due to instance flag + /// OPTIX_INSTANCE_FLAG_FLIP_TRIANGLE_FACING). + /// This flag is mutually exclusive with OPTIX_RAY_FLAG_CULL_FRONT_FACING_TRIANGLES. + OPTIX_RAY_FLAG_CULL_BACK_FACING_TRIANGLES = 1u << 4, + + /// Do not intersect triangle front faces + /// (respects a possible face change due to instance flag + /// OPTIX_INSTANCE_FLAG_FLIP_TRIANGLE_FACING). + /// This flag is mutually exclusive with OPTIX_RAY_FLAG_CULL_BACK_FACING_TRIANGLES. + OPTIX_RAY_FLAG_CULL_FRONT_FACING_TRIANGLES = 1u << 5, + + /// Do not intersect geometry which disables anyhit programs + /// (due to setting geometry flag OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT or + /// instance flag OPTIX_INSTANCE_FLAG_DISABLE_ANYHIT). + /// This flag is mutually exclusive with OPTIX_RAY_FLAG_CULL_ENFORCED_ANYHIT, + /// OPTIX_RAY_FLAG_ENFORCE_ANYHIT, OPTIX_RAY_FLAG_DISABLE_ANYHIT. + OPTIX_RAY_FLAG_CULL_DISABLED_ANYHIT = 1u << 6, + + /// Do not intersect geometry which have an enabled anyhit program + /// (due to not setting geometry flag OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT or + /// setting instance flag OPTIX_INSTANCE_FLAG_ENFORCE_ANYHIT). + /// This flag is mutually exclusive with OPTIX_RAY_FLAG_CULL_DISABLED_ANYHIT, + /// OPTIX_RAY_FLAG_ENFORCE_ANYHIT, OPTIX_RAY_FLAG_DISABLE_ANYHIT. 
+ OPTIX_RAY_FLAG_CULL_ENFORCED_ANYHIT = 1u << 7, + +} OptixRayFlags; + +/// Transform +/// +/// OptixTransformType is used by the device function #optixGetTransformTypeFromHandle() to +/// determine the type of the OptixTraversableHandle returned from +/// optixGetTransformListHandle(). +typedef enum OptixTransformType +{ + OPTIX_TRANSFORM_TYPE_NONE = 0, ///< Not a transformation + OPTIX_TRANSFORM_TYPE_STATIC_TRANSFORM = 1, ///< \see #OptixStaticTransform + OPTIX_TRANSFORM_TYPE_MATRIX_MOTION_TRANSFORM = 2, ///< \see #OptixMatrixMotionTransform + OPTIX_TRANSFORM_TYPE_SRT_MOTION_TRANSFORM = 3, ///< \see #OptixSRTMotionTransform + OPTIX_TRANSFORM_TYPE_INSTANCE = 4, ///< \see #OptixInstance +} OptixTransformType; + +/// Specifies the set of valid traversable graphs that may be +/// passed to invocation of #optixTrace(). Flags may be bitwise combined. +typedef enum OptixTraversableGraphFlags +{ + /// Used to signal that any traversable graphs is valid. + /// This flag is mutually exclusive with all other flags. + OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_ANY = 0, + + /// Used to signal that a traversable graph of a single Geometry Acceleration + /// Structure (GAS) without any transforms is valid. This flag may be combined with + /// other flags except for OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_ANY. + OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_GAS = 1u << 0, + + /// Used to signal that a traversable graph of a single Instance Acceleration + /// Structure (IAS) directly connected to Geometry Acceleration Structure (GAS) + /// traversables without transform traversables in between is valid. This flag may + /// be combined with other flags except for OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_ANY. 
+ OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_LEVEL_INSTANCING = 1u << 1, +} OptixTraversableGraphFlags; + +/// Optimization levels +/// +/// \see #OptixModuleCompileOptions::optLevel +typedef enum OptixCompileOptimizationLevel +{ + /// Default is to run all optimizations + OPTIX_COMPILE_OPTIMIZATION_DEFAULT = 0, + /// No optimizations + OPTIX_COMPILE_OPTIMIZATION_LEVEL_0 = 0x2340, + /// Some optimizations + OPTIX_COMPILE_OPTIMIZATION_LEVEL_1 = 0x2341, + /// Most optimizations + OPTIX_COMPILE_OPTIMIZATION_LEVEL_2 = 0x2342, + /// All optimizations + OPTIX_COMPILE_OPTIMIZATION_LEVEL_3 = 0x2343, +} OptixCompileOptimizationLevel; + +/// Debug levels +/// +/// \see #OptixModuleCompileOptions::debugLevel +typedef enum OptixCompileDebugLevel +{ + /// Default currently is minimal + OPTIX_COMPILE_DEBUG_LEVEL_DEFAULT = 0, + /// No debug information + OPTIX_COMPILE_DEBUG_LEVEL_NONE = 0x2350, + /// Generate information that does not impact performance. + /// Note this replaces OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO. + OPTIX_COMPILE_DEBUG_LEVEL_MINIMAL = 0x2351, + /// Generate some debug information with slight performance cost + OPTIX_COMPILE_DEBUG_LEVEL_MODERATE = 0x2353, + /// Generate full debug information + OPTIX_COMPILE_DEBUG_LEVEL_FULL = 0x2352, +} OptixCompileDebugLevel; + +/// Module compilation state. +/// +/// \see #optixModuleGetCompilationState(), #optixModuleCreateFromPTXWithTasks() +typedef enum OptixModuleCompileState +{ + /// No OptixTask objects have started + OPTIX_MODULE_COMPILE_STATE_NOT_STARTED = 0x2360, + + /// Started, but not all OptixTask objects have completed. No detected failures. + OPTIX_MODULE_COMPILE_STATE_STARTED = 0x2361, + + /// Not all OptixTask objects have completed, but at least one has failed. + OPTIX_MODULE_COMPILE_STATE_IMPENDING_FAILURE = 0x2362, + + /// All OptixTask objects have completed, and at least one has failed + OPTIX_MODULE_COMPILE_STATE_FAILED = 0x2363, + + /// All OptixTask objects have completed. 
The OptixModule is ready to be used. + OPTIX_MODULE_COMPILE_STATE_COMPLETED = 0x2364, +} OptixModuleCompileState; + + + +/// Struct for specifying specializations for pipelineParams as specified in +/// OptixPipelineCompileOptions::pipelineLaunchParamsVariableName. +/// +/// The bound values are supposed to represent a constant value in the +/// pipelineParams. OptiX will attempt to locate all loads from the pipelineParams and +/// correlate them to the appropriate bound value, but there are cases where OptiX cannot +/// safely or reliably do this. For example, if the pointer to the pipelineParams is passed +/// as an argument to a non-inline function or the offset of the load to the +/// pipelineParams cannot be statically determined (e.g. accessed in a loop). No module +/// should rely on the value being specialized in order to work correctly. The values in +/// the pipelineParams specified on optixLaunch should match the bound value. If +/// validation mode is enabled on the context, OptiX will verify that the bound values +/// specified match the values in pipelineParams specified to optixLaunch. +/// +/// These values are compiled into the module as constants. Once the constants are +/// inserted into the code, an optimization pass will be run that will attempt to +/// propagate the constants and remove unreachable code. +/// +/// If caching is enabled, changes in these values will result in newly compiled modules. +/// +/// The pipelineParamOffsetInBytes and sizeInBytes must be within the bounds of the +/// pipelineParams variable. OPTIX_ERROR_INVALID_VALUE will be returned from +/// optixModuleCreateFromPTX otherwise. +/// +/// If more than one bound value overlaps or the size of a bound value is equal to 0, +/// an OPTIX_ERROR_INVALID_VALUE will be returned from optixModuleCreateFromPTX. +/// +/// The same set of bound values do not need to be used for all modules in a pipeline, but +/// overlapping values between modules must have the same value.
+/// OPTIX_ERROR_INVALID_VALUE will be returned from optixPipelineCreate otherwise. +/// +/// \see #OptixModuleCompileOptions +typedef struct OptixModuleCompileBoundValueEntry { + size_t pipelineParamOffsetInBytes; + size_t sizeInBytes; + const void* boundValuePtr; + const char* annotation; // optional string to display, set to 0 if unused. If unused, + // OptiX will report the annotation as "No annotation" +} OptixModuleCompileBoundValueEntry; + +/// Payload type identifiers. +typedef enum OptixPayloadTypeID { + OPTIX_PAYLOAD_TYPE_DEFAULT = 0, + OPTIX_PAYLOAD_TYPE_ID_0 = (1 << 0u), + OPTIX_PAYLOAD_TYPE_ID_1 = (1 << 1u), + OPTIX_PAYLOAD_TYPE_ID_2 = (1 << 2u), + OPTIX_PAYLOAD_TYPE_ID_3 = (1 << 3u), + OPTIX_PAYLOAD_TYPE_ID_4 = (1 << 4u), + OPTIX_PAYLOAD_TYPE_ID_5 = (1 << 5u), + OPTIX_PAYLOAD_TYPE_ID_6 = (1 << 6u), + OPTIX_PAYLOAD_TYPE_ID_7 = (1 << 7u) +} OptixPayloadTypeID; + +/// Semantic flags for a single payload word. +/// +/// Used to specify the semantics of a payload word per shader type. +/// "read": Shader of this type may read the payload word. +/// "write": Shader of this type may write the payload word. +/// +/// "trace_caller_write": Shaders may consume the value of the payload word passed to optixTrace by the caller. +/// "trace_caller_read": The caller to optixTrace may read the payload word after the call to optixTrace. +/// +/// Semantics can be bitwise combined. +/// Combining "read" and "write" is equivalent to specifying "read_write". +/// A payload needs to be writable by the caller or at least one shader type. +/// A payload needs to be readable by the caller or at least one shader type after a being writable. 
+typedef enum OptixPayloadSemantics +{ + OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_NONE = 0, + OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_READ = 1u << 0, + OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_WRITE = 2u << 0, + OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_READ_WRITE = 3u << 0, + + OPTIX_PAYLOAD_SEMANTICS_CH_NONE = 0, + OPTIX_PAYLOAD_SEMANTICS_CH_READ = 1u << 2, + OPTIX_PAYLOAD_SEMANTICS_CH_WRITE = 2u << 2, + OPTIX_PAYLOAD_SEMANTICS_CH_READ_WRITE = 3u << 2, + + OPTIX_PAYLOAD_SEMANTICS_MS_NONE = 0, + OPTIX_PAYLOAD_SEMANTICS_MS_READ = 1u << 4, + OPTIX_PAYLOAD_SEMANTICS_MS_WRITE = 2u << 4, + OPTIX_PAYLOAD_SEMANTICS_MS_READ_WRITE = 3u << 4, + + OPTIX_PAYLOAD_SEMANTICS_AH_NONE = 0, + OPTIX_PAYLOAD_SEMANTICS_AH_READ = 1u << 6, + OPTIX_PAYLOAD_SEMANTICS_AH_WRITE = 2u << 6, + OPTIX_PAYLOAD_SEMANTICS_AH_READ_WRITE = 3u << 6, + + OPTIX_PAYLOAD_SEMANTICS_IS_NONE = 0, + OPTIX_PAYLOAD_SEMANTICS_IS_READ = 1u << 8, + OPTIX_PAYLOAD_SEMANTICS_IS_WRITE = 2u << 8, + OPTIX_PAYLOAD_SEMANTICS_IS_READ_WRITE = 3u << 8, +} OptixPayloadSemantics; + +/// Specifies a single payload type +typedef struct OptixPayloadType +{ + /// The number of 32b words the payload of this type holds + unsigned int numPayloadValues; + + /// Points to host array of payload word semantics, size must match numPayloadValues + const unsigned int *payloadSemantics; +} OptixPayloadType; + +/// Compilation options for module +/// +/// \see #optixModuleCreateFromPTX() +typedef struct OptixModuleCompileOptions +{ + /// Maximum number of registers allowed when compiling to SASS. + /// Set to 0 for no explicit limit. May vary within a pipeline. + int maxRegisterCount; + + /// Optimization level. May vary within a pipeline. + OptixCompileOptimizationLevel optLevel; + + /// Generate debug information. 
+ OptixCompileDebugLevel debugLevel; + + /// Ignored if numBoundValues is set to 0 + const OptixModuleCompileBoundValueEntry* boundValues; + + /// set to 0 if unused + unsigned int numBoundValues; + + /// The number of different payload types available for compilation. + /// Must be zero if OptixPipelineCompileOptions::numPayloadValues is not zero. + unsigned int numPayloadTypes; + + /// Points to host array of payload type definitions, size must match numPayloadTypes + OptixPayloadType *payloadTypes; + +} OptixModuleCompileOptions; + +/// Distinguishes different kinds of program groups. +typedef enum OptixProgramGroupKind +{ + /// Program group containing a raygen (RG) program + /// \see #OptixProgramGroupSingleModule, #OptixProgramGroupDesc::raygen + OPTIX_PROGRAM_GROUP_KIND_RAYGEN = 0x2421, + + /// Program group containing a miss (MS) program + /// \see #OptixProgramGroupSingleModule, #OptixProgramGroupDesc::miss + OPTIX_PROGRAM_GROUP_KIND_MISS = 0x2422, + + /// Program group containing an exception (EX) program + /// \see #OptixProgramGroupSingleModule, #OptixProgramGroupDesc::exception + OPTIX_PROGRAM_GROUP_KIND_EXCEPTION = 0x2423, + + /// Program group containing an intersection (IS), any hit (AH), and/or closest hit (CH) program + /// \see #OptixProgramGroupHitgroup, #OptixProgramGroupDesc::hitgroup + OPTIX_PROGRAM_GROUP_KIND_HITGROUP = 0x2424, + + /// Program group containing a direct (DC) or continuation (CC) callable program + /// \see #OptixProgramGroupCallables, #OptixProgramGroupDesc::callables + OPTIX_PROGRAM_GROUP_KIND_CALLABLES = 0x2425 +} OptixProgramGroupKind; + +/// Flags for program groups +typedef enum OptixProgramGroupFlags +{ + /// Currently there are no flags + OPTIX_PROGRAM_GROUP_FLAGS_NONE = 0 +} OptixProgramGroupFlags; + +/// Program group representing a single module. +/// +/// Used for raygen, miss, and exception programs. In case of raygen and exception programs, module and entry +/// function name need to be valid.
For miss programs, module and entry function name might both be \c nullptr. +/// +/// \see #OptixProgramGroupDesc::raygen, #OptixProgramGroupDesc::miss, #OptixProgramGroupDesc::exception +typedef struct OptixProgramGroupSingleModule +{ + /// Module holding the single program. + OptixModule module; + /// Entry function name of the single program. + const char* entryFunctionName; +} OptixProgramGroupSingleModule; + +/// Program group representing the hitgroup. +/// +/// For each of the three program types, module and entry function name might both be \c nullptr. +/// +/// \see #OptixProgramGroupDesc::hitgroup +typedef struct OptixProgramGroupHitgroup +{ + /// Module holding the closest hit (CH) program. + OptixModule moduleCH; + /// Entry function name of the closest hit (CH) program. + const char* entryFunctionNameCH; + /// Module holding the any hit (AH) program. + OptixModule moduleAH; + /// Entry function name of the any hit (AH) program. + const char* entryFunctionNameAH; + /// Module holding the intersection (IS) program. + OptixModule moduleIS; + /// Entry function name of the intersection (IS) program. + const char* entryFunctionNameIS; +} OptixProgramGroupHitgroup; + +/// Program group representing callables. +/// +/// Module and entry function name need to be valid for at least one of the two callables. +/// +/// \see #OptixProgramGroupDesc::callables +typedef struct OptixProgramGroupCallables +{ + /// Module holding the direct callable (DC) program. + OptixModule moduleDC; + /// Entry function name of the direct callable (DC) program. + const char* entryFunctionNameDC; + /// Module holding the continuation callable (CC) program. + OptixModule moduleCC; + /// Entry function name of the continuation callable (CC) program. + const char* entryFunctionNameCC; +} OptixProgramGroupCallables; + +/// Descriptor for program groups. +typedef struct OptixProgramGroupDesc +{ + /// The kind of program group.
+ OptixProgramGroupKind kind; + + /// See #OptixProgramGroupFlags + unsigned int flags; + + union + { + /// \see #OPTIX_PROGRAM_GROUP_KIND_RAYGEN + OptixProgramGroupSingleModule raygen; + /// \see #OPTIX_PROGRAM_GROUP_KIND_MISS + OptixProgramGroupSingleModule miss; + /// \see #OPTIX_PROGRAM_GROUP_KIND_EXCEPTION + OptixProgramGroupSingleModule exception; + /// \see #OPTIX_PROGRAM_GROUP_KIND_CALLABLES + OptixProgramGroupCallables callables; + /// \see #OPTIX_PROGRAM_GROUP_KIND_HITGROUP + OptixProgramGroupHitgroup hitgroup; + }; +} OptixProgramGroupDesc; + +/// Program group options +/// +/// \see #optixProgramGroupCreate() +typedef struct OptixProgramGroupOptions +{ + /// Specifies the payload type of this program group. + /// All programs in the group must support the payload type + /// (Program support for a type is specified by calling + /// \see #optixSetPayloadTypes or otherwise all types specified in + /// \see #OptixModuleCompileOptions are supported). + /// If a program is not available for the requested payload type, + /// optixProgramGroupCreate returns OPTIX_ERROR_PAYLOAD_TYPE_MISMATCH. + /// If the payloadType is left zero, a unique type is deduced. + /// The payload type can be uniquely deduced if there is exactly one payload type + /// for which all programs in the group are available. + /// If the payload type could not be deduced uniquely + /// optixProgramGroupCreate returns OPTIX_ERROR_PAYLOAD_TYPE_RESOLUTION_FAILED. + OptixPayloadType* payloadType; +} OptixProgramGroupOptions; + +/// The following values are used to indicate which exception was thrown. +typedef enum OptixExceptionCodes +{ + /// Stack overflow of the continuation stack. + /// no exception details. + OPTIX_EXCEPTION_CODE_STACK_OVERFLOW = -1, + + /// The trace depth is exceeded. + /// no exception details. + OPTIX_EXCEPTION_CODE_TRACE_DEPTH_EXCEEDED = -2, + + /// The traversal depth is exceeded. 
+ /// Exception details: + /// optixGetTransformListSize() + /// optixGetTransformListHandle() + OPTIX_EXCEPTION_CODE_TRAVERSAL_DEPTH_EXCEEDED = -3, + + /// Traversal encountered an invalid traversable type. + /// Exception details: + /// optixGetTransformListSize() + /// optixGetTransformListHandle() + /// optixGetExceptionInvalidTraversable() + OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_TRAVERSABLE = -5, + + /// The miss SBT record index is out of bounds + /// A miss SBT record index is valid within the range [0, OptixShaderBindingTable::missRecordCount) (See optixLaunch) + /// Exception details: + /// optixGetExceptionInvalidSbtOffset() + OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_MISS_SBT = -6, + + /// The traversal hit SBT record index out of bounds. + /// + /// A traversal hit SBT record index is valid within the range [0, OptixShaderBindingTable::hitgroupRecordCount) (See optixLaunch) + /// The following formula relates the + // sbt-index (See optixGetExceptionInvalidSbtOffset), + // sbt-instance-offset (See OptixInstance::sbtOffset), + /// sbt-geometry-acceleration-structure-index (See optixGetSbtGASIndex), + /// sbt-stride-from-trace-call and sbt-offset-from-trace-call (See optixTrace) + /// + /// sbt-index = sbt-instance-offset + (sbt-geometry-acceleration-structure-index * sbt-stride-from-trace-call) + sbt-offset-from-trace-call + /// + /// Exception details: + /// optixGetTransformListSize() + /// optixGetTransformListHandle() + /// optixGetExceptionInvalidSbtOffset() + /// optixGetSbtGASIndex() + OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_HIT_SBT = -7, + + /// The shader encountered an unsupported primitive type (See OptixPipelineCompileOptions::usesPrimitiveTypeFlags). + /// no exception details. + OPTIX_EXCEPTION_CODE_UNSUPPORTED_PRIMITIVE_TYPE = -8, + + /// The shader encountered a call to optixTrace with at least + /// one of the float arguments being inf or nan, or the tmin argument is negative. 
+ /// Exception details: + /// optixGetExceptionInvalidRay() + OPTIX_EXCEPTION_CODE_INVALID_RAY = -9, + + /// The shader encountered a call to either optixDirectCall or optixCallableCall + /// where the argument count does not match the parameter count of the callable + /// program which is called. + /// Exception details: + /// optixGetExceptionParameterMismatch + OPTIX_EXCEPTION_CODE_CALLABLE_PARAMETER_MISMATCH = -10, + + /// The invoked builtin IS does not match the current GAS + OPTIX_EXCEPTION_CODE_BUILTIN_IS_MISMATCH = -11, + + /// Tried to call a callable program using an SBT offset that is larger + /// than the number of passed in callable SBT records. + /// Exception details: + /// optixGetExceptionInvalidSbtOffset() + OPTIX_EXCEPTION_CODE_CALLABLE_INVALID_SBT = -12, + + /// Tried to call a direct callable using an SBT offset of a record that + /// was built from a program group that did not include a direct callable. + OPTIX_EXCEPTION_CODE_CALLABLE_NO_DC_SBT_RECORD = -13, + + /// Tried to call a continuation callable using an SBT offset of a record + /// that was built from a program group that did not include a continuation callable. + OPTIX_EXCEPTION_CODE_CALLABLE_NO_CC_SBT_RECORD = -14, + + /// Tried to directly traverse a single gas while single gas traversable graphs are not enabled + /// (see OptixTraversableGraphFlags::OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_GAS). + /// Exception details: + /// optixGetTransformListSize() + /// optixGetTransformListHandle() + /// optixGetExceptionInvalidTraversable() + OPTIX_EXCEPTION_CODE_UNSUPPORTED_SINGLE_LEVEL_GAS = -15, + + /// argument passed to an optix call is + /// not within an acceptable range of values. + OPTIX_EXCEPTION_CODE_INVALID_VALUE_ARGUMENT_0 = -16, + OPTIX_EXCEPTION_CODE_INVALID_VALUE_ARGUMENT_1 = -17, + OPTIX_EXCEPTION_CODE_INVALID_VALUE_ARGUMENT_2 = -18, + + /// Tried to access data on an AS without random data access support (See OptixBuildFlags). 
+ OPTIX_EXCEPTION_CODE_UNSUPPORTED_DATA_ACCESS = -32, + + /// The program payload type doesn't match the trace payload type. + OPTIX_EXCEPTION_CODE_PAYLOAD_TYPE_MISMATCH = -33, +} OptixExceptionCodes; + +/// Exception flags. +/// +/// \see #OptixPipelineCompileOptions::exceptionFlags, #OptixExceptionCodes +typedef enum OptixExceptionFlags +{ + /// No exception are enabled. + OPTIX_EXCEPTION_FLAG_NONE = 0, + + /// Enables exceptions check related to the continuation stack. + OPTIX_EXCEPTION_FLAG_STACK_OVERFLOW = 1u << 0, + + /// Enables exceptions check related to trace depth. + OPTIX_EXCEPTION_FLAG_TRACE_DEPTH = 1u << 1, + + /// Enables user exceptions via optixThrowException(). This flag must be specified for all modules in a pipeline + /// if any module calls optixThrowException(). + OPTIX_EXCEPTION_FLAG_USER = 1u << 2, + + /// Enables various exceptions check related to traversal. + OPTIX_EXCEPTION_FLAG_DEBUG = 1u << 3 +} OptixExceptionFlags; + +/// Compilation options for all modules of a pipeline. +/// +/// Similar to #OptixModuleCompileOptions, but these options here need to be equal for all modules of a pipeline. +/// +/// \see #optixModuleCreateFromPTX(), #optixPipelineCreate() +typedef struct OptixPipelineCompileOptions +{ + /// Boolean value indicating whether motion blur could be used + int usesMotionBlur; + + /// Traversable graph bitfield. See OptixTraversableGraphFlags + unsigned int traversableGraphFlags; + + /// How much storage, in 32b words, to make available for the payload, [0..32] + /// Must be zero if numPayloadTypes is not zero. + int numPayloadValues; + + /// How much storage, in 32b words, to make available for the attributes. The + /// minimum number is 2. Values below that will automatically be changed to 2. [2..8] + int numAttributeValues; + + /// A bitmask of OptixExceptionFlags indicating which exceptions are enabled. + unsigned int exceptionFlags; + + /// The name of the pipeline parameter variable. 
If 0, no pipeline parameter + /// will be available. This will be ignored if the launch param variable was + /// optimized out or was not found in the modules linked to the pipeline. + const char* pipelineLaunchParamsVariableName; + + /// Bit field enabling primitive types. See OptixPrimitiveTypeFlags. + /// Setting to zero corresponds to enabling OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM and OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE. + unsigned int usesPrimitiveTypeFlags; + +} OptixPipelineCompileOptions; + +/// Link options for a pipeline +/// +/// \see #optixPipelineCreate() +typedef struct OptixPipelineLinkOptions +{ + /// Maximum trace recursion depth. 0 means a ray generation program can be + /// launched, but can't trace any rays. The maximum allowed value is 31. + unsigned int maxTraceDepth; + + /// Generate debug information. + OptixCompileDebugLevel debugLevel; + +} OptixPipelineLinkOptions; + +/// Describes the shader binding table (SBT) +/// +/// \see #optixLaunch() +typedef struct OptixShaderBindingTable +{ + /// Device address of the SBT record of the ray gen program to start launch at. The address must be a multiple of + /// OPTIX_SBT_RECORD_ALIGNMENT. + CUdeviceptr raygenRecord; + + /// Device address of the SBT record of the exception program. The address must be a multiple of + /// OPTIX_SBT_RECORD_ALIGNMENT. + CUdeviceptr exceptionRecord; + + /// Arrays of SBT records for miss programs. The base address and the stride must be a multiple of + /// OPTIX_SBT_RECORD_ALIGNMENT. + /// @{ + CUdeviceptr missRecordBase; + unsigned int missRecordStrideInBytes; + unsigned int missRecordCount; + /// @} + + /// Arrays of SBT records for hit groups. The base address and the stride must be a multiple of + /// OPTIX_SBT_RECORD_ALIGNMENT. + /// @{ + CUdeviceptr hitgroupRecordBase; + unsigned int hitgroupRecordStrideInBytes; + unsigned int hitgroupRecordCount; + /// @} + + /// Arrays of SBT records for callable programs. 
If the base address is not null, the stride and count must not be + /// zero. If the base address is null, then the count needs to be zero. The base address and the stride must be a + /// multiple of OPTIX_SBT_RECORD_ALIGNMENT. + /// @{ + CUdeviceptr callablesRecordBase; + unsigned int callablesRecordStrideInBytes; + unsigned int callablesRecordCount; + /// @} + +} OptixShaderBindingTable; + +/// Describes the stack size requirements of a program group. +/// +/// \see #optixProgramGroupGetStackSize() +typedef struct OptixStackSizes +{ + /// Continuation stack size of RG programs in bytes + unsigned int cssRG; + /// Continuation stack size of MS programs in bytes + unsigned int cssMS; + /// Continuation stack size of CH programs in bytes + unsigned int cssCH; + /// Continuation stack size of AH programs in bytes + unsigned int cssAH; + /// Continuation stack size of IS programs in bytes + unsigned int cssIS; + /// Continuation stack size of CC programs in bytes + unsigned int cssCC; + /// Direct stack size of DC programs in bytes + unsigned int dssDC; + +} OptixStackSizes; + +/// Options that can be passed to \c optixQueryFunctionTable() +typedef enum OptixQueryFunctionTableOptions +{ + /// Placeholder (there are no options yet) + OPTIX_QUERY_FUNCTION_TABLE_OPTION_DUMMY = 0 + +} OptixQueryFunctionTableOptions; + +/// Type of the function \c optixQueryFunctionTable() +typedef OptixResult( OptixQueryFunctionTable_t )( int abiId, + unsigned int numOptions, + OptixQueryFunctionTableOptions* /*optionKeys*/, + const void** /*optionValues*/, + void* functionTable, + size_t sizeOfTable ); + +/// Specifies the options for retrieving an intersection program for a built-in primitive type. +/// The primitive type must not be OPTIX_PRIMITIVE_TYPE_CUSTOM. +/// +/// \see #optixBuiltinISModuleGet() +typedef struct OptixBuiltinISOptions +{ + OptixPrimitiveType builtinISModuleType; + /// Boolean value indicating whether vertex motion blur is used (but not motion transform blur).
+ int usesMotionBlur; + /// Build flags, see OptixBuildFlags. + unsigned int buildFlags; + /// End cap properties of curves, see OptixCurveEndcapFlags, 0 for non-curve types. + unsigned int curveEndcapFlags; +} OptixBuiltinISOptions; + +#if defined( __CUDACC__ ) +/// Describes the ray that was passed into \c optixTrace() which caused an exception with +/// exception code OPTIX_EXCEPTION_CODE_INVALID_RAY. +/// +/// \see #optixGetExceptionInvalidRay() +typedef struct OptixInvalidRayExceptionDetails +{ + float3 origin; + float3 direction; + float tmin; + float tmax; + float time; +} OptixInvalidRayExceptionDetails; + +/// Describes the details of a call to a callable program which caused an exception with +/// exception code OPTIX_EXCEPTION_CODE_CALLABLE_PARAMETER_MISMATCH, +/// Note that OptiX packs the parameters into individual 32 bit values, so the number of +/// expected and passed values may not correspond to the number of arguments passed into +/// optixDirectCall or optixContinuationCall, or the number parameters in the definition +/// of the function that is called. +typedef struct OptixParameterMismatchExceptionDetails +{ + /// Number of 32 bit values expected by the callable program + unsigned int expectedParameterCount; + /// Number of 32 bit values that were passed to the callable program + unsigned int passedArgumentCount; + /// The offset of the SBT entry of the callable program relative to OptixShaderBindingTable::callablesRecordBase + unsigned int sbtIndex; + /// Pointer to a string that holds the name of the callable program that was called + char* callableName; +} OptixParameterMismatchExceptionDetails; +#endif + + +/*@}*/ // end group optix_types + +#endif // __optix_optix_7_types_h__ diff --git a/include/optix_denoiser_tiling.h b/include/optix_denoiser_tiling.h new file mode 100644 index 0000000..03dda26 --- /dev/null +++ b/include/optix_denoiser_tiling.h @@ -0,0 +1,339 @@ +/* + * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/// @file +/// @author NVIDIA Corporation +/// @brief OptiX public API header + +#ifndef optix_denoiser_tiling_h +#define optix_denoiser_tiling_h + +#include <optix.h> + +#include <algorithm> +#include <vector> + +#ifdef __cplusplus +extern "C" { +#endif + +/** \addtogroup optix_utilities +@{ +*/ + +/// Tile definition +/// +/// see #optixUtilDenoiserSplitImage +/// +struct OptixUtilDenoiserImageTile +{ + // input tile image + OptixImage2D input; + + // output tile image + OptixImage2D output; + + // overlap offsets, parameters for #optixUtilDenoiserInvoke + unsigned int inputOffsetX; + unsigned int inputOffsetY; +}; + +/// Return pixel stride in bytes for the given pixel format +/// if the pixelStrideInBytes member of the image is zero. +/// Otherwise return pixelStrideInBytes from the image. +/// +/// \param[in] image Image containing the pixel stride +/// +inline OptixResult optixUtilGetPixelStride( const OptixImage2D& image, unsigned int& pixelStrideInBytes ) +{ + pixelStrideInBytes = image.pixelStrideInBytes; + if( pixelStrideInBytes == 0 ) + { + switch( image.format ) + { + case OPTIX_PIXEL_FORMAT_HALF2: + pixelStrideInBytes = 2 * sizeof( short ); + break; + case OPTIX_PIXEL_FORMAT_HALF3: + pixelStrideInBytes = 3 * sizeof( short ); + break; + case OPTIX_PIXEL_FORMAT_HALF4: + pixelStrideInBytes = 4 * sizeof( short ); + break; + case OPTIX_PIXEL_FORMAT_FLOAT2: + pixelStrideInBytes = 2 * sizeof( float ); + break; + case OPTIX_PIXEL_FORMAT_FLOAT3: + pixelStrideInBytes = 3 * sizeof( float ); + break; + case OPTIX_PIXEL_FORMAT_FLOAT4: + pixelStrideInBytes = 4 * sizeof( float ); + break; + case OPTIX_PIXEL_FORMAT_UCHAR3: + pixelStrideInBytes = 3 * sizeof( char ); + break; + case OPTIX_PIXEL_FORMAT_UCHAR4: + pixelStrideInBytes = 4 * sizeof( char ); + break; + case OPTIX_PIXEL_FORMAT_INTERNAL_GUIDE_LAYER: + return OPTIX_ERROR_INVALID_VALUE; + break; + } + } + return OPTIX_SUCCESS; +} + +/// Split image into 2D tiles given horizontal and vertical tile size +/// +/// \param[in] input full
resolution input image to be split +/// \param[in] output full resolution output image +/// \param[in] overlapWindowSizeInPixels see #OptixDenoiserSizes, #optixDenoiserComputeMemoryResources +/// \param[in] tileWidth maximum width of tiles +/// \param[in] tileHeight maximum height of tiles +/// \param[out] tiles list of tiles covering the input image +/// +inline OptixResult optixUtilDenoiserSplitImage( + const OptixImage2D& input, + const OptixImage2D& output, + unsigned int overlapWindowSizeInPixels, + unsigned int tileWidth, + unsigned int tileHeight, + std::vector& tiles ) +{ + if( tileWidth == 0 || tileHeight == 0 ) + return OPTIX_ERROR_INVALID_VALUE; + + unsigned int inPixelStride, outPixelStride; + if( const OptixResult res = optixUtilGetPixelStride( input, inPixelStride ) ) + return res; + if( const OptixResult res = optixUtilGetPixelStride( output, outPixelStride ) ) + return res; + + int inp_w = std::min( tileWidth + 2 * overlapWindowSizeInPixels, input.width ); + int inp_h = std::min( tileHeight + 2 * overlapWindowSizeInPixels, input.height ); + int inp_y = 0, copied_y = 0; + + int upscaleX = output.width / input.width; + int upscaleY = output.height / input.height; + + do + { + int inputOffsetY = inp_y == 0 ? 0 : std::max( (int)overlapWindowSizeInPixels, inp_h - ( (int)input.height - inp_y ) ); + int copy_y = inp_y == 0 ? std::min( input.height, tileHeight + overlapWindowSizeInPixels ) : + std::min( tileHeight, input.height - copied_y ); + + int inp_x = 0, copied_x = 0; + do + { + int inputOffsetX = inp_x == 0 ? 0 : std::max( (int)overlapWindowSizeInPixels, inp_w - ( (int)input.width - inp_x ) ); + int copy_x = inp_x == 0 ? 
std::min( input.width, tileWidth + overlapWindowSizeInPixels ) : + std::min( tileWidth, input.width - copied_x ); + + OptixUtilDenoiserImageTile tile; + tile.input.data = input.data + (size_t)( inp_y - inputOffsetY ) * input.rowStrideInBytes + + (size_t)( inp_x - inputOffsetX ) * inPixelStride; + tile.input.width = inp_w; + tile.input.height = inp_h; + tile.input.rowStrideInBytes = input.rowStrideInBytes; + tile.input.pixelStrideInBytes = input.pixelStrideInBytes; + tile.input.format = input.format; + + tile.output.data = output.data + (size_t)( upscaleY * inp_y ) * output.rowStrideInBytes + + (size_t)( upscaleX * inp_x ) * outPixelStride; + tile.output.width = upscaleX * copy_x; + tile.output.height = upscaleY * copy_y; + tile.output.rowStrideInBytes = output.rowStrideInBytes; + tile.output.pixelStrideInBytes = output.pixelStrideInBytes; + tile.output.format = output.format; + + tile.inputOffsetX = inputOffsetX; + tile.inputOffsetY = inputOffsetY; + + tiles.push_back( tile ); + + inp_x += inp_x == 0 ? tileWidth + overlapWindowSizeInPixels : tileWidth; + copied_x += copy_x; + } while( inp_x < static_cast( input.width ) ); + + inp_y += inp_y == 0 ? tileHeight + overlapWindowSizeInPixels : tileHeight; + copied_y += copy_y; + } while( inp_y < static_cast( input.height ) ); + + return OPTIX_SUCCESS; +} + +/// Run denoiser on input layers +/// see #optixDenoiserInvoke +/// additional parameters: + +/// Runs the denoiser on the input layers on a single GPU and stream using #optixDenoiserInvoke. +/// If the input layers' dimensions are larger than the specified tile size, the image is divided into +/// tiles using #optixUtilDenoiserSplitImage, and multiple back-to-back invocations are performed in +/// order to reuse the scratch space. Multiple tiles can be invoked concurrently if +/// #optixUtilDenoiserSplitImage is used directly and multiple scratch allocations for each concurrent +/// invocation are used. 
+ +/// The input parameters are the same as #optixDenoiserInvoke except for the addition of the maximum tile size. +/// +/// \param[in] denoiser +/// \param[in] stream +/// \param[in] params +/// \param[in] denoiserState +/// \param[in] denoiserStateSizeInBytes +/// \param[in] guideLayer +/// \param[in] layers +/// \param[in] numLayers +/// \param[in] scratch +/// \param[in] scratchSizeInBytes +/// \param[in] overlapWindowSizeInPixels +/// \param[in] tileWidth +/// \param[in] tileHeight +inline OptixResult optixUtilDenoiserInvokeTiled( + OptixDenoiser denoiser, + CUstream stream, + const OptixDenoiserParams* params, + CUdeviceptr denoiserState, + size_t denoiserStateSizeInBytes, + const OptixDenoiserGuideLayer* guideLayer, + const OptixDenoiserLayer* layers, + unsigned int numLayers, + CUdeviceptr scratch, + size_t scratchSizeInBytes, + unsigned int overlapWindowSizeInPixels, + unsigned int tileWidth, + unsigned int tileHeight ) +{ + if( !guideLayer || !layers ) + return OPTIX_ERROR_INVALID_VALUE; + + const unsigned int upscale = numLayers > 0 && layers[0].previousOutput.width == 2 * layers[0].input.width ? 
2 : 1; + + std::vector> tiles( numLayers ); + std::vector> prevTiles( numLayers ); + for( unsigned int l = 0; l < numLayers; l++ ) + { + if( const OptixResult res = optixUtilDenoiserSplitImage( layers[l].input, layers[l].output, + overlapWindowSizeInPixels, + tileWidth, tileHeight, tiles[l] ) ) + return res; + + if( layers[l].previousOutput.data ) + { + OptixImage2D dummyOutput = layers[l].previousOutput; + if( const OptixResult res = optixUtilDenoiserSplitImage( layers[l].previousOutput, dummyOutput, + upscale * overlapWindowSizeInPixels, + upscale * tileWidth, upscale * tileHeight, prevTiles[l] ) ) + return res; + } + } + + std::vector albedoTiles; + if( guideLayer->albedo.data ) + { + OptixImage2D dummyOutput = guideLayer->albedo; + if( const OptixResult res = optixUtilDenoiserSplitImage( guideLayer->albedo, dummyOutput, + overlapWindowSizeInPixels, + tileWidth, tileHeight, albedoTiles ) ) + return res; + } + + std::vector normalTiles; + if( guideLayer->normal.data ) + { + OptixImage2D dummyOutput = guideLayer->normal; + if( const OptixResult res = optixUtilDenoiserSplitImage( guideLayer->normal, dummyOutput, + overlapWindowSizeInPixels, + tileWidth, tileHeight, normalTiles ) ) + return res; + } + std::vector flowTiles; + if( guideLayer->flow.data ) + { + OptixImage2D dummyOutput = guideLayer->flow; + if( const OptixResult res = optixUtilDenoiserSplitImage( guideLayer->flow, dummyOutput, + overlapWindowSizeInPixels, + tileWidth, tileHeight, flowTiles ) ) + return res; + } + + std::vector internalGuideLayerTiles; + if( guideLayer->previousOutputInternalGuideLayer.data && guideLayer->outputInternalGuideLayer.data ) + { + if( const OptixResult res = optixUtilDenoiserSplitImage( guideLayer->previousOutputInternalGuideLayer, + guideLayer->outputInternalGuideLayer, + upscale * overlapWindowSizeInPixels, + upscale * tileWidth, upscale * tileHeight, internalGuideLayerTiles ) ) + return res; + } + + for( size_t t = 0; t < tiles[0].size(); t++ ) + { + std::vector tlayers; 
+ for( unsigned int l = 0; l < numLayers; l++ ) + { + OptixDenoiserLayer layer = {}; + layer.input = ( tiles[l] )[t].input; + layer.output = ( tiles[l] )[t].output; + if( layers[l].previousOutput.data ) + layer.previousOutput = ( prevTiles[l] )[t].input; + tlayers.push_back( layer ); + } + + OptixDenoiserGuideLayer gl = {}; + if( guideLayer->albedo.data ) + gl.albedo = albedoTiles[t].input; + + if( guideLayer->normal.data ) + gl.normal = normalTiles[t].input; + + if( guideLayer->flow.data ) + gl.flow = flowTiles[t].input; + + if( guideLayer->previousOutputInternalGuideLayer.data ) + gl.previousOutputInternalGuideLayer = internalGuideLayerTiles[t].input; + + if( guideLayer->outputInternalGuideLayer.data ) + gl.outputInternalGuideLayer = internalGuideLayerTiles[t].output; + + if( const OptixResult res = + optixDenoiserInvoke( denoiser, stream, params, denoiserState, denoiserStateSizeInBytes, + &gl, &tlayers[0], numLayers, + ( tiles[0] )[t].inputOffsetX, ( tiles[0] )[t].inputOffsetY, + scratch, scratchSizeInBytes ) ) + return res; + } + return OPTIX_SUCCESS; +} + +/*@}*/ // end group optix_utilities + +#ifdef __cplusplus +} +#endif + +#endif // __optix_optix_stack_size_h__ diff --git a/include/optix_device.h b/include/optix_device.h new file mode 100644 index 0000000..6dcb280 --- /dev/null +++ b/include/optix_device.h @@ -0,0 +1,47 @@ + +/* + * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and proprietary + * rights in and to this software, related documentation and any modifications thereto. + * Any use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation is strictly + * prohibited. 
+ * + * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* + * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, + * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY + * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT + * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF + * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR + * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGES + */ + + /** + * @file optix_device.h + * @author NVIDIA Corporation + * @brief OptiX public API + * + * OptiX public API Reference - Host/Device side + */ + +/******************************************************************************\ + * optix_cuda.h + * + * This file provides the nvcc interface for generating PTX that the OptiX is + * capable of parsing and weaving into the final kernel. This is included by + * optix.h automatically if compiling device code. It can be included explicitly + * in host code if desired. + * +\******************************************************************************/ +#if !defined(__OPTIX_INCLUDE_INTERNAL_HEADERS__) +# define __OPTIX_INCLUDE_INTERNAL_HEADERS__ +# define __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_DEVICE_H__ +#endif +#include "optix_7_device.h" +#if defined( __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_DEVICE_H__ ) +# undef __OPTIX_INCLUDE_INTERNAL_HEADERS__ +# undef __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_DEVICE_H__ +#endif diff --git a/include/optix_function_table.h b/include/optix_function_table.h new file mode 100644 index 0000000..932be7a --- /dev/null +++ b/include/optix_function_table.h @@ -0,0 +1,342 @@ +/* + * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. 
+ * + * NVIDIA Corporation and its licensors retain all intellectual property and proprietary + * rights in and to this software, related documentation and any modifications thereto. + * Any use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation is strictly + * prohibited. + * + * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* + * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, + * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY + * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT + * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF + * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR + * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGES + */ + +/// @file +/// @author NVIDIA Corporation +/// @brief OptiX public API header + +#ifndef __optix_optix_function_table_h__ +#define __optix_optix_function_table_h__ + +/// The OptiX ABI version. +#define OPTIX_ABI_VERSION 60 + +#ifndef OPTIX_DEFINE_ABI_VERSION_ONLY + +#include "optix_types.h" + +#if !defined( OPTIX_DONT_INCLUDE_CUDA ) +// If OPTIX_DONT_INCLUDE_CUDA is defined, cuda driver types must be defined through other +// means before including optix headers. +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/// \defgroup optix_function_table Function Table +/// \brief OptiX Function Table + +/** \addtogroup optix_function_table +@{ +*/ + +/// The function table containing all API functions. +/// +/// See #optixInit() and #optixInitWithHandle(). +typedef struct OptixFunctionTable +{ + /// \name Error handling + //@ { + + /// See ::optixGetErrorName().
+ const char* ( *optixGetErrorName )( OptixResult result ); + + /// See ::optixGetErrorString(). + const char* ( *optixGetErrorString )( OptixResult result ); + + //@ } + /// \name Device context + //@ { + + /// See ::optixDeviceContextCreate(). + OptixResult ( *optixDeviceContextCreate )( CUcontext fromContext, const OptixDeviceContextOptions* options, OptixDeviceContext* context ); + + /// See ::optixDeviceContextDestroy(). + OptixResult ( *optixDeviceContextDestroy )( OptixDeviceContext context ); + + /// See ::optixDeviceContextGetProperty(). + OptixResult ( *optixDeviceContextGetProperty )( OptixDeviceContext context, OptixDeviceProperty property, void* value, size_t sizeInBytes ); + + /// See ::optixDeviceContextSetLogCallback(). + OptixResult ( *optixDeviceContextSetLogCallback )( OptixDeviceContext context, + OptixLogCallback callbackFunction, + void* callbackData, + unsigned int callbackLevel ); + + /// See ::optixDeviceContextSetCacheEnabled(). + OptixResult ( *optixDeviceContextSetCacheEnabled )( OptixDeviceContext context, int enabled ); + + /// See ::optixDeviceContextSetCacheLocation(). + OptixResult ( *optixDeviceContextSetCacheLocation )( OptixDeviceContext context, const char* location ); + + /// See ::optixDeviceContextSetCacheDatabaseSizes(). + OptixResult ( *optixDeviceContextSetCacheDatabaseSizes )( OptixDeviceContext context, size_t lowWaterMark, size_t highWaterMark ); + + /// See ::optixDeviceContextGetCacheEnabled(). + OptixResult ( *optixDeviceContextGetCacheEnabled )( OptixDeviceContext context, int* enabled ); + + /// See ::optixDeviceContextGetCacheLocation(). + OptixResult ( *optixDeviceContextGetCacheLocation )( OptixDeviceContext context, char* location, size_t locationSize ); + + /// See ::optixDeviceContextGetCacheDatabaseSizes().
+ OptixResult ( *optixDeviceContextGetCacheDatabaseSizes )( OptixDeviceContext context, size_t* lowWaterMark, size_t* highWaterMark ); + + //@ } + /// \name Modules + //@ { + + /// See ::optixModuleCreateFromPTX(). + OptixResult ( *optixModuleCreateFromPTX )( OptixDeviceContext context, + const OptixModuleCompileOptions* moduleCompileOptions, + const OptixPipelineCompileOptions* pipelineCompileOptions, + const char* PTX, + size_t PTXsize, + char* logString, + size_t* logStringSize, + OptixModule* module ); + + /// See ::optixModuleCreateFromPTXWithTasks(). + OptixResult ( *optixModuleCreateFromPTXWithTasks )( OptixDeviceContext context, + const OptixModuleCompileOptions* moduleCompileOptions, + const OptixPipelineCompileOptions* pipelineCompileOptions, + const char* PTX, + size_t PTXsize, + char* logString, + size_t* logStringSize, + OptixModule* module, + OptixTask* firstTask ); + + /// See ::optixModuleGetCompilationState(). + OptixResult ( *optixModuleGetCompilationState )( OptixModule module, OptixModuleCompileState* state ); + + /// See ::optixModuleDestroy(). + OptixResult ( *optixModuleDestroy )( OptixModule module ); + + /// See ::optixBuiltinISModuleGet(). + OptixResult( *optixBuiltinISModuleGet )( OptixDeviceContext context, + const OptixModuleCompileOptions* moduleCompileOptions, + const OptixPipelineCompileOptions* pipelineCompileOptions, + const OptixBuiltinISOptions* builtinISOptions, + OptixModule* builtinModule); + + //@ } + /// \name Tasks + //@ { + + /// See ::optixTaskExecute(). + OptixResult ( *optixTaskExecute )( OptixTask task, + OptixTask* additionalTasks, + unsigned int maxNumAdditionalTasks, + unsigned int* numAdditionalTasksCreated ); + //@ } + /// \name Program groups + //@ { + + /// See ::optixProgramGroupCreate().
+ OptixResult ( *optixProgramGroupCreate )( OptixDeviceContext context, + const OptixProgramGroupDesc* programDescriptions, + unsigned int numProgramGroups, + const OptixProgramGroupOptions* options, + char* logString, + size_t* logStringSize, + OptixProgramGroup* programGroups ); + + /// See ::optixProgramGroupDestroy(). + OptixResult ( *optixProgramGroupDestroy )( OptixProgramGroup programGroup ); + + /// See ::optixProgramGroupGetStackSize(). + OptixResult ( *optixProgramGroupGetStackSize )( OptixProgramGroup programGroup, OptixStackSizes* stackSizes ); + + //@ } + /// \name Pipeline + //@ { + + /// See ::optixPipelineCreate(). + OptixResult ( *optixPipelineCreate )( OptixDeviceContext context, + const OptixPipelineCompileOptions* pipelineCompileOptions, + const OptixPipelineLinkOptions* pipelineLinkOptions, + const OptixProgramGroup* programGroups, + unsigned int numProgramGroups, + char* logString, + size_t* logStringSize, + OptixPipeline* pipeline ); + + /// See ::optixPipelineDestroy(). + OptixResult ( *optixPipelineDestroy )( OptixPipeline pipeline ); + + /// See ::optixPipelineSetStackSize(). + OptixResult ( *optixPipelineSetStackSize )( OptixPipeline pipeline, + unsigned int directCallableStackSizeFromTraversal, + unsigned int directCallableStackSizeFromState, + unsigned int continuationStackSize, + unsigned int maxTraversableGraphDepth ); + + //@ } + /// \name Acceleration structures + //@ { + + /// See ::optixAccelComputeMemoryUsage(). + OptixResult ( *optixAccelComputeMemoryUsage )( OptixDeviceContext context, + const OptixAccelBuildOptions* accelOptions, + const OptixBuildInput* buildInputs, + unsigned int numBuildInputs, + OptixAccelBufferSizes* bufferSizes ); + + /// See ::optixAccelBuild().
+ OptixResult ( *optixAccelBuild )( OptixDeviceContext context, + CUstream stream, + const OptixAccelBuildOptions* accelOptions, + const OptixBuildInput* buildInputs, + unsigned int numBuildInputs, + CUdeviceptr tempBuffer, + size_t tempBufferSizeInBytes, + CUdeviceptr outputBuffer, + size_t outputBufferSizeInBytes, + OptixTraversableHandle* outputHandle, + const OptixAccelEmitDesc* emittedProperties, + unsigned int numEmittedProperties ); + + /// See ::optixAccelGetRelocationInfo(). + OptixResult ( *optixAccelGetRelocationInfo )( OptixDeviceContext context, OptixTraversableHandle handle, OptixAccelRelocationInfo* info ); + + + /// See ::optixAccelCheckRelocationCompatibility(). + OptixResult ( *optixAccelCheckRelocationCompatibility )( OptixDeviceContext context, + const OptixAccelRelocationInfo* info, + int* compatible ); + + /// See ::optixAccelRelocate(). + OptixResult ( *optixAccelRelocate )( OptixDeviceContext context, + CUstream stream, + const OptixAccelRelocationInfo* info, + CUdeviceptr instanceTraversableHandles, + size_t numInstanceTraversableHandles, + CUdeviceptr targetAccel, + size_t targetAccelSizeInBytes, + OptixTraversableHandle* targetHandle ); + + + /// See ::optixAccelCompact(). + OptixResult ( *optixAccelCompact )( OptixDeviceContext context, + CUstream stream, + OptixTraversableHandle inputHandle, + CUdeviceptr outputBuffer, + size_t outputBufferSizeInBytes, + OptixTraversableHandle* outputHandle ); + + /// See ::optixConvertPointerToTraversableHandle(). + OptixResult ( *optixConvertPointerToTraversableHandle )( OptixDeviceContext onDevice, + CUdeviceptr pointer, + OptixTraversableType traversableType, + OptixTraversableHandle* traversableHandle ); + + void ( *reserved1 )( void ); + void ( *reserved2 )( void ); + + //@ } + /// \name Launch + //@ { + + /// See ::optixSbtRecordPackHeader().
+ OptixResult ( *optixSbtRecordPackHeader )( OptixProgramGroup programGroup, void* sbtRecordHeaderHostPointer ); + + /// See ::optixLaunch(). + OptixResult ( *optixLaunch )( OptixPipeline pipeline, + CUstream stream, + CUdeviceptr pipelineParams, + size_t pipelineParamsSize, + const OptixShaderBindingTable* sbt, + unsigned int width, + unsigned int height, + unsigned int depth ); + + //@ } + /// \name Denoiser + //@ { + + /// See ::optixDenoiserCreate(). + OptixResult ( *optixDenoiserCreate )( OptixDeviceContext context, OptixDenoiserModelKind modelKind, const OptixDenoiserOptions* options, OptixDenoiser* returnHandle ); + + /// See ::optixDenoiserDestroy(). + OptixResult ( *optixDenoiserDestroy )( OptixDenoiser handle ); + + /// See ::optixDenoiserComputeMemoryResources(). + OptixResult ( *optixDenoiserComputeMemoryResources )( const OptixDenoiser handle, + unsigned int maximumInputWidth, + unsigned int maximumInputHeight, + OptixDenoiserSizes* returnSizes ); + + /// See ::optixDenoiserSetup(). + OptixResult ( *optixDenoiserSetup )( OptixDenoiser denoiser, + CUstream stream, + unsigned int inputWidth, + unsigned int inputHeight, + CUdeviceptr state, + size_t stateSizeInBytes, + CUdeviceptr scratch, + size_t scratchSizeInBytes ); + + /// See ::optixDenoiserInvoke(). + OptixResult ( *optixDenoiserInvoke )( OptixDenoiser denoiser, + CUstream stream, + const OptixDenoiserParams* params, + CUdeviceptr denoiserState, + size_t denoiserStateSizeInBytes, + const OptixDenoiserGuideLayer * guideLayer, + const OptixDenoiserLayer * layers, + unsigned int numLayers, + unsigned int inputOffsetX, + unsigned int inputOffsetY, + CUdeviceptr scratch, + size_t scratchSizeInBytes ); + + /// See ::optixDenoiserComputeIntensity().
+ OptixResult ( *optixDenoiserComputeIntensity )( OptixDenoiser handle, + CUstream stream, + const OptixImage2D* inputImage, + CUdeviceptr outputIntensity, + CUdeviceptr scratch, + size_t scratchSizeInBytes ); + + /// See ::optixDenoiserComputeAverageColor(). + OptixResult ( *optixDenoiserComputeAverageColor )( OptixDenoiser handle, + CUstream stream, + const OptixImage2D* inputImage, + CUdeviceptr outputAverageColor, + CUdeviceptr scratch, + size_t scratchSizeInBytes ); + + /// See ::optixDenoiserCreateWithUserModel(). + OptixResult ( *optixDenoiserCreateWithUserModel )( OptixDeviceContext context, const void * data, size_t dataSizeInBytes, OptixDenoiser* returnHandle ); + //@ } + +} OptixFunctionTable; + +/*@}*/ // end group optix_function_table + +#ifdef __cplusplus +} +#endif + +#endif /* OPTIX_DEFINE_ABI_VERSION_ONLY */ + +#endif /* __optix_optix_function_table_h__ */ diff --git a/include/optix_function_table_definition.h b/include/optix_function_table_definition.h new file mode 100644 index 0000000..d40dd0d --- /dev/null +++ b/include/optix_function_table_definition.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and proprietary + * rights in and to this software, related documentation and any modifications thereto. + * Any use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation is strictly + * prohibited. + * + * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* + * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, + * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE.
IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY + * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT + * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF + * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR + * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGES + */ + +/// @file +/// @author NVIDIA Corporation +/// @brief OptiX public API header + +#ifndef __optix_optix_function_table_definition_h__ +#define __optix_optix_function_table_definition_h__ + +#include "optix_function_table.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** \addtogroup optix_function_table +@{ +*/ + +/// If the stubs in optix_stubs.h are used, then the function table needs to be defined in exactly +/// one translation unit. This can be achieved by including this header file in that translation +/// unit. +OptixFunctionTable g_optixFunctionTable; + +/*@}*/ // end group optix_function_table + +#ifdef __cplusplus +} +#endif + +#endif // __optix_optix_function_table_definition_h__ diff --git a/include/optix_host.h b/include/optix_host.h new file mode 100644 index 0000000..2ed0c82 --- /dev/null +++ b/include/optix_host.h @@ -0,0 +1,38 @@ + +/* + * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and proprietary + * rights in and to this software, related documentation and any modifications thereto. + * Any use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation is strictly + * prohibited. 
+ * + * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* + * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, + * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY + * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT + * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF + * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR + * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGES + */ + +/** + * @file optix_host.h + * @author NVIDIA Corporation + * @brief OptiX public API + * + * OptiX public API Reference - Host side + */ + +#if !defined(__OPTIX_INCLUDE_INTERNAL_HEADERS__) +# define __OPTIX_INCLUDE_INTERNAL_HEADERS__ +# define __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_HOST_H__ +#endif +#include "optix_7_host.h" +#if defined( __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_HOST_H__ ) +# undef __OPTIX_INCLUDE_INTERNAL_HEADERS__ +# undef __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_HOST_H__ +#endif diff --git a/include/optix_stack_size.h b/include/optix_stack_size.h new file mode 100644 index 0000000..a3ab70d --- /dev/null +++ b/include/optix_stack_size.h @@ -0,0 +1,337 @@ +/* + * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/// @file +/// @author NVIDIA Corporation +/// @brief OptiX public API header + +#ifndef __optix_optix_stack_size_h__ +#define __optix_optix_stack_size_h__ + +#include "optix.h" + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** \addtogroup optix_utilities +@{ +*/ + +/// Retrieves direct and continuation stack sizes for each program in the program group and accumulates the upper bounds +/// in the correponding output variables based on the semantic type of the program. Before the first invocation of this +/// function with a given instance of #OptixStackSizes, the members of that instance should be set to 0. 
+inline OptixResult optixUtilAccumulateStackSizes( OptixProgramGroup programGroup, OptixStackSizes* stackSizes ) +{ + if( !stackSizes ) + return OPTIX_ERROR_INVALID_VALUE; + + OptixStackSizes localStackSizes; + OptixResult result = optixProgramGroupGetStackSize( programGroup, &localStackSizes ); + if( result != OPTIX_SUCCESS ) + return result; + + stackSizes->cssRG = std::max( stackSizes->cssRG, localStackSizes.cssRG ); + stackSizes->cssMS = std::max( stackSizes->cssMS, localStackSizes.cssMS ); + stackSizes->cssCH = std::max( stackSizes->cssCH, localStackSizes.cssCH ); + stackSizes->cssAH = std::max( stackSizes->cssAH, localStackSizes.cssAH ); + stackSizes->cssIS = std::max( stackSizes->cssIS, localStackSizes.cssIS ); + stackSizes->cssCC = std::max( stackSizes->cssCC, localStackSizes.cssCC ); + stackSizes->dssDC = std::max( stackSizes->dssDC, localStackSizes.dssDC ); + + return OPTIX_SUCCESS; +} + +/// Computes the stack size values needed to configure a pipeline. +/// +/// See the programming guide for an explanation of the formula. +/// +/// \param[in] stackSizes Accumulated stack sizes of all programs in the call graph. +/// \param[in] maxTraceDepth Maximum depth of #optixTrace() calls. +/// \param[in] maxCCDepth Maximum depth of calls trees of continuation callables. +/// \param[in] maxDCDepth Maximum depth of calls trees of direct callables. +/// \param[out] directCallableStackSizeFromTraversal Direct stack size requirement for direct callables invoked from +/// IS or AH. +/// \param[out] directCallableStackSizeFromState Direct stack size requirement for direct callables invoked from +/// RG, MS, or CH. +/// \param[out] continuationStackSize Continuation stack requirement. 
+inline OptixResult optixUtilComputeStackSizes( const OptixStackSizes* stackSizes, + unsigned int maxTraceDepth, + unsigned int maxCCDepth, + unsigned int maxDCDepth, + unsigned int* directCallableStackSizeFromTraversal, + unsigned int* directCallableStackSizeFromState, + unsigned int* continuationStackSize ) +{ + if( !stackSizes ) + return OPTIX_ERROR_INVALID_VALUE; + + const unsigned int cssRG = stackSizes->cssRG; + const unsigned int cssMS = stackSizes->cssMS; + const unsigned int cssCH = stackSizes->cssCH; + const unsigned int cssAH = stackSizes->cssAH; + const unsigned int cssIS = stackSizes->cssIS; + const unsigned int cssCC = stackSizes->cssCC; + const unsigned int dssDC = stackSizes->dssDC; + + if( directCallableStackSizeFromTraversal ) + *directCallableStackSizeFromTraversal = maxDCDepth * dssDC; + if( directCallableStackSizeFromState ) + *directCallableStackSizeFromState = maxDCDepth * dssDC; + + // upper bound on continuation stack used by call trees of continuation callables + unsigned int cssCCTree = maxCCDepth * cssCC; + + // upper bound on continuation stack used by CH or MS programs including the call tree of + // continuation callables + unsigned int cssCHOrMSPlusCCTree = std::max( cssCH, cssMS ) + cssCCTree; + + // clang-format off + if( continuationStackSize ) + *continuationStackSize + = cssRG + cssCCTree + + ( std::max( maxTraceDepth, 1u ) - 1 ) * cssCHOrMSPlusCCTree + + std::min( maxTraceDepth, 1u ) * std::max( cssCHOrMSPlusCCTree, cssIS + cssAH ); + // clang-format on + + return OPTIX_SUCCESS; +} + +/// Computes the stack size values needed to configure a pipeline. +/// +/// This variant is similar to #optixUtilComputeStackSizes(), except that it expects the values dssDC and +/// maxDCDepth split by call site semantic. +/// +/// See programming guide for an explanation of the formula. +/// +/// \param[in] stackSizes Accumulated stack sizes of all programs in the call graph. 
+/// \param[in] dssDCFromTraversal Accumulated direct stack size of all DC programs invoked from IS +/// or AH. +/// \param[in] dssDCFromState Accumulated direct stack size of all DC programs invoked from RG, +/// MS, or CH. +/// \param[in] maxTraceDepth Maximum depth of #optixTrace() calls. +/// \param[in] maxCCDepth Maximum depth of calls trees of continuation callables. +/// \param[in] maxDCDepthFromTraversal Maximum depth of calls trees of direct callables invoked from IS +/// or AH. +/// \param[in] maxDCDepthFromState Maximum depth of calls trees of direct callables invoked from RG, +/// MS, or CH. +/// \param[out] directCallableStackSizeFromTraversal Direct stack size requirement for direct callables invoked from +/// IS or AH. +/// \param[out] directCallableStackSizeFromState Direct stack size requirement for direct callables invoked from +/// RG, MS, or CH. +/// \param[out] continuationStackSize Continuation stack requirement. +inline OptixResult optixUtilComputeStackSizesDCSplit( const OptixStackSizes* stackSizes, + unsigned int dssDCFromTraversal, + unsigned int dssDCFromState, + unsigned int maxTraceDepth, + unsigned int maxCCDepth, + unsigned int maxDCDepthFromTraversal, + unsigned int maxDCDepthFromState, + unsigned int* directCallableStackSizeFromTraversal, + unsigned int* directCallableStackSizeFromState, + unsigned int* continuationStackSize ) +{ + if( !stackSizes ) + return OPTIX_ERROR_INVALID_VALUE; + + const unsigned int cssRG = stackSizes->cssRG; + const unsigned int cssMS = stackSizes->cssMS; + const unsigned int cssCH = stackSizes->cssCH; + const unsigned int cssAH = stackSizes->cssAH; + const unsigned int cssIS = stackSizes->cssIS; + const unsigned int cssCC = stackSizes->cssCC; + // use dssDCFromTraversal and dssDCFromState instead of stackSizes->dssDC + + if( directCallableStackSizeFromTraversal ) + *directCallableStackSizeFromTraversal = maxDCDepthFromTraversal * dssDCFromTraversal; + if( directCallableStackSizeFromState ) + 
*directCallableStackSizeFromState = maxDCDepthFromState * dssDCFromState; + + // upper bound on continuation stack used by call trees of continuation callables + unsigned int cssCCTree = maxCCDepth * cssCC; + + // upper bound on continuation stack used by CH or MS programs including the call tree of + // continuation callables + unsigned int cssCHOrMSPlusCCTree = std::max( cssCH, cssMS ) + cssCCTree; + + // clang-format off + if( continuationStackSize ) + *continuationStackSize + = cssRG + cssCCTree + + ( std::max( maxTraceDepth, 1u ) - 1 ) * cssCHOrMSPlusCCTree + + std::min( maxTraceDepth, 1u ) * std::max( cssCHOrMSPlusCCTree, cssIS + cssAH ); + // clang-format on + + return OPTIX_SUCCESS; +} + +/// Computes the stack size values needed to configure a pipeline. +/// +/// This variant is similar to #optixUtilComputeStackSizes(), except that it expects the value cssCCTree +/// instead of cssCC and maxCCDepth. +/// +/// See programming guide for an explanation of the formula. +/// +/// \param[in] stackSizes Accumulated stack sizes of all programs in the call graph. +/// \param[in] cssCCTree Maximum stack size used by calls trees of continuation callables. +/// \param[in] maxTraceDepth Maximum depth of #optixTrace() calls. +/// \param[in] maxDCDepth Maximum depth of calls trees of direct callables. +/// \param[out] directCallableStackSizeFromTraversal Direct stack size requirement for direct callables invoked from +/// IS or AH. +/// \param[out] directCallableStackSizeFromState Direct stack size requirement for direct callables invoked from +/// RG, MS, or CH. +/// \param[out] continuationStackSize Continuation stack requirement. 
+inline OptixResult optixUtilComputeStackSizesCssCCTree( const OptixStackSizes* stackSizes, + unsigned int cssCCTree, + unsigned int maxTraceDepth, + unsigned int maxDCDepth, + unsigned int* directCallableStackSizeFromTraversal, + unsigned int* directCallableStackSizeFromState, + unsigned int* continuationStackSize ) +{ + if( !stackSizes ) + return OPTIX_ERROR_INVALID_VALUE; + + const unsigned int cssRG = stackSizes->cssRG; + const unsigned int cssMS = stackSizes->cssMS; + const unsigned int cssCH = stackSizes->cssCH; + const unsigned int cssAH = stackSizes->cssAH; + const unsigned int cssIS = stackSizes->cssIS; + // use cssCCTree instead of stackSizes->cssCC and maxCCDepth + const unsigned int dssDC = stackSizes->dssDC; + + if( directCallableStackSizeFromTraversal ) + *directCallableStackSizeFromTraversal = maxDCDepth * dssDC; + if( directCallableStackSizeFromState ) + *directCallableStackSizeFromState = maxDCDepth * dssDC; + + // upper bound on continuation stack used by CH or MS programs including the call tree of + // continuation callables + unsigned int cssCHOrMSPlusCCTree = std::max( cssCH, cssMS ) + cssCCTree; + + // clang-format off + if( continuationStackSize ) + *continuationStackSize + = cssRG + cssCCTree + + ( std::max( maxTraceDepth, 1u ) - 1 ) * cssCHOrMSPlusCCTree + + std::min( maxTraceDepth, 1u ) * std::max( cssCHOrMSPlusCCTree, cssIS + cssAH ); + // clang-format on + + return OPTIX_SUCCESS; +} + +/// Computes the stack size values needed to configure a pipeline. +/// +/// This variant is a specialization of #optixUtilComputeStackSizes() for a simple path tracer with the following +/// assumptions: There are only two ray types, camera rays and shadow rays. There are only RG, MS, and CH programs, and +/// no AH, IS, CC, or DC programs. The camera rays invoke only the miss and closest hit programs MS1 and CH1, +/// respectively. 
The CH1 program might trace shadow rays, which invoke only the miss and closest hit programs MS2 and +/// CH2, respectively. +/// +/// For flexibility, we allow for each of CH1 and CH2 not just one single program group, but an array of programs +/// groups, and compute the maximas of the stack size requirements per array. +/// +/// See programming guide for an explanation of the formula. +inline OptixResult optixUtilComputeStackSizesSimplePathTracer( OptixProgramGroup programGroupRG, + OptixProgramGroup programGroupMS1, + const OptixProgramGroup* programGroupCH1, + unsigned int programGroupCH1Count, + OptixProgramGroup programGroupMS2, + const OptixProgramGroup* programGroupCH2, + unsigned int programGroupCH2Count, + unsigned int* directCallableStackSizeFromTraversal, + unsigned int* directCallableStackSizeFromState, + unsigned int* continuationStackSize ) +{ + if( !programGroupCH1 && ( programGroupCH1Count > 0 ) ) + return OPTIX_ERROR_INVALID_VALUE; + if( !programGroupCH2 && ( programGroupCH2Count > 0 ) ) + return OPTIX_ERROR_INVALID_VALUE; + + OptixResult result; + + OptixStackSizes stackSizesRG = {}; + result = optixProgramGroupGetStackSize( programGroupRG, &stackSizesRG ); + if( result != OPTIX_SUCCESS ) + return result; + + OptixStackSizes stackSizesMS1 = {}; + result = optixProgramGroupGetStackSize( programGroupMS1, &stackSizesMS1 ); + if( result != OPTIX_SUCCESS ) + return result; + + OptixStackSizes stackSizesCH1 = {}; + for( unsigned int i = 0; i < programGroupCH1Count; ++i ) + { + result = optixUtilAccumulateStackSizes( programGroupCH1[i], &stackSizesCH1 ); + if( result != OPTIX_SUCCESS ) + return result; + } + + OptixStackSizes stackSizesMS2 = {}; + result = optixProgramGroupGetStackSize( programGroupMS2, &stackSizesMS2 ); + if( result != OPTIX_SUCCESS ) + return result; + + OptixStackSizes stackSizesCH2 = {}; + memset( &stackSizesCH2, 0, sizeof( OptixStackSizes ) ); + for( unsigned int i = 0; i < programGroupCH2Count; ++i ) + { + result = 
optixUtilAccumulateStackSizes( programGroupCH2[i], &stackSizesCH2 ); + if( result != OPTIX_SUCCESS ) + return result; + } + + const unsigned int cssRG = stackSizesRG.cssRG; + const unsigned int cssMS1 = stackSizesMS1.cssMS; + const unsigned int cssCH1 = stackSizesCH1.cssCH; + const unsigned int cssMS2 = stackSizesMS2.cssMS; + const unsigned int cssCH2 = stackSizesCH2.cssCH; + // no AH, IS, CC, or DC programs + + if( directCallableStackSizeFromTraversal ) + *directCallableStackSizeFromTraversal = 0; + if( directCallableStackSizeFromState ) + *directCallableStackSizeFromState = 0; + + if( continuationStackSize ) + *continuationStackSize = cssRG + std::max( cssMS1, cssCH1 + std::max( cssMS2, cssCH2 ) ); + + return OPTIX_SUCCESS; +} + +/*@}*/ // end group optix_utilities + +#ifdef __cplusplus +} +#endif + +#endif // __optix_optix_stack_size_h__ diff --git a/include/optix_stubs.h b/include/optix_stubs.h new file mode 100644 index 0000000..1b30878 --- /dev/null +++ b/include/optix_stubs.h @@ -0,0 +1,651 @@ +/* + * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/// @file +/// @author NVIDIA Corporation +/// @brief OptiX public API header + +#ifndef __optix_optix_stubs_h__ +#define __optix_optix_stubs_h__ + +#include "optix_function_table.h" + +#ifdef _WIN32 +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN 1 +#endif +#include <windows.h> +// The cfgmgr32 header is necessary for interrogating driver information in the registry. +// For convenience the library is also linked in automatically using the #pragma command. +#include <cfgmgr32.h> +#pragma comment( lib, "Cfgmgr32.lib" ) +#include <string.h> +#else +#include <dlfcn.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +// The function table needs to be defined in exactly one translation unit. This can be +// achieved by including optix_function_table_definition.h in that translation unit. +extern OptixFunctionTable g_optixFunctionTable; + +#ifdef _WIN32 +#if defined( _MSC_VER ) +// Visual Studio produces warnings suggesting strcpy and friends being replaced with _s +// variants. All the string lengths and allocation sizes have been calculated and should +// be safe, so we are disabling this warning to increase compatibility.
+# pragma warning( push ) +# pragma warning( disable : 4996 ) +#endif +static void* optixLoadWindowsDllFromName( const char* optixDllName ) +{ + void* handle = NULL; + + + // Get the size of the path first, then allocate + unsigned int size = GetSystemDirectoryA( NULL, 0 ); + if( size == 0 ) + { + // Couldn't get the system path size, so bail + return NULL; + } + size_t pathSize = size + 1 + strlen( optixDllName ); + char* systemPath = (char*)malloc( pathSize ); + if( systemPath == NULL ) + return NULL; + if( GetSystemDirectoryA( systemPath, size ) != size - 1 ) + { + // Something went wrong + free( systemPath ); + return NULL; + } + strcat( systemPath, "\\" ); + strcat( systemPath, optixDllName ); + handle = LoadLibraryA( systemPath ); + free( systemPath ); + if( handle ) + return handle; + + // If we didn't find it, go looking in the register store. Since nvoptix.dll doesn't + // have its own registry entry, we are going to look for the opengl driver which lives + // next to nvoptix.dll. 0 (null) will be returned if any errors occured. + + static const char* deviceInstanceIdentifiersGUID = "{4d36e968-e325-11ce-bfc1-08002be10318}"; + const ULONG flags = CM_GETIDLIST_FILTER_CLASS | CM_GETIDLIST_FILTER_PRESENT; + ULONG deviceListSize = 0; + if( CM_Get_Device_ID_List_SizeA( &deviceListSize, deviceInstanceIdentifiersGUID, flags ) != CR_SUCCESS ) + { + return NULL; + } + char* deviceNames = (char*)malloc( deviceListSize ); + if( deviceNames == NULL ) + return NULL; + if( CM_Get_Device_ID_ListA( deviceInstanceIdentifiersGUID, deviceNames, deviceListSize, flags ) ) + { + free( deviceNames ); + return NULL; + } + DEVINST devID = 0; + char* dllPath = NULL; + + // Continue to the next device if errors are encountered. 
+ for( char* deviceName = deviceNames; *deviceName; deviceName += strlen( deviceName ) + 1 ) + { + if( CM_Locate_DevNodeA( &devID, deviceName, CM_LOCATE_DEVNODE_NORMAL ) != CR_SUCCESS ) + { + continue; + } + HKEY regKey = 0; + if( CM_Open_DevNode_Key( devID, KEY_QUERY_VALUE, 0, RegDisposition_OpenExisting, ®Key, CM_REGISTRY_SOFTWARE ) != CR_SUCCESS ) + { + continue; + } + const char* valueName = "OpenGLDriverName"; + DWORD valueSize = 0; + LSTATUS ret = RegQueryValueExA( regKey, valueName, NULL, NULL, NULL, &valueSize ); + if( ret != ERROR_SUCCESS ) + { + RegCloseKey( regKey ); + continue; + } + char* regValue = (char*)malloc( valueSize ); + if( regValue == NULL ) + { + RegCloseKey( regKey ); + continue; + } + ret = RegQueryValueExA( regKey, valueName, NULL, NULL, (LPBYTE)regValue, &valueSize ); + if( ret != ERROR_SUCCESS ) + { + free( regValue ); + RegCloseKey( regKey ); + continue; + } + // Strip the opengl driver dll name from the string then create a new string with + // the path and the nvoptix.dll name + for( int i = (int) valueSize - 1; i >= 0 && regValue[i] != '\\'; --i ) + regValue[i] = '\0'; + size_t newPathSize = strlen( regValue ) + strlen( optixDllName ) + 1; + dllPath = (char*)malloc( newPathSize ); + if( dllPath == NULL ) + { + free( regValue ); + RegCloseKey( regKey ); + continue; + } + strcpy( dllPath, regValue ); + strcat( dllPath, optixDllName ); + free( regValue ); + RegCloseKey( regKey ); + handle = LoadLibraryA( (LPCSTR)dllPath ); + free( dllPath ); + if( handle ) + break; + } + free( deviceNames ); + return handle; +} +#if defined( _MSC_VER ) +# pragma warning( pop ) +#endif + +static void* optixLoadWindowsDll( ) +{ + return optixLoadWindowsDllFromName( "nvoptix.dll" ); +} +#endif + +/// \defgroup optix_utilities Utilities +/// \brief OptiX Utilities + +/** \addtogroup optix_utilities +@{ +*/ + +/// Loads the OptiX library and initializes the function table used by the stubs below. 
+/// +/// If handlePtr is not nullptr, an OS-specific handle to the library will be returned in *handlePtr. +/// +/// \see #optixUninitWithHandle +inline OptixResult optixInitWithHandle( void** handlePtr ) +{ + // Make sure these functions get initialized to zero in case the DLL and function + // table can't be loaded + g_optixFunctionTable.optixGetErrorName = 0; + g_optixFunctionTable.optixGetErrorString = 0; + + if( !handlePtr ) + return OPTIX_ERROR_INVALID_VALUE; + +#ifdef _WIN32 + *handlePtr = optixLoadWindowsDll(); + if( !*handlePtr ) + return OPTIX_ERROR_LIBRARY_NOT_FOUND; + + void* symbol = GetProcAddress( (HMODULE)*handlePtr, "optixQueryFunctionTable" ); + if( !symbol ) + return OPTIX_ERROR_ENTRY_SYMBOL_NOT_FOUND; +#else + *handlePtr = dlopen( "libnvoptix.so.1", RTLD_NOW ); + if( !*handlePtr ) + return OPTIX_ERROR_LIBRARY_NOT_FOUND; + + void* symbol = dlsym( *handlePtr, "optixQueryFunctionTable" ); + if( !symbol ) + return OPTIX_ERROR_ENTRY_SYMBOL_NOT_FOUND; +#endif + + OptixQueryFunctionTable_t* optixQueryFunctionTable = (OptixQueryFunctionTable_t*)symbol; + + return optixQueryFunctionTable( OPTIX_ABI_VERSION, 0, 0, 0, &g_optixFunctionTable, sizeof( g_optixFunctionTable ) ); +} + +/// Loads the OptiX library and initializes the function table used by the stubs below. +/// +/// A variant of #optixInitWithHandle() that does not make the handle to the loaded library available. +inline OptixResult optixInit( void ) +{ + void* handle; + return optixInitWithHandle( &handle ); +} + +/// Unloads the OptiX library and zeros the function table used by the stubs below. Takes the +/// handle returned by optixInitWithHandle. All OptixDeviceContext objects must be destroyed +/// before calling this function, or the behavior is undefined. 
///
/// \see #optixInitWithHandle
inline OptixResult optixUninitWithHandle( void* handle )
{
    // A null handle cannot be unloaded.
    if( !handle )
        return OPTIX_ERROR_INVALID_VALUE;
#ifdef _WIN32
    // FreeLibrary() returns zero on failure.
    if( !FreeLibrary( (HMODULE)handle ) )
        return OPTIX_ERROR_LIBRARY_UNLOAD_FAILURE;
#else
    // dlclose() returns non-zero on failure.
    if( dlclose( handle ) )
        return OPTIX_ERROR_LIBRARY_UNLOAD_FAILURE;
#endif
    // Zero the whole table only after the library was unloaded successfully.
    OptixFunctionTable empty = { 0 };
    g_optixFunctionTable = empty;
    return OPTIX_SUCCESS;
}


/*@}*/  // end group optix_utilities

#ifndef OPTIX_DOXYGEN_SHOULD_SKIP_THIS

// Stub functions that forward calls to the corresponding function pointer in the function table.

// Returns the name of an OptixResult code. Uses the function table entry when it is set and a
// small built-in table covering the loader-related codes otherwise.
inline const char* optixGetErrorName( OptixResult result )
{
    if( g_optixFunctionTable.optixGetErrorName )
        return g_optixFunctionTable.optixGetErrorName( result );

    // If the DLL and symbol table couldn't be loaded, provide a set of error strings
    // suitable for processing errors related to the DLL loading.
    switch( result )
    {
        case OPTIX_SUCCESS:
            return "OPTIX_SUCCESS";
        case OPTIX_ERROR_INVALID_VALUE:
            return "OPTIX_ERROR_INVALID_VALUE";
        case OPTIX_ERROR_UNSUPPORTED_ABI_VERSION:
            return "OPTIX_ERROR_UNSUPPORTED_ABI_VERSION";
        case OPTIX_ERROR_FUNCTION_TABLE_SIZE_MISMATCH:
            return "OPTIX_ERROR_FUNCTION_TABLE_SIZE_MISMATCH";
        case OPTIX_ERROR_INVALID_ENTRY_FUNCTION_OPTIONS:
            return "OPTIX_ERROR_INVALID_ENTRY_FUNCTION_OPTIONS";
        case OPTIX_ERROR_LIBRARY_NOT_FOUND:
            return "OPTIX_ERROR_LIBRARY_NOT_FOUND";
        case OPTIX_ERROR_ENTRY_SYMBOL_NOT_FOUND:
            return "OPTIX_ERROR_ENTRY_SYMBOL_NOT_FOUND";
        case OPTIX_ERROR_LIBRARY_UNLOAD_FAILURE:
            return "OPTIX_ERROR_LIBRARY_UNLOAD_FAILURE";
        default:
            return "Unknown OptixResult code";
    }
}

// Returns a human-readable description of an OptixResult code. Uses the function table entry
// when it is set and a small built-in table covering the loader-related codes otherwise.
inline const char* optixGetErrorString( OptixResult result )
{
    if( g_optixFunctionTable.optixGetErrorString )
        return g_optixFunctionTable.optixGetErrorString( result );

    // If the DLL and symbol table couldn't be loaded, provide a set of error strings
    // suitable for processing errors related to the DLL loading.
    switch( result )
    {
        case OPTIX_SUCCESS:
            return "Success";
        case OPTIX_ERROR_INVALID_VALUE:
            return "Invalid value";
        case OPTIX_ERROR_UNSUPPORTED_ABI_VERSION:
            return "Unsupported ABI version";
        case OPTIX_ERROR_FUNCTION_TABLE_SIZE_MISMATCH:
            return "Function table size mismatch";
        case OPTIX_ERROR_INVALID_ENTRY_FUNCTION_OPTIONS:
            return "Invalid options to entry function";
        case OPTIX_ERROR_LIBRARY_NOT_FOUND:
            return "Library not found";
        case OPTIX_ERROR_ENTRY_SYMBOL_NOT_FOUND:
            return "Entry symbol not found";
        case OPTIX_ERROR_LIBRARY_UNLOAD_FAILURE:
            return "Library could not be unloaded";
        default:
            return "Unknown OptixResult code";
    }
}

// The stubs below forward their arguments unchanged to the function table entry of the same
// name and return its result. They do not check whether that entry has been set.

inline OptixResult optixDeviceContextCreate( CUcontext fromContext, const OptixDeviceContextOptions* options, OptixDeviceContext* context )
{
    return g_optixFunctionTable.optixDeviceContextCreate( fromContext, options, context );
}

inline OptixResult optixDeviceContextDestroy( OptixDeviceContext context )
{
    return g_optixFunctionTable.optixDeviceContextDestroy( context );
}

inline OptixResult optixDeviceContextGetProperty( OptixDeviceContext context, OptixDeviceProperty property, void* value, size_t sizeInBytes )
{
    return g_optixFunctionTable.optixDeviceContextGetProperty( context, property, value, sizeInBytes );
}

inline OptixResult optixDeviceContextSetLogCallback( OptixDeviceContext context,
                                                     OptixLogCallback   callbackFunction,
                                                     void*              callbackData,
                                                     unsigned int       callbackLevel )
{
    return g_optixFunctionTable.optixDeviceContextSetLogCallback( context, callbackFunction, callbackData, callbackLevel );
}

inline OptixResult optixDeviceContextSetCacheEnabled( OptixDeviceContext context, int enabled )
{
    return g_optixFunctionTable.optixDeviceContextSetCacheEnabled( context, enabled );
}

inline OptixResult optixDeviceContextSetCacheLocation( OptixDeviceContext context, const char* location )
{
    return g_optixFunctionTable.optixDeviceContextSetCacheLocation( context, location );
}

inline OptixResult optixDeviceContextSetCacheDatabaseSizes( OptixDeviceContext context, size_t lowWaterMark, size_t highWaterMark )
{
    return g_optixFunctionTable.optixDeviceContextSetCacheDatabaseSizes( context, lowWaterMark, highWaterMark );
}

inline OptixResult optixDeviceContextGetCacheEnabled( OptixDeviceContext context, int* enabled )
{
    return g_optixFunctionTable.optixDeviceContextGetCacheEnabled( context, enabled );
}

inline OptixResult optixDeviceContextGetCacheLocation( OptixDeviceContext context, char* location, size_t locationSize )
{
    return g_optixFunctionTable.optixDeviceContextGetCacheLocation( context, location, locationSize );
}

inline OptixResult optixDeviceContextGetCacheDatabaseSizes( OptixDeviceContext context, size_t* lowWaterMark, size_t* highWaterMark )
{
    return g_optixFunctionTable.optixDeviceContextGetCacheDatabaseSizes( context, lowWaterMark, highWaterMark );
}

inline OptixResult optixModuleCreateFromPTX( OptixDeviceContext                 context,
                                             const OptixModuleCompileOptions*   moduleCompileOptions,
                                             const OptixPipelineCompileOptions* pipelineCompileOptions,
                                             const char*                        PTX,
                                             size_t                             PTXsize,
                                             char*                              logString,
                                             size_t*                            logStringSize,
                                             OptixModule*                       module )
{
    return g_optixFunctionTable.optixModuleCreateFromPTX( context, moduleCompileOptions, pipelineCompileOptions, PTX,
                                                          PTXsize, logString, logStringSize, module );
}

inline OptixResult optixModuleCreateFromPTXWithTasks( OptixDeviceContext                 context,
                                                      const OptixModuleCompileOptions*   moduleCompileOptions,
                                                      const OptixPipelineCompileOptions* pipelineCompileOptions,
                                                      const char*                        PTX,
                                                      size_t                             PTXsize,
                                                      char*                              logString,
                                                      size_t*                            logStringSize,
                                                      OptixModule*                       module,
                                                      OptixTask*                         firstTask )
{
    return g_optixFunctionTable.optixModuleCreateFromPTXWithTasks( context, moduleCompileOptions, pipelineCompileOptions, PTX,
                                                                   PTXsize, logString, logStringSize, module, firstTask );
}

+inline OptixResult optixModuleGetCompilationState( OptixModule module, OptixModuleCompileState* state ) +{ + return g_optixFunctionTable.optixModuleGetCompilationState( module, state ); +} + +inline OptixResult optixModuleDestroy( OptixModule module ) +{ + return g_optixFunctionTable.optixModuleDestroy( module ); +} + +inline OptixResult optixBuiltinISModuleGet( OptixDeviceContext context, + const OptixModuleCompileOptions* moduleCompileOptions, + const OptixPipelineCompileOptions* pipelineCompileOptions, + const OptixBuiltinISOptions* builtinISOptions, + OptixModule* builtinModule ) +{ + return g_optixFunctionTable.optixBuiltinISModuleGet( context, moduleCompileOptions, pipelineCompileOptions, + builtinISOptions, builtinModule ); +} + +inline OptixResult optixTaskExecute( OptixTask task, OptixTask* additionalTasks, unsigned int maxNumAdditionalTasks, unsigned int* numAdditionalTasksCreated ) +{ + return g_optixFunctionTable.optixTaskExecute( task, additionalTasks, maxNumAdditionalTasks, numAdditionalTasksCreated ); +} + +inline OptixResult optixProgramGroupCreate( OptixDeviceContext context, + const OptixProgramGroupDesc* programDescriptions, + unsigned int numProgramGroups, + const OptixProgramGroupOptions* options, + char* logString, + size_t* logStringSize, + OptixProgramGroup* programGroups ) +{ + return g_optixFunctionTable.optixProgramGroupCreate( context, programDescriptions, numProgramGroups, options, + logString, logStringSize, programGroups ); +} + +inline OptixResult optixProgramGroupDestroy( OptixProgramGroup programGroup ) +{ + return g_optixFunctionTable.optixProgramGroupDestroy( programGroup ); +} + +inline OptixResult optixProgramGroupGetStackSize( OptixProgramGroup programGroup, OptixStackSizes* stackSizes ) +{ + return g_optixFunctionTable.optixProgramGroupGetStackSize( programGroup, stackSizes ); +} + +inline OptixResult optixPipelineCreate( OptixDeviceContext context, + const OptixPipelineCompileOptions* pipelineCompileOptions, + const 
OptixPipelineLinkOptions* pipelineLinkOptions, + const OptixProgramGroup* programGroups, + unsigned int numProgramGroups, + char* logString, + size_t* logStringSize, + OptixPipeline* pipeline ) +{ + return g_optixFunctionTable.optixPipelineCreate( context, pipelineCompileOptions, pipelineLinkOptions, programGroups, + numProgramGroups, logString, logStringSize, pipeline ); +} + +inline OptixResult optixPipelineDestroy( OptixPipeline pipeline ) +{ + return g_optixFunctionTable.optixPipelineDestroy( pipeline ); +} + +inline OptixResult optixPipelineSetStackSize( OptixPipeline pipeline, + unsigned int directCallableStackSizeFromTraversal, + unsigned int directCallableStackSizeFromState, + unsigned int continuationStackSize, + unsigned int maxTraversableGraphDepth ) +{ + return g_optixFunctionTable.optixPipelineSetStackSize( pipeline, directCallableStackSizeFromTraversal, directCallableStackSizeFromState, + continuationStackSize, maxTraversableGraphDepth ); +} + +inline OptixResult optixAccelComputeMemoryUsage( OptixDeviceContext context, + const OptixAccelBuildOptions* accelOptions, + const OptixBuildInput* buildInputs, + unsigned int numBuildInputs, + OptixAccelBufferSizes* bufferSizes ) +{ + return g_optixFunctionTable.optixAccelComputeMemoryUsage( context, accelOptions, buildInputs, numBuildInputs, bufferSizes ); +} + +inline OptixResult optixAccelBuild( OptixDeviceContext context, + CUstream stream, + const OptixAccelBuildOptions* accelOptions, + const OptixBuildInput* buildInputs, + unsigned int numBuildInputs, + CUdeviceptr tempBuffer, + size_t tempBufferSizeInBytes, + CUdeviceptr outputBuffer, + size_t outputBufferSizeInBytes, + OptixTraversableHandle* outputHandle, + const OptixAccelEmitDesc* emittedProperties, + unsigned int numEmittedProperties ) +{ + return g_optixFunctionTable.optixAccelBuild( context, stream, accelOptions, buildInputs, numBuildInputs, tempBuffer, + tempBufferSizeInBytes, outputBuffer, outputBufferSizeInBytes, + outputHandle, 
emittedProperties, numEmittedProperties ); +} + + +inline OptixResult optixAccelGetRelocationInfo( OptixDeviceContext context, OptixTraversableHandle handle, OptixAccelRelocationInfo* info ) +{ + return g_optixFunctionTable.optixAccelGetRelocationInfo( context, handle, info ); +} + + +inline OptixResult optixAccelCheckRelocationCompatibility( OptixDeviceContext context, const OptixAccelRelocationInfo* info, int* compatible ) +{ + return g_optixFunctionTable.optixAccelCheckRelocationCompatibility( context, info, compatible ); +} + +inline OptixResult optixAccelRelocate( OptixDeviceContext context, + CUstream stream, + const OptixAccelRelocationInfo* info, + CUdeviceptr instanceTraversableHandles, + size_t numInstanceTraversableHandles, + CUdeviceptr targetAccel, + size_t targetAccelSizeInBytes, + OptixTraversableHandle* targetHandle ) +{ + return g_optixFunctionTable.optixAccelRelocate( context, stream, info, instanceTraversableHandles, numInstanceTraversableHandles, + targetAccel, targetAccelSizeInBytes, targetHandle ); +} + +inline OptixResult optixAccelCompact( OptixDeviceContext context, + CUstream stream, + OptixTraversableHandle inputHandle, + CUdeviceptr outputBuffer, + size_t outputBufferSizeInBytes, + OptixTraversableHandle* outputHandle ) +{ + return g_optixFunctionTable.optixAccelCompact( context, stream, inputHandle, outputBuffer, outputBufferSizeInBytes, outputHandle ); +} + +inline OptixResult optixConvertPointerToTraversableHandle( OptixDeviceContext onDevice, + CUdeviceptr pointer, + OptixTraversableType traversableType, + OptixTraversableHandle* traversableHandle ) +{ + return g_optixFunctionTable.optixConvertPointerToTraversableHandle( onDevice, pointer, traversableType, traversableHandle ); +} + + +inline OptixResult optixSbtRecordPackHeader( OptixProgramGroup programGroup, void* sbtRecordHeaderHostPointer ) +{ + return g_optixFunctionTable.optixSbtRecordPackHeader( programGroup, sbtRecordHeaderHostPointer ); +} + +inline OptixResult optixLaunch( 
OptixPipeline pipeline, + CUstream stream, + CUdeviceptr pipelineParams, + size_t pipelineParamsSize, + const OptixShaderBindingTable* sbt, + unsigned int width, + unsigned int height, + unsigned int depth ) +{ + return g_optixFunctionTable.optixLaunch( pipeline, stream, pipelineParams, pipelineParamsSize, sbt, width, height, depth ); +} + +inline OptixResult optixDenoiserCreate( OptixDeviceContext context, OptixDenoiserModelKind modelKind, const OptixDenoiserOptions* options, OptixDenoiser* returnHandle ) +{ + return g_optixFunctionTable.optixDenoiserCreate( context, modelKind, options, returnHandle ); +} + +inline OptixResult optixDenoiserCreateWithUserModel( OptixDeviceContext context, const void* data, size_t dataSizeInBytes, OptixDenoiser* returnHandle ) +{ + return g_optixFunctionTable.optixDenoiserCreateWithUserModel( context, data, dataSizeInBytes, returnHandle ); +} + +inline OptixResult optixDenoiserDestroy( OptixDenoiser handle ) +{ + return g_optixFunctionTable.optixDenoiserDestroy( handle ); +} + +inline OptixResult optixDenoiserComputeMemoryResources( const OptixDenoiser handle, + unsigned int maximumInputWidth, + unsigned int maximumInputHeight, + OptixDenoiserSizes* returnSizes ) +{ + return g_optixFunctionTable.optixDenoiserComputeMemoryResources( handle, maximumInputWidth, maximumInputHeight, returnSizes ); +} + +inline OptixResult optixDenoiserSetup( OptixDenoiser denoiser, + CUstream stream, + unsigned int inputWidth, + unsigned int inputHeight, + CUdeviceptr denoiserState, + size_t denoiserStateSizeInBytes, + CUdeviceptr scratch, + size_t scratchSizeInBytes ) +{ + return g_optixFunctionTable.optixDenoiserSetup( denoiser, stream, inputWidth, inputHeight, denoiserState, + denoiserStateSizeInBytes, scratch, scratchSizeInBytes ); +} + +inline OptixResult optixDenoiserInvoke( OptixDenoiser handle, + CUstream stream, + const OptixDenoiserParams* params, + CUdeviceptr denoiserData, + size_t denoiserDataSize, + const OptixDenoiserGuideLayer* 
guideLayer, + const OptixDenoiserLayer* layers, + unsigned int numLayers, + unsigned int inputOffsetX, + unsigned int inputOffsetY, + CUdeviceptr scratch, + size_t scratchSizeInBytes ) +{ + return g_optixFunctionTable.optixDenoiserInvoke( handle, stream, params, denoiserData, denoiserDataSize, + guideLayer, layers, numLayers, + inputOffsetX, inputOffsetY, scratch, scratchSizeInBytes ); +} + +inline OptixResult optixDenoiserComputeIntensity( OptixDenoiser handle, + CUstream stream, + const OptixImage2D* inputImage, + CUdeviceptr outputIntensity, + CUdeviceptr scratch, + size_t scratchSizeInBytes ) +{ + return g_optixFunctionTable.optixDenoiserComputeIntensity( handle, stream, inputImage, outputIntensity, scratch, scratchSizeInBytes ); +} + +inline OptixResult optixDenoiserComputeAverageColor( OptixDenoiser handle, + CUstream stream, + const OptixImage2D* inputImage, + CUdeviceptr outputAverageColor, + CUdeviceptr scratch, + size_t scratchSizeInBytes ) +{ + return g_optixFunctionTable.optixDenoiserComputeAverageColor( handle, stream, inputImage, outputAverageColor, scratch, scratchSizeInBytes ); +} + +#endif // OPTIX_DOXYGEN_SHOULD_SKIP_THIS + +#ifdef __cplusplus +} +#endif + +#endif // __optix_optix_stubs_h__ diff --git a/include/optix_types.h b/include/optix_types.h new file mode 100644 index 0000000..cfe0a10 --- /dev/null +++ b/include/optix_types.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and proprietary + * rights in and to this software, related documentation and any modifications thereto. + * Any use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation is strictly + * prohibited. 
IS_WINDOWS = sys.platform == 'win32'
SUBPROCESS_DECODE_ARGS = ('oem',) if IS_WINDOWS else ()


def _cuda_version_key(path):
    """Return a sortable key for a ``.../CUDA/vX.Y`` toolkit directory."""
    version = os.path.basename(os.path.normpath(path)).lstrip('vV')
    # Compare numerically so that v11.8 ranks above v9.2; a non-numeric
    # component sorts below every real version number.
    return tuple(int(part) if part.isdigit() else -1
                 for part in version.split('.'))


def _find_cuda_home():
    """Find the CUDA install path.

    Three guesses are tried in order:

    1. The ``CUDA_HOME`` environment variable, then ``CUDA_PATH``. An empty
       value counts as unset. A non-empty value is returned as given,
       without checking that the directory exists.
    2. The grandparent directory of the first ``nvcc`` reported by
       ``where`` (Windows) or ``which`` (elsewhere).
    3. The default install location: the highest-versioned toolkit
       directory under ``C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA``
       on Windows, ``/usr/local/cuda`` elsewhere.

    Returns:
        The install path as a string, or ``None`` when no guess yields an
        existing directory. Callers must handle ``None``.
    """
    # Guess #1: explicit override from the environment.
    cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH')
    if cuda_home:
        return cuda_home

    # Guess #2: locate nvcc on PATH.
    which = 'where' if IS_WINDOWS else 'which'
    try:
        output = subprocess.check_output(
            [which, 'nvcc'], stderr=subprocess.DEVNULL,
        ).decode(*SUBPROCESS_DECODE_ARGS)
    except (OSError, subprocess.SubprocessError, UnicodeError):
        # Lookup tool missing, nvcc not found, or undecodable output:
        # fall through to the default locations.
        output = ''
    # `where` may print several matches, one per line; use the first.
    matches = [line for line in output.splitlines() if line.strip()]
    if matches:
        # <cuda_home>/bin/nvcc -> <cuda_home>
        return os.path.dirname(os.path.dirname(matches[0]))

    # Guess #3: conventional install locations.
    if IS_WINDOWS:
        candidates = glob.glob(
            'C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v*.*')
        # Prefer the newest toolkit instead of whichever glob lists first.
        cuda_home = max(candidates, key=_cuda_version_key) if candidates else ''
    else:
        cuda_home = '/usr/local/cuda'
    return cuda_home if os.path.exists(cuda_home) else None
class build_ext_subclass(build_ext):
    """``build_ext`` variant that withholds the C++17 flag from ``.c`` sources."""

    def build_extensions(self):
        """Build all extensions with a temporarily wrapped ``_compile`` hook.

        While the build runs, the compiler's ``_compile`` is replaced by a
        wrapper that drops every extra argument containing ``"c++17"`` when
        the source file name ends in ``.c``. The override is removed again
        afterwards, whether or not the build succeeds.
        """
        wrapped_compile = self.compiler._compile

        def _compile_hook(obj, src, ext, cc_args, extra_postargs, pp_opts):
            # Plain C sources must not receive the C++ standard switch.
            if src.endswith('.c'):
                extra_postargs = list(
                    filter(lambda flag: "c++17" not in flag, extra_postargs)
                )
            return wrapped_compile(obj, src, ext, cc_args, extra_postargs, pp_opts)

        self.compiler._compile = _compile_hook
        try:
            build_ext.build_extensions(self)
        finally:
            # Drop the instance-level override set above.
            del self.compiler._compile