diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..a3062beae --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.vscode/* diff --git a/CHANGELOG.md b/CHANGELOG.md index 4bd17b625..c619c9230 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ ## Changelog +### CUDA 11.6 +* Added new folder structure for samples +* Added support of Visual Studio 2022 to all samples supported on [Windows](#windows-1). +* All CUDA samples are now only available on [GitHub](https://github.com/nvidia/cuda-samples). They are no longer available via CUDA toolkit. + ### CUDA 11.5 * Added `cuDLAHybridMode`. Demonstrate usage of cuDLA in hybrid mode. * Added `cuDLAStandaloneMode`. Demonstrate usage of cuDLA in standalone mode. diff --git a/Common/helper_cuda.h b/Common/helper_cuda.h index 98a5a7b61..f6bea97a2 100644 --- a/Common/helper_cuda.h +++ b/Common/helper_cuda.h @@ -666,6 +666,7 @@ inline int _ConvertSMVer2Cores(int major, int minor) { {0x80, 64}, {0x86, 128}, {0x87, 128}, + {0x90, 128}, {-1, -1}}; int index = 0; @@ -712,6 +713,8 @@ inline const char* _ConvertSMVer2ArchName(int major, int minor) { {0x75, "Turing"}, {0x80, "Ampere"}, {0x86, "Ampere"}, + {0x87, "Ampere"}, + {0x90, "Hopper"}, {-1, "Graphics Device"}}; int index = 0; diff --git a/Common/helper_cuda_drvapi.h b/Common/helper_cuda_drvapi.h index f0362d64b..80979b5b0 100644 --- a/Common/helper_cuda_drvapi.h +++ b/Common/helper_cuda_drvapi.h @@ -114,6 +114,7 @@ inline int _ConvertSMVer2CoresDRV(int major, int minor) { {0x80, 64}, {0x86, 128}, {0x87, 128}, + {0x90, 128}, {-1, -1}}; int index = 0; diff --git a/README.md b/README.md index 7a37e1985..354fa6a45 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,12 @@ # CUDA Samples -Samples for CUDA Developers which demonstrates features in CUDA Toolkit. This version supports [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads). +Samples for CUDA Developers which demonstrates features in CUDA Toolkit. 
This version supports [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads). ## Release Notes This section describes the release notes for the CUDA Samples on GitHub only. -### CUDA 11.6 +### CUDA 11.8 * Added new folder structure for samples * Added support of Visual Studio 2022 to all samples supported on [Windows](#windows-1). * All CUDA samples are now only available on [GitHub](https://github.com/nvidia/cuda-samples). They are no longer available via CUDA toolkit. @@ -17,7 +17,7 @@ This section describes the release notes for the CUDA Samples on GitHub only. ### Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. For system requirements and installation instructions of cuda toolkit, please refer to the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/), and the [Windows Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html). ### Getting the CUDA Samples @@ -263,4 +263,4 @@ Answers to frequently asked questions about CUDA can be found at http://develope ## Attributions -* Teapot image is obtained from [Wikimedia](https://en.wikipedia.org/wiki/File:Original_Utah_Teapot.jpg) and is licensed under the Creative Commons [Attribution-Share Alike 2.0](https://creativecommons.org/licenses/by-sa/2.0/deed.en) Generic license. The image is modified for samples use cases. \ No newline at end of file +* Teapot image is obtained from [Wikimedia](https://en.wikipedia.org/wiki/File:Original_Utah_Teapot.jpg) and is licensed under the Creative Commons [Attribution-Share Alike 2.0](https://creativecommons.org/licenses/by-sa/2.0/deed.en) Generic license. The image is modified for samples use cases. 
diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/Makefile b/Samples/0_Introduction/UnifiedMemoryStreams/Makefile index ba7b78b45..e244dab55 100644 --- a/Samples/0_Introduction/UnifiedMemoryStreams/Makefile +++ b/Samples/0_Introduction/UnifiedMemoryStreams/Makefile @@ -318,9 +318,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/NsightEclipse.xml b/Samples/0_Introduction/UnifiedMemoryStreams/NsightEclipse.xml index 033c1c50e..744caa124 100644 --- a/Samples/0_Introduction/UnifiedMemoryStreams/NsightEclipse.xml +++ b/Samples/0_Introduction/UnifiedMemoryStreams/NsightEclipse.xml @@ -6,11 +6,11 @@ cudaStreamDestroy cudaFree cudaMallocManaged - cudaStreamCreate - cudaDeviceSynchronize cudaStreamAttachMemAsync cudaSetDevice + cudaDeviceSynchronize cudaStreamSynchronize + cudaStreamCreate cudaGetDeviceProperties @@ -70,6 +70,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/README.md b/Samples/0_Introduction/UnifiedMemoryStreams/README.md index 347649da1..417cf3a07 100644 --- a/Samples/0_Introduction/UnifiedMemoryStreams/README.md +++ b/Samples/0_Introduction/UnifiedMemoryStreams/README.md @@ -10,7 +10,7 @@ CUDA Systems Integration, OpenMP, CUBLAS, Multithreading, Unified Memory, CUDA S ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 
](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaStreamDestroy, cudaFree, cudaMallocManaged, cudaStreamCreate, cudaDeviceSynchronize, cudaStreamAttachMemAsync, cudaSetDevice, cudaStreamSynchronize, cudaGetDeviceProperties +cudaStreamDestroy, cudaFree, cudaMallocManaged, cudaStreamAttachMemAsync, cudaSetDevice, cudaDeviceSynchronize, cudaStreamSynchronize, cudaStreamCreate, cudaGetDeviceProperties ## Dependencies needed to build/run [OpenMP](../../../README.md#openmp), [UVM](../../../README.md#uvm), [CUBLAS](../../../README.md#cublas) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2017.vcxproj b/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2017.vcxproj index e5e99aacd..9680c777c 100644 --- a/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2017.vcxproj +++ b/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/UnifiedMemoryStreams.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2019.vcxproj b/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2019.vcxproj index 75e8d36d0..866e26dcb 100644 --- a/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2019.vcxproj +++ b/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/UnifiedMemoryStreams.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2022.vcxproj b/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2022.vcxproj 
index ba409655f..074787481 100644 --- a/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2022.vcxproj +++ b/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/UnifiedMemoryStreams.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/0_Introduction/asyncAPI/Makefile b/Samples/0_Introduction/asyncAPI/Makefile index 885bbc8e4..71bb47940 100644 --- a/Samples/0_Introduction/asyncAPI/Makefile +++ b/Samples/0_Introduction/asyncAPI/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/asyncAPI/NsightEclipse.xml b/Samples/0_Introduction/asyncAPI/NsightEclipse.xml index 6d0bbc624..d823ac8a0 100644 --- a/Samples/0_Introduction/asyncAPI/NsightEclipse.xml +++ b/Samples/0_Introduction/asyncAPI/NsightEclipse.xml @@ -3,21 +3,21 @@ asyncAPI - cudaMemset + cudaProfilerStop + cudaMalloc + cudaMemcpyAsync cudaFree - cudaEventRecord cudaMallocHost cudaProfilerStart - cudaEventCreate - cudaEventElapsedTime cudaDeviceSynchronize + cudaEventRecord cudaFreeHost - cudaMalloc - cudaEventQuery - cudaProfilerStop + cudaMemset cudaEventDestroy - cudaMemcpyAsync + cudaEventQuery + cudaEventElapsedTime cudaGetDeviceProperties + cudaEventCreate whole @@ -59,6 +59,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git 
a/Samples/0_Introduction/asyncAPI/README.md b/Samples/0_Introduction/asyncAPI/README.md index 81da4efc8..7f4f3b421 100644 --- a/Samples/0_Introduction/asyncAPI/README.md +++ b/Samples/0_Introduction/asyncAPI/README.md @@ -10,7 +10,7 @@ Asynchronous Data Transfers, CUDA Streams and Events ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaEventRecord, cudaMallocHost, cudaProfilerStart, cudaEventCreate, cudaEventElapsedTime, cudaDeviceSynchronize, cudaFreeHost, cudaMalloc, 
cudaEventQuery, cudaProfilerStop, cudaEventDestroy, cudaMemcpyAsync, cudaGetDeviceProperties +cudaProfilerStop, cudaMalloc, cudaMemcpyAsync, cudaFree, cudaMallocHost, cudaProfilerStart, cudaDeviceSynchronize, cudaEventRecord, cudaFreeHost, cudaMemset, cudaEventDestroy, cudaEventQuery, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/0_Introduction/asyncAPI/asyncAPI_vs2017.vcxproj b/Samples/0_Introduction/asyncAPI/asyncAPI_vs2017.vcxproj index f2de8d874..ccea698df 100644 --- a/Samples/0_Introduction/asyncAPI/asyncAPI_vs2017.vcxproj +++ b/Samples/0_Introduction/asyncAPI/asyncAPI_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/asyncAPI.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/asyncAPI/asyncAPI_vs2019.vcxproj b/Samples/0_Introduction/asyncAPI/asyncAPI_vs2019.vcxproj index be2679b82..56489567a 100644 --- a/Samples/0_Introduction/asyncAPI/asyncAPI_vs2019.vcxproj +++ b/Samples/0_Introduction/asyncAPI/asyncAPI_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/asyncAPI.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/asyncAPI/asyncAPI_vs2022.vcxproj b/Samples/0_Introduction/asyncAPI/asyncAPI_vs2022.vcxproj index 9d98bdff2..c4b23b8f2 100644 --- a/Samples/0_Introduction/asyncAPI/asyncAPI_vs2022.vcxproj +++ b/Samples/0_Introduction/asyncAPI/asyncAPI_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/asyncAPI.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/c++11_cuda/Makefile b/Samples/0_Introduction/c++11_cuda/Makefile index f70e1ad6d..d4c77f611 100644 --- a/Samples/0_Introduction/c++11_cuda/Makefile +++ b/Samples/0_Introduction/c++11_cuda/Makefile @@ -310,9 +310,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) @@ -363,7 +363,6 @@ run: build $(EXEC) ./c++11_cuda testrun: build - $(EXEC) ./c++11_cuda --dummy-test-param clean: rm -f c++11_cuda c++11_cuda.o diff --git a/Samples/0_Introduction/c++11_cuda/NsightEclipse.xml b/Samples/0_Introduction/c++11_cuda/NsightEclipse.xml index e9acaddc9..ccb26ce14 100644 --- a/Samples/0_Introduction/c++11_cuda/NsightEclipse.xml +++ b/Samples/0_Introduction/c++11_cuda/NsightEclipse.xml 
@@ -7,9 +7,9 @@ cudaMalloc + cudaMemcpy cudaMemset cudaFree - cudaMemcpy whole @@ -31,9 +31,6 @@ true c++11_cuda.cu - - --dummy-test-param - CPP11 @@ -54,6 +51,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/c++11_cuda/README.md b/Samples/0_Introduction/c++11_cuda/README.md index 0ff9f23e4..a889fb7c1 100644 --- a/Samples/0_Introduction/c++11_cuda/README.md +++ b/Samples/0_Introduction/c++11_cuda/README.md @@ -10,7 +10,7 @@ CPP11 CUDA ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime 
API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMalloc, cudaMemset, cudaFree, cudaMemcpy +cudaMalloc, cudaMemcpy, cudaMemset, cudaFree ## Dependencies needed to build/run [CPP11](../../../README.md#cpp11) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2017.vcxproj b/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2017.vcxproj index 33d8ff13b..705e575c0 100644 --- a/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2017.vcxproj +++ b/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/c++11_cuda.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2019.vcxproj b/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2019.vcxproj index 3d1bc27dd..e4e93deee 100644 --- a/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2019.vcxproj +++ b/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/c++11_cuda.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2022.vcxproj b/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2022.vcxproj index 9bf5532c4..8133b615c 100644 --- a/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2022.vcxproj +++ b/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/c++11_cuda.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/clock/Makefile b/Samples/0_Introduction/clock/Makefile index dd8327571..df4722cb7 100644 --- a/Samples/0_Introduction/clock/Makefile +++ b/Samples/0_Introduction/clock/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/clock/NsightEclipse.xml b/Samples/0_Introduction/clock/NsightEclipse.xml index eee903b95..6d8cfb096 100644 --- a/Samples/0_Introduction/clock/NsightEclipse.xml +++ b/Samples/0_Introduction/clock/NsightEclipse.xml @@ -4,8 +4,8 @@ clock cudaMalloc - cudaFree cudaMemcpy + cudaFree whole @@ -47,6 +47,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git 
a/Samples/0_Introduction/clock/README.md b/Samples/0_Introduction/clock/README.md index 98ffd744f..11f9afd45 100644 --- a/Samples/0_Introduction/clock/README.md +++ b/Samples/0_Introduction/clock/README.md @@ -10,7 +10,7 @@ Performance Strategies ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMalloc, cudaFree, cudaMemcpy +cudaMalloc, cudaMemcpy, cudaFree ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding 
platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/0_Introduction/clock/clock_vs2017.vcxproj b/Samples/0_Introduction/clock/clock_vs2017.vcxproj index ba348ad32..2350f365d 100644 --- a/Samples/0_Introduction/clock/clock_vs2017.vcxproj +++ b/Samples/0_Introduction/clock/clock_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/clock.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/clock/clock_vs2019.vcxproj b/Samples/0_Introduction/clock/clock_vs2019.vcxproj index a20c90b72..6649becab 100644 --- a/Samples/0_Introduction/clock/clock_vs2019.vcxproj +++ b/Samples/0_Introduction/clock/clock_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/clock.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/clock/clock_vs2022.vcxproj b/Samples/0_Introduction/clock/clock_vs2022.vcxproj index 10e923471..4cf6b8958 100644 --- a/Samples/0_Introduction/clock/clock_vs2022.vcxproj +++ b/Samples/0_Introduction/clock/clock_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ 
$(OutDir)/clock.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/clock_nvrtc/README.md b/Samples/0_Introduction/clock_nvrtc/README.md index 8f16c6d2d..5e1dbf0f9 100644 --- a/Samples/0_Introduction/clock_nvrtc/README.md +++ b/Samples/0_Introduction/clock_nvrtc/README.md @@ -10,7 +10,7 @@ Performance Strategies, Runtime Compilation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 
](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,7 +23,7 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuModuleGetFunction, cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuMemcpyHtoD +cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) cudaBlockSize, cudaGridSize @@ -33,7 +33,7 @@ cudaBlockSize, cudaGridSize ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2017.vcxproj b/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2017.vcxproj index 03b11e36c..ec582a9fe 100644 --- a/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2017.vcxproj +++ b/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2019.vcxproj b/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2019.vcxproj index 80f3f59d3..e5b93b60d 100644 --- a/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2019.vcxproj +++ b/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2022.vcxproj b/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2022.vcxproj index 0cf812f4c..825d8e05e 100644 --- a/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2022.vcxproj +++ b/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/concurrentKernels/Makefile b/Samples/0_Introduction/concurrentKernels/Makefile index 0073ee5e7..e6e4e2418 100644 --- a/Samples/0_Introduction/concurrentKernels/Makefile +++ b/Samples/0_Introduction/concurrentKernels/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/concurrentKernels/NsightEclipse.xml b/Samples/0_Introduction/concurrentKernels/NsightEclipse.xml index dd564b83c..edfb7ff58 100644 --- a/Samples/0_Introduction/concurrentKernels/NsightEclipse.xml +++ b/Samples/0_Introduction/concurrentKernels/NsightEclipse.xml @@ -3,22 +3,22 @@ concurrentKernels - cudaStreamWaitEvent 
cudaStreamDestroy + cudaMalloc + cudaMemcpyAsync cudaFree - cudaEventRecord cudaMallocHost - cudaStreamCreate - cudaEventCreate - cudaEventElapsedTime + cudaEventCreateWithFlags cudaEventSynchronize + cudaEventRecord cudaFreeHost - cudaMalloc - cudaEventCreateWithFlags + cudaGetDevice + cudaStreamWaitEvent cudaEventDestroy - cudaMemcpyAsync + cudaEventElapsedTime + cudaStreamCreate cudaGetDeviceProperties - cudaGetDevice + cudaEventCreate whole @@ -57,6 +57,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/concurrentKernels/README.md b/Samples/0_Introduction/concurrentKernels/README.md index 96816ffa2..f83e3bdd4 100644 --- a/Samples/0_Introduction/concurrentKernels/README.md +++ b/Samples/0_Introduction/concurrentKernels/README.md @@ -10,7 +10,7 @@ Performance Strategies ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 
](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaStreamWaitEvent, cudaStreamDestroy, cudaFree, cudaEventRecord, cudaMallocHost, cudaStreamCreate, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaEventCreateWithFlags, cudaEventDestroy, cudaMemcpyAsync, cudaGetDeviceProperties, cudaGetDevice +cudaStreamDestroy, cudaMalloc, cudaMemcpyAsync, cudaFree, cudaMallocHost, cudaEventCreateWithFlags, cudaEventSynchronize, cudaEventRecord, cudaFreeHost, cudaGetDevice, cudaStreamWaitEvent, cudaEventDestroy, cudaEventElapsedTime, cudaStreamCreate, cudaGetDeviceProperties, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2017.vcxproj b/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2017.vcxproj index bb6467891..59cad7eb4 100644 --- a/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2017.vcxproj +++ b/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/concurrentKernels.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2019.vcxproj b/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2019.vcxproj index 2830f3105..faee059d5 100644 --- a/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2019.vcxproj +++ b/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/concurrentKernels.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2022.vcxproj b/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2022.vcxproj index a528a3fb8..abf2d5e05 100644 --- 
a/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2022.vcxproj +++ b/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/concurrentKernels.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/cppIntegration/Makefile b/Samples/0_Introduction/cppIntegration/Makefile index 19301286d..ebe106e2f 100644 --- a/Samples/0_Introduction/cppIntegration/Makefile +++ b/Samples/0_Introduction/cppIntegration/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/cppIntegration/NsightEclipse.xml b/Samples/0_Introduction/cppIntegration/NsightEclipse.xml index 9f70719b8..9b5f9b41d 100644 --- a/Samples/0_Introduction/cppIntegration/NsightEclipse.xml +++ b/Samples/0_Introduction/cppIntegration/NsightEclipse.xml @@ -4,8 +4,8 @@ cppIntegration cudaMalloc - cudaFree cudaMemcpy + cudaFree whole @@ -41,6 +41,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/cppIntegration/README.md b/Samples/0_Introduction/cppIntegration/README.md index 2ba64fd7c..4ac48bcf1 100644 --- a/Samples/0_Introduction/cppIntegration/README.md +++ b/Samples/0_Introduction/cppIntegration/README.md @@ -10,7 +10,7 @@ CPP-CUDA Integration ## Supported SM Architectures -[SM 3.5 
](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMalloc, cudaFree, cudaMemcpy +cudaMalloc, cudaMemcpy, cudaFree ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/0_Introduction/cppIntegration/cppIntegration_vs2017.vcxproj b/Samples/0_Introduction/cppIntegration/cppIntegration_vs2017.vcxproj index e68462114..4070ae91f 100644 --- a/Samples/0_Introduction/cppIntegration/cppIntegration_vs2017.vcxproj +++ b/Samples/0_Introduction/cppIntegration/cppIntegration_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/cppIntegration.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/0_Introduction/cppIntegration/cppIntegration_vs2019.vcxproj b/Samples/0_Introduction/cppIntegration/cppIntegration_vs2019.vcxproj index a57aa19b0..67d587aab 100644 --- a/Samples/0_Introduction/cppIntegration/cppIntegration_vs2019.vcxproj +++ b/Samples/0_Introduction/cppIntegration/cppIntegration_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cppIntegration.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/0_Introduction/cppIntegration/cppIntegration_vs2022.vcxproj b/Samples/0_Introduction/cppIntegration/cppIntegration_vs2022.vcxproj index 26e828033..8ed0d991e 100644 --- a/Samples/0_Introduction/cppIntegration/cppIntegration_vs2022.vcxproj +++ 
b/Samples/0_Introduction/cppIntegration/cppIntegration_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cppIntegration.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/0_Introduction/cppOverload/Makefile b/Samples/0_Introduction/cppOverload/Makefile index cfd5ec9c5..a76aca058 100644 --- a/Samples/0_Introduction/cppOverload/Makefile +++ b/Samples/0_Introduction/cppOverload/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/cppOverload/NsightEclipse.xml b/Samples/0_Introduction/cppOverload/NsightEclipse.xml index 8c5b84a7f..9ad898beb 100644 --- a/Samples/0_Introduction/cppOverload/NsightEclipse.xml +++ b/Samples/0_Introduction/cppOverload/NsightEclipse.xml @@ -3,17 +3,17 @@ cppOverload - cudaFree + cudaMemcpy cudaFuncSetCacheConfig + cudaFree cudaMallocHost - cudaFuncGetAttributes - cudaGetDeviceCount + cudaSetDevice + cudaGetDeviceProperties cudaDeviceSynchronize cudaFreeHost cudaMalloc - cudaSetDevice - cudaMemcpy - cudaGetDeviceProperties + cudaFuncGetAttributes + cudaGetDeviceCount whole @@ -52,6 +52,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/cppOverload/README.md b/Samples/0_Introduction/cppOverload/README.md index 16b6adc15..bc583bfd4 100644 --- a/Samples/0_Introduction/cppOverload/README.md +++ 
b/Samples/0_Introduction/cppOverload/README.md @@ -10,7 +10,7 @@ C++ Function Overloading, CUDA Streams and Events ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaFuncSetCacheConfig, cudaMallocHost, cudaFuncGetAttributes, cudaGetDeviceCount, cudaDeviceSynchronize, cudaFreeHost, cudaMalloc, cudaSetDevice, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaFuncSetCacheConfig, cudaFree, cudaMallocHost, cudaSetDevice, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaFreeHost, 
cudaMalloc, cudaFuncGetAttributes, cudaGetDeviceCount ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/0_Introduction/cppOverload/cppOverload_vs2017.vcxproj b/Samples/0_Introduction/cppOverload/cppOverload_vs2017.vcxproj index 0a082724e..4adb6ea70 100644 --- a/Samples/0_Introduction/cppOverload/cppOverload_vs2017.vcxproj +++ b/Samples/0_Introduction/cppOverload/cppOverload_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/cppOverload.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/cppOverload/cppOverload_vs2019.vcxproj b/Samples/0_Introduction/cppOverload/cppOverload_vs2019.vcxproj index ba8bee01b..040f08cf4 100644 --- a/Samples/0_Introduction/cppOverload/cppOverload_vs2019.vcxproj +++ b/Samples/0_Introduction/cppOverload/cppOverload_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cppOverload.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git 
a/Samples/0_Introduction/cppOverload/cppOverload_vs2022.vcxproj b/Samples/0_Introduction/cppOverload/cppOverload_vs2022.vcxproj index 4e849ea5d..a9592ff89 100644 --- a/Samples/0_Introduction/cppOverload/cppOverload_vs2022.vcxproj +++ b/Samples/0_Introduction/cppOverload/cppOverload_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cppOverload.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/cudaOpenMP/Makefile b/Samples/0_Introduction/cudaOpenMP/Makefile index 476ddbc9d..277357e24 100644 --- a/Samples/0_Introduction/cudaOpenMP/Makefile +++ b/Samples/0_Introduction/cudaOpenMP/Makefile @@ -321,9 +321,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/cudaOpenMP/README.md b/Samples/0_Introduction/cudaOpenMP/README.md index 9f446d04e..5a57d9185 100644 --- a/Samples/0_Introduction/cudaOpenMP/README.md +++ b/Samples/0_Introduction/cudaOpenMP/README.md @@ -10,7 +10,7 @@ CUDA Systems Integration, OpenMP, Multithreading ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 
](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaGetDeviceCount, cudaSetDevice, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties, cudaGetDevice +cudaMemcpy, cudaGetErrorString, cudaFree, cudaGetLastError, cudaSetDevice, cudaGetDeviceCount, cudaGetDevice, cudaMemset, cudaMalloc, cudaGetDeviceProperties ## Dependencies needed to build/run [OpenMP](../../../README.md#openmp) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2017.vcxproj b/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2017.vcxproj index 57636e0cf..b6a822e04 100644 --- a/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2017.vcxproj +++ b/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/cudaOpenMP.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2019.vcxproj b/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2019.vcxproj index 5e7d2b503..991ca21ee 100644 --- a/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2019.vcxproj +++ b/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cudaOpenMP.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2022.vcxproj b/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2022.vcxproj index 8628c83bc..adf14793d 100644 --- a/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2022.vcxproj +++ b/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ 
$(OutDir)/cudaOpenMP.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/0_Introduction/fp16ScalarProduct/Makefile b/Samples/0_Introduction/fp16ScalarProduct/Makefile index c80fe84ae..5dda1a895 100644 --- a/Samples/0_Introduction/fp16ScalarProduct/Makefile +++ b/Samples/0_Introduction/fp16ScalarProduct/Makefile @@ -285,9 +285,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 60 61 70 75 80 86 +SMS ?= 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/fp16ScalarProduct/NsightEclipse.xml b/Samples/0_Introduction/fp16ScalarProduct/NsightEclipse.xml index fed67440f..045bce439 100644 --- a/Samples/0_Introduction/fp16ScalarProduct/NsightEclipse.xml +++ b/Samples/0_Introduction/fp16ScalarProduct/NsightEclipse.xml @@ -3,11 +3,11 @@ fp16ScalarProduct + cudaMemcpy cudaFree cudaMallocHost cudaFreeHost cudaMalloc - cudaMemcpy cudaGetDeviceProperties @@ -44,6 +44,7 @@ sm80 sm86 sm87 + sm90 arm diff --git a/Samples/0_Introduction/fp16ScalarProduct/README.md b/Samples/0_Introduction/fp16ScalarProduct/README.md index 3875a40a2..4aa2b89c3 100644 --- a/Samples/0_Introduction/fp16ScalarProduct/README.md +++ b/Samples/0_Introduction/fp16ScalarProduct/README.md @@ -10,7 +10,7 @@ CUDA Runtime API ## Supported SM Architectures -[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 
](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaMallocHost, cudaFreeHost, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaFree, cudaMallocHost, cudaFreeHost, cudaMalloc, cudaGetDeviceProperties ## Dependencies needed to build/run [FP16](../../../README.md#fp16) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2017.vcxproj b/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2017.vcxproj index f1199a8ee..c4dbdc75c 100644 --- a/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2017.vcxproj +++ b/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/fp16ScalarProduct.exe - compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2019.vcxproj b/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2019.vcxproj index bcd5c50cb..0b9a749fc 100644 --- a/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2019.vcxproj +++ b/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/fp16ScalarProduct.exe - compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2022.vcxproj b/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2022.vcxproj index c316ee629..ee4258a88 100644 --- a/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2022.vcxproj +++ b/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/fp16ScalarProduct.exe - 
compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/matrixMul/Makefile b/Samples/0_Introduction/matrixMul/Makefile index a8b38ccdd..a4d336b55 100644 --- a/Samples/0_Introduction/matrixMul/Makefile +++ b/Samples/0_Introduction/matrixMul/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/matrixMul/NsightEclipse.xml b/Samples/0_Introduction/matrixMul/NsightEclipse.xml index e90b6c7e8..3f5179670 100644 --- a/Samples/0_Introduction/matrixMul/NsightEclipse.xml +++ b/Samples/0_Introduction/matrixMul/NsightEclipse.xml @@ -3,20 +3,20 @@ matrixMul + cudaStreamCreateWithFlags + cudaProfilerStop + cudaMalloc cudaFree - cudaEventRecord cudaMallocHost cudaProfilerStart - cudaEventCreate - cudaEventElapsedTime cudaEventSynchronize + cudaEventRecord cudaFreeHost - cudaMalloc - cudaProfilerStop - cudaStreamCreateWithFlags - cudaEventDestroy cudaStreamSynchronize + cudaEventDestroy + cudaEventElapsedTime cudaMemcpyAsync + cudaEventCreate whole @@ -56,6 +56,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/matrixMul/README.md b/Samples/0_Introduction/matrixMul/README.md index c558141a3..b0e121b23 100644 --- a/Samples/0_Introduction/matrixMul/README.md +++ b/Samples/0_Introduction/matrixMul/README.md @@ -10,7 +10,7 @@ CUDA Runtime API, Linear Algebra ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 
](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaEventRecord, cudaMallocHost, cudaProfilerStart, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaProfilerStop, cudaStreamCreateWithFlags, cudaEventDestroy, cudaStreamSynchronize, cudaMemcpyAsync +cudaStreamCreateWithFlags, cudaProfilerStop, cudaMalloc, cudaFree, cudaMallocHost, cudaProfilerStart, cudaEventSynchronize, cudaEventRecord, cudaFreeHost, cudaStreamSynchronize, cudaEventDestroy, cudaEventElapsedTime, cudaMemcpyAsync, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 
11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/0_Introduction/matrixMul/matrixMul_vs2017.vcxproj b/Samples/0_Introduction/matrixMul/matrixMul_vs2017.vcxproj index 5bc23eb0f..95f6a03af 100644 --- a/Samples/0_Introduction/matrixMul/matrixMul_vs2017.vcxproj +++ b/Samples/0_Introduction/matrixMul/matrixMul_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/matrixMul.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/matrixMul/matrixMul_vs2019.vcxproj b/Samples/0_Introduction/matrixMul/matrixMul_vs2019.vcxproj index 7373d3856..375f668ae 100644 --- a/Samples/0_Introduction/matrixMul/matrixMul_vs2019.vcxproj +++ b/Samples/0_Introduction/matrixMul/matrixMul_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/matrixMul.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/matrixMul/matrixMul_vs2022.vcxproj b/Samples/0_Introduction/matrixMul/matrixMul_vs2022.vcxproj index 36e7c4e9a..e406cc03b 100644 --- 
a/Samples/0_Introduction/matrixMul/matrixMul_vs2022.vcxproj +++ b/Samples/0_Introduction/matrixMul/matrixMul_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/matrixMul.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/matrixMulDrv/Makefile b/Samples/0_Introduction/matrixMulDrv/Makefile index 794345b6f..83476982a 100644 --- a/Samples/0_Introduction/matrixMulDrv/Makefile +++ b/Samples/0_Introduction/matrixMulDrv/Makefile @@ -283,9 +283,9 @@ FATBIN_FILE := matrixMul_kernel${TARGET_SIZE}.fatbin # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/matrixMulDrv/README.md b/Samples/0_Introduction/matrixMulDrv/README.md index 804e7d814..682fb940c 100644 --- a/Samples/0_Introduction/matrixMulDrv/README.md +++ b/Samples/0_Introduction/matrixMulDrv/README.md @@ -10,7 +10,7 @@ CUDA Driver API, Matrix Multiply ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 
](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l, aarch64 ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuModuleGetFunction, cuMemcpyHtoD, cuModuleLoadData, cuCtxCreate, cuLaunchKernel, cuDeviceGetName, cuMemAlloc, cuOccupancyMaxPotentialBlockSize, cuDeviceTotalMem, cuMemFree, cuMemcpyDtoH, cuCtxDestroy, cuDeviceGetAttribute +cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuDeviceGetName, cuDeviceTotalMem, cuDeviceGetAttribute, cuModuleLoadData, cuOccupancyMaxPotentialBlockSize, cuMemAlloc, cuMemFree, cuCtxDestroy, cuModuleGetFunction, cuCtxCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2017.vcxproj b/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2017.vcxproj index ff9114527..739987619 100644 --- a/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2017.vcxproj +++ b/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/matrixMulDrv.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -111,6 +111,6 @@ - + diff --git a/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2019.vcxproj b/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2019.vcxproj index 1f6b88ada..0805c97cf 100644 --- a/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2019.vcxproj +++ b/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/matrixMulDrv.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2022.vcxproj b/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2022.vcxproj index e92ce9cef..a82bb6991 100644 --- a/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2022.vcxproj +++ b/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2022.vcxproj @@ 
-34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/matrixMulDrv.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/README.md b/Samples/0_Introduction/matrixMulDynlinkJIT/README.md index 183509644..657811d3b 100644 --- a/Samples/0_Introduction/matrixMulDynlinkJIT/README.md +++ b/Samples/0_Introduction/matrixMulDynlinkJIT/README.md @@ -10,7 +10,7 @@ CUDA Driver API, CUDA Dynamically Linked Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 
](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuParamSetv, cuMemFree, cuInit, cuModuleGetFunction, cuCtxDestroy, cuCtxCreate, cuDeviceGetName, cuCtxSynchronize, cuParamSeti, cuModuleLoadDataEx, cuDeviceGet, cuFuncSetSharedSize, cuMemAlloc, cuDeviceComputeCapability, cuFuncSetBlockShape, cuMemcpyHtoD, cuParamSetSize, cuLaunchGrid, cuDeviceGetCount, cuLaunchKernel, cuMemcpyDtoH +cuMemcpyDtoH, cuDeviceGetName, cuParamSeti, cuModuleLoadDataEx, cuModuleGetFunction, cuLaunchGrid, cuFuncSetSharedSize, cuMemFree, cuParamSetSize, cuParamSetv, cuInit, cuMemcpyHtoD, cuLaunchKernel, cuDeviceGet, cuFuncSetBlockShape, cuCtxDestroy, cuDeviceGetCount, cuDeviceComputeCapability, cuCtxSynchronize, cuMemAlloc, cuCtxCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/helper_cuda_drvapi.h b/Samples/0_Introduction/matrixMulDynlinkJIT/helper_cuda_drvapi.h index 5f69d3329..4ca66fde5 100644 --- a/Samples/0_Introduction/matrixMulDynlinkJIT/helper_cuda_drvapi.h +++ b/Samples/0_Introduction/matrixMulDynlinkJIT/helper_cuda_drvapi.h @@ -95,6 +95,7 @@ inline int _ConvertSMVer2CoresDRV(int major, int minor) { {0x80, 64}, {0x86, 128}, {0x87, 128}, + {0x90, 128}, {-1, -1}}; int index = 0; diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2017.vcxproj b/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2017.vcxproj index da13462f7..8b146bded 100644 --- a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2017.vcxproj +++ b/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -116,6 +116,6 @@ - + diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2019.vcxproj b/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2019.vcxproj index 460bc3dea..3fc6842cf 100644 --- a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2019.vcxproj +++ b/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -112,6 +112,6 @@ - + diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2022.vcxproj b/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2022.vcxproj index d5ac5358a..732e0b222 100644 --- a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2022.vcxproj +++ b/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -112,6 +112,6 @@ - + diff --git a/Samples/0_Introduction/matrixMul_nvrtc/README.md b/Samples/0_Introduction/matrixMul_nvrtc/README.md index 2cefe20e5..224c3ee04 100644 --- a/Samples/0_Introduction/matrixMul_nvrtc/README.md +++ 
b/Samples/0_Introduction/matrixMul_nvrtc/README.md @@ -10,7 +10,7 @@ CUDA Runtime API, Linear Algebra, Runtime Compilation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuModuleGetFunction, cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuMemcpyHtoD +cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction ## Dependencies needed to build/run [NVRTC](../../../README.md#nvrtc) ## Prerequisites -Download and install the [CUDA 
Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2017.vcxproj b/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2017.vcxproj index 1b4a7eb09..7833bb475 100644 --- a/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2017.vcxproj +++ b/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -113,6 +113,6 @@ xcopy /y /e /s "$(CudaToolkitDir)include\cooperative_groups" .\cooperative_group - + diff --git a/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2019.vcxproj b/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2019.vcxproj index cf0c66c8e..d0b583663 100644 --- a/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2019.vcxproj +++ b/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -109,6 +109,6 @@ xcopy /y /e /s "$(CudaToolkitDir)include\cooperative_groups" .\cooperative_group - + diff --git a/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2022.vcxproj b/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2022.vcxproj index f6dc2b6f8..6fa7922e7 100644 --- a/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2022.vcxproj +++ b/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -109,6 +109,6 @@ xcopy /y /e /s "$(CudaToolkitDir)include\cooperative_groups" .\cooperative_group - + diff --git a/Samples/0_Introduction/mergeSort/Makefile b/Samples/0_Introduction/mergeSort/Makefile index ad45af877..815268b1d 100644 --- a/Samples/0_Introduction/mergeSort/Makefile +++ b/Samples/0_Introduction/mergeSort/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode 
arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/mergeSort/NsightEclipse.xml b/Samples/0_Introduction/mergeSort/NsightEclipse.xml index 0a77b65e2..55cab9066 100644 --- a/Samples/0_Introduction/mergeSort/NsightEclipse.xml +++ b/Samples/0_Introduction/mergeSort/NsightEclipse.xml @@ -4,9 +4,9 @@ mergeSort cudaMalloc - cudaFree cudaDeviceSynchronize cudaMemcpy + cudaFree whole @@ -46,6 +46,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/mergeSort/README.md b/Samples/0_Introduction/mergeSort/README.md index d7ab7be23..d08538962 100644 --- a/Samples/0_Introduction/mergeSort/README.md +++ b/Samples/0_Introduction/mergeSort/README.md @@ -10,7 +10,7 @@ Data-Parallel Algorithms ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 
](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMalloc, cudaFree, cudaDeviceSynchronize, cudaMemcpy +cudaMalloc, cudaDeviceSynchronize, cudaMemcpy, cudaFree ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/0_Introduction/mergeSort/mergeSort_vs2017.vcxproj b/Samples/0_Introduction/mergeSort/mergeSort_vs2017.vcxproj index 9f5e39b3e..0ef070139 100644 --- a/Samples/0_Introduction/mergeSort/mergeSort_vs2017.vcxproj +++ b/Samples/0_Introduction/mergeSort/mergeSort_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/mergeSort.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -111,6 +111,6 @@ - + diff --git a/Samples/0_Introduction/mergeSort/mergeSort_vs2019.vcxproj b/Samples/0_Introduction/mergeSort/mergeSort_vs2019.vcxproj index 8639bd2e4..5796dda5f 100644 --- a/Samples/0_Introduction/mergeSort/mergeSort_vs2019.vcxproj +++ b/Samples/0_Introduction/mergeSort/mergeSort_vs2019.vcxproj 
@@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/mergeSort.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/mergeSort/mergeSort_vs2022.vcxproj b/Samples/0_Introduction/mergeSort/mergeSort_vs2022.vcxproj index c38e79d66..ed951e9f8 100644 --- a/Samples/0_Introduction/mergeSort/mergeSort_vs2022.vcxproj +++ b/Samples/0_Introduction/mergeSort/mergeSort_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/mergeSort.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleAWBarrier/Makefile b/Samples/0_Introduction/simpleAWBarrier/Makefile index cd8dc51d9..0fa1e665b 100644 --- a/Samples/0_Introduction/simpleAWBarrier/Makefile +++ b/Samples/0_Introduction/simpleAWBarrier/Makefile @@ -316,9 +316,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 70 72 75 80 86 87 +SMS ?= 70 72 75 80 86 87 90 else -SMS ?= 70 75 80 86 +SMS ?= 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleAWBarrier/NsightEclipse.xml b/Samples/0_Introduction/simpleAWBarrier/NsightEclipse.xml index e8738f6a4..87414f89f 100644 --- 
a/Samples/0_Introduction/simpleAWBarrier/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleAWBarrier/NsightEclipse.xml @@ -6,17 +6,17 @@ --std=c++11 + cudaStreamCreateWithFlags cudaFree - cudaMallocHost - cudaOccupancyMaxActiveBlocksPerMultiprocessor - cudaOccupancyMaxPotentialBlockSize cudaDeviceGetAttribute + cudaMallocHost cudaFreeHost - cudaMalloc - cudaStreamCreateWithFlags - cudaLaunchCooperativeKernel cudaStreamSynchronize + cudaLaunchCooperativeKernel + cudaMalloc + cudaOccupancyMaxActiveBlocksPerMultiprocessor cudaMemcpyAsync + cudaOccupancyMaxPotentialBlockSize whole @@ -53,6 +53,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleAWBarrier/README.md b/Samples/0_Introduction/simpleAWBarrier/README.md index d81ac1f8b..064db83a5 100644 --- a/Samples/0_Introduction/simpleAWBarrier/README.md +++ b/Samples/0_Introduction/simpleAWBarrier/README.md @@ -10,7 +10,7 @@ Arrive Wait Barrier ## Supported SM Architectures -[SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaMallocHost, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaOccupancyMaxPotentialBlockSize, cudaDeviceGetAttribute, cudaFreeHost, cudaMalloc, cudaStreamCreateWithFlags, cudaLaunchCooperativeKernel, 
cudaStreamSynchronize, cudaMemcpyAsync +cudaStreamCreateWithFlags, cudaFree, cudaDeviceGetAttribute, cudaMallocHost, cudaFreeHost, cudaStreamSynchronize, cudaLaunchCooperativeKernel, cudaMalloc, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaMemcpyAsync, cudaOccupancyMaxPotentialBlockSize ## Dependencies needed to build/run [CPP11](../../../README.md#cpp11), [MBCG](../../../README.md#mbcg) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2017.vcxproj b/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2017.vcxproj index ea64526a3..ed1365409 100644 --- a/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleAWBarrier.exe - compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2019.vcxproj b/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2019.vcxproj index aaf046ed0..eeddba29c 100644 --- a/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleAWBarrier.exe - compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler 
"/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2022.vcxproj b/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2022.vcxproj index 28637338e..85eb24bf8 100644 --- a/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleAWBarrier.exe - compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleAssert/Makefile b/Samples/0_Introduction/simpleAssert/Makefile index fb73574b4..bd790aa68 100644 --- a/Samples/0_Introduction/simpleAssert/Makefile +++ b/Samples/0_Introduction/simpleAssert/Makefile @@ -285,9 +285,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleAssert/NsightEclipse.xml b/Samples/0_Introduction/simpleAssert/NsightEclipse.xml index 7f9e81f5d..2ba03ec63 100644 --- a/Samples/0_Introduction/simpleAssert/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleAssert/NsightEclipse.xml @@ -41,6 +41,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleAssert/README.md b/Samples/0_Introduction/simpleAssert/README.md index e5fbc1b3d..05b753a18 100644 --- a/Samples/0_Introduction/simpleAssert/README.md +++ b/Samples/0_Introduction/simpleAssert/README.md @@ -10,7 +10,7 @@ Assert ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 
](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -27,7 +27,7 @@ cudaDeviceSynchronize, cudaGetErrorString ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/0_Introduction/simpleAssert/simpleAssert_vs2017.vcxproj b/Samples/0_Introduction/simpleAssert/simpleAssert_vs2017.vcxproj index a033d7b00..731833f93 100644 --- a/Samples/0_Introduction/simpleAssert/simpleAssert_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleAssert/simpleAssert_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleAssert.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleAssert/simpleAssert_vs2019.vcxproj b/Samples/0_Introduction/simpleAssert/simpleAssert_vs2019.vcxproj index a32337825..88e2fa096 100644 --- a/Samples/0_Introduction/simpleAssert/simpleAssert_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleAssert/simpleAssert_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleAssert.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleAssert/simpleAssert_vs2022.vcxproj b/Samples/0_Introduction/simpleAssert/simpleAssert_vs2022.vcxproj index de3d8f032..ce9351208 100644 --- a/Samples/0_Introduction/simpleAssert/simpleAssert_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleAssert/simpleAssert_vs2022.vcxproj @@ 
-34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleAssert.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleAssert_nvrtc/README.md b/Samples/0_Introduction/simpleAssert_nvrtc/README.md index d0ecd7e03..72c5de117 100644 --- a/Samples/0_Introduction/simpleAssert_nvrtc/README.md +++ b/Samples/0_Introduction/simpleAssert_nvrtc/README.md @@ -10,7 +10,7 @@ Assert, Runtime Compilation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 
](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuModuleGetFunction, cuCtxSynchronize, cuLaunchKernel +cuModuleGetFunction, cuLaunchKernel, cuCtxSynchronize ## Dependencies needed to build/run [NVRTC](../../../README.md#nvrtc) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2017.vcxproj b/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2017.vcxproj index 11b8003f2..3fc089e59 100644 --- a/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2019.vcxproj b/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2019.vcxproj index 548b3baf6..0714d837b 100644 --- a/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2022.vcxproj b/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2022.vcxproj index 59d571cec..fc010fb0e 100644 --- 
a/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/Makefile b/Samples/0_Introduction/simpleAtomicIntrinsics/Makefile index 7b221fb20..b72224450 100644 --- a/Samples/0_Introduction/simpleAtomicIntrinsics/Makefile +++ b/Samples/0_Introduction/simpleAtomicIntrinsics/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/NsightEclipse.xml b/Samples/0_Introduction/simpleAtomicIntrinsics/NsightEclipse.xml index 093f108b1..e9252d1cc 100644 --- a/Samples/0_Introduction/simpleAtomicIntrinsics/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleAtomicIntrinsics/NsightEclipse.xml @@ -3,12 +3,12 @@ simpleAtomicIntrinsics + cudaStreamCreateWithFlags cudaFree cudaMallocHost cudaFreeHost - cudaMalloc - cudaStreamCreateWithFlags cudaStreamSynchronize + cudaMalloc cudaMemcpyAsync @@ -48,6 +48,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/README.md b/Samples/0_Introduction/simpleAtomicIntrinsics/README.md index 81693b71b..0fa52781b 100644 --- a/Samples/0_Introduction/simpleAtomicIntrinsics/README.md +++ b/Samples/0_Introduction/simpleAtomicIntrinsics/README.md @@ -10,7 +10,7 @@ Atomic Intrinsics ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 
6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaMallocHost, cudaFreeHost, cudaMalloc, cudaStreamCreateWithFlags, cudaStreamSynchronize, cudaMemcpyAsync +cudaStreamCreateWithFlags, cudaFree, cudaMallocHost, cudaFreeHost, cudaStreamSynchronize, cudaMalloc, cudaMemcpyAsync ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2017.vcxproj b/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2017.vcxproj index 87308a5d1..d122ae68f 100644 --- a/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleAtomicIntrinsics.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2019.vcxproj b/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2019.vcxproj index b99f4190a..7f05dcc60 100644 --- a/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleAtomicIntrinsics.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2022.vcxproj 
b/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2022.vcxproj index bb0bb8df1..7dd8d89e9 100644 --- a/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleAtomicIntrinsics.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/README.md b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/README.md index 0d1700a7b..a53e822b5 100644 --- a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/README.md +++ b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/README.md @@ -10,7 +10,7 @@ Atomic Intrinsics, Runtime Compilation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 
](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,7 +23,7 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuModuleGetFunction, cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuMemcpyHtoD +cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) cudaBlockSize, cudaGridSize @@ -33,7 +33,7 @@ cudaBlockSize, cudaGridSize ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2017.vcxproj b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2017.vcxproj index c51f99399..9db171b1f 100644 --- a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -108,6 +108,6 @@ - + diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2019.vcxproj b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2019.vcxproj index 75b918e79..b43cec916 100644 --- a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -104,6 +104,6 @@ - + diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2022.vcxproj b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2022.vcxproj index f7cb9e386..bd705f440 100644 --- a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -104,6 +104,6 @@ - + diff --git a/Samples/0_Introduction/simpleAttributes/Makefile b/Samples/0_Introduction/simpleAttributes/Makefile index 00e9c4f05..e685dd695 100644 --- a/Samples/0_Introduction/simpleAttributes/Makefile +++ b/Samples/0_Introduction/simpleAttributes/Makefile @@ -285,9 +285,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 
61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleAttributes/NsightEclipse.xml b/Samples/0_Introduction/simpleAttributes/NsightEclipse.xml index 5141efa16..fcad82351 100644 --- a/Samples/0_Introduction/simpleAttributes/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleAttributes/NsightEclipse.xml @@ -3,15 +3,15 @@ simpleAttributes - cudaDeviceSetLimit cudaFree cudaMallocHost - cudaStreamCreate cudaFreeHost - cudaMalloc + cudaStreamSynchronize cudaStreamSetAttribute + cudaDeviceSetLimit + cudaMalloc cudaMemcpyAsync - cudaStreamSynchronize + cudaStreamCreate cudaGetDeviceProperties @@ -49,6 +49,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleAttributes/README.md b/Samples/0_Introduction/simpleAttributes/README.md index e5b6fa875..5dc1787bc 100644 --- a/Samples/0_Introduction/simpleAttributes/README.md +++ b/Samples/0_Introduction/simpleAttributes/README.md @@ -10,7 +10,7 @@ Attributes usage on stream ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 
](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaDeviceSetLimit, cudaFree, cudaMallocHost, cudaStreamCreate, cudaFreeHost, cudaMalloc, cudaStreamSetAttribute, cudaMemcpyAsync, cudaStreamSynchronize, cudaGetDeviceProperties +cudaFree, cudaMallocHost, cudaFreeHost, cudaStreamSynchronize, cudaStreamSetAttribute, cudaDeviceSetLimit, cudaMalloc, cudaMemcpyAsync, cudaStreamCreate, cudaGetDeviceProperties ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2017.vcxproj b/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2017.vcxproj index 2752b2669..a446d3a9b 100644 --- a/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleAttributes.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2019.vcxproj b/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2019.vcxproj index 620fdca35..e49167d97 100644 --- a/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleAttributes.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2022.vcxproj b/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2022.vcxproj index 9ab8f931b..1eb61252c 100644 --- 
a/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleAttributes.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleCUDA2GL/Makefile b/Samples/0_Introduction/simpleCUDA2GL/Makefile index 39042604b..80e3250f2 100644 --- a/Samples/0_Introduction/simpleCUDA2GL/Makefile +++ b/Samples/0_Introduction/simpleCUDA2GL/Makefile @@ -311,9 +311,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleCUDA2GL/NsightEclipse.xml b/Samples/0_Introduction/simpleCUDA2GL/NsightEclipse.xml index 55b9400ce..1f40f86bd 100644 --- a/Samples/0_Introduction/simpleCUDA2GL/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleCUDA2GL/NsightEclipse.xml @@ -3,19 +3,19 @@ simpleCUDA2GL + cudaHostAlloc + cudaGraphicsUnmapResources + cudaMalloc cudaFree + cudaGraphicsResourceGetMappedPointer cudaGraphicsMapResources - cudaGraphicsUnregisterResource cudaMemcpyToArray - cudaGraphicsGLRegisterBuffer - cudaHostAlloc - cudaGraphicsResourceGetMappedPointer - cudaProcess cudaDeviceSynchronize - cudaMalloc + cudaProcess + cudaGraphicsUnregisterResource cudaGraphicsSubResourceGetMappedArray + cudaGraphicsGLRegisterBuffer cudaGraphicsGLRegisterImage - 
cudaGraphicsUnmapResources whole @@ -79,6 +79,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleCUDA2GL/README.md b/Samples/0_Introduction/simpleCUDA2GL/README.md index c5f7bb6d2..7c46fb7eb 100644 --- a/Samples/0_Introduction/simpleCUDA2GL/README.md +++ b/Samples/0_Introduction/simpleCUDA2GL/README.md @@ -10,7 +10,7 @@ Graphics Interop, Image Processing, Performance Strategies ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGraphicsMapResources, 
cudaGraphicsUnregisterResource, cudaMemcpyToArray, cudaGraphicsGLRegisterBuffer, cudaHostAlloc, cudaGraphicsResourceGetMappedPointer, cudaProcess, cudaDeviceSynchronize, cudaMalloc, cudaGraphicsSubResourceGetMappedArray, cudaGraphicsGLRegisterImage, cudaGraphicsUnmapResources +cudaHostAlloc, cudaGraphicsUnmapResources, cudaMalloc, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaMemcpyToArray, cudaDeviceSynchronize, cudaProcess, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsGLRegisterImage ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/0_Introduction/simpleCUDA2GL/findgllib.mk b/Samples/0_Introduction/simpleCUDA2GL/findgllib.mk index f0a5c5512..998fcf0f1 100644 --- a/Samples/0_Introduction/simpleCUDA2GL/findgllib.mk +++ b/Samples/0_Introduction/simpleCUDA2GL/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) 
+ RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2017.vcxproj b/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2017.vcxproj index b1c84ae4a..658651178 100644 --- a/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleCUDA2GL.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -118,6 
+118,6 @@ - + diff --git a/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2019.vcxproj b/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2019.vcxproj index 955b060ab..19ad1de21 100644 --- a/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleCUDA2GL.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2022.vcxproj b/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2022.vcxproj index 46e97f626..f2b071ec9 100644 --- a/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleCUDA2GL.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/0_Introduction/simpleCallback/Makefile b/Samples/0_Introduction/simpleCallback/Makefile index fade686a6..ff334a834 100644 --- a/Samples/0_Introduction/simpleCallback/Makefile +++ b/Samples/0_Introduction/simpleCallback/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq 
($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleCallback/NsightEclipse.xml b/Samples/0_Introduction/simpleCallback/NsightEclipse.xml index 3ee3b90de..931c7c651 100644 --- a/Samples/0_Introduction/simpleCallback/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleCallback/NsightEclipse.xml @@ -3,16 +3,16 @@ simpleCallback + cudaHostAlloc cudaStreamDestroy cudaFree - cudaStreamCreate - cudaHostAlloc + cudaSetDevice cudaGetDeviceCount cudaFreeHost + cudaStreamAddCallback cudaMalloc - cudaSetDevice cudaMemcpyAsync - cudaStreamAddCallback + cudaStreamCreate cudaGetDeviceProperties @@ -53,6 +53,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleCallback/README.md b/Samples/0_Introduction/simpleCallback/README.md index 24c76c25d..403685670 100644 --- a/Samples/0_Introduction/simpleCallback/README.md +++ b/Samples/0_Introduction/simpleCallback/README.md @@ -10,7 +10,7 @@ CUDA Streams, Callback Functions, Multithreading ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 
](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaStreamDestroy, cudaFree, cudaStreamCreate, cudaHostAlloc, cudaGetDeviceCount, cudaFreeHost, cudaMalloc, cudaSetDevice, cudaMemcpyAsync, cudaStreamAddCallback, cudaGetDeviceProperties +cudaHostAlloc, cudaStreamDestroy, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaFreeHost, cudaStreamAddCallback, cudaMalloc, cudaMemcpyAsync, cudaStreamCreate, cudaGetDeviceProperties ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/0_Introduction/simpleCallback/simpleCallback_vs2017.vcxproj b/Samples/0_Introduction/simpleCallback/simpleCallback_vs2017.vcxproj index 677ea6b21..ad8bf9007 100644 --- a/Samples/0_Introduction/simpleCallback/simpleCallback_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleCallback/simpleCallback_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleCallback.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/0_Introduction/simpleCallback/simpleCallback_vs2019.vcxproj b/Samples/0_Introduction/simpleCallback/simpleCallback_vs2019.vcxproj index 0dcbadea5..b200ba6f3 100644 --- a/Samples/0_Introduction/simpleCallback/simpleCallback_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleCallback/simpleCallback_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleCallback.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/0_Introduction/simpleCallback/simpleCallback_vs2022.vcxproj b/Samples/0_Introduction/simpleCallback/simpleCallback_vs2022.vcxproj index 6f2e491b5..196f57949 100644 --- a/Samples/0_Introduction/simpleCallback/simpleCallback_vs2022.vcxproj +++ 
b/Samples/0_Introduction/simpleCallback/simpleCallback_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleCallback.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/0_Introduction/simpleCooperativeGroups/Makefile b/Samples/0_Introduction/simpleCooperativeGroups/Makefile index c45b73325..22efbff33 100644 --- a/Samples/0_Introduction/simpleCooperativeGroups/Makefile +++ b/Samples/0_Introduction/simpleCooperativeGroups/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleCooperativeGroups/NsightEclipse.xml b/Samples/0_Introduction/simpleCooperativeGroups/NsightEclipse.xml index 54d9c4d3c..939f68ada 100644 --- a/Samples/0_Introduction/simpleCooperativeGroups/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleCooperativeGroups/NsightEclipse.xml @@ -44,6 +44,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleCooperativeGroups/README.md b/Samples/0_Introduction/simpleCooperativeGroups/README.md index 7e80f6bc1..ab3e11ccf 100644 --- a/Samples/0_Introduction/simpleCooperativeGroups/README.md +++ b/Samples/0_Introduction/simpleCooperativeGroups/README.md @@ -10,7 +10,7 @@ Cooperative Groups ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 
](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -27,7 +27,7 @@ cudaDeviceSynchronize, cudaGetErrorString ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2017.vcxproj b/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2017.vcxproj index 4a7bac2af..061538d8d 100644 --- a/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleCooperativeGroups.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2019.vcxproj b/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2019.vcxproj index 09d331592..bf17882ac 100644 --- a/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleCooperativeGroups.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2022.vcxproj 
b/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2022.vcxproj index 127592032..649221c2f 100644 --- a/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleCooperativeGroups.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleCubemapTexture/Makefile b/Samples/0_Introduction/simpleCubemapTexture/Makefile index fdff39801..4c1fed172 100644 --- a/Samples/0_Introduction/simpleCubemapTexture/Makefile +++ b/Samples/0_Introduction/simpleCubemapTexture/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleCubemapTexture/NsightEclipse.xml b/Samples/0_Introduction/simpleCubemapTexture/NsightEclipse.xml index 625ed7a5d..1bf6b0105 100644 --- a/Samples/0_Introduction/simpleCubemapTexture/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleCubemapTexture/NsightEclipse.xml @@ -3,17 +3,17 @@ simpleCubemapTexture - cudaFree + cudaMemcpy + cudaCreateChannelDesc cudaFreeArray + cudaFree + cudaPitchedPtr + cudaPos + cudaDestroyTextureObject cudaExtent cudaDeviceSynchronize - cudaDestroyTextureObject - cudaPitchedPtr cudaCreateTextureObject cudaMalloc - 
cudaCreateChannelDesc - cudaPos - cudaMemcpy cudaGetDeviceProperties @@ -52,6 +52,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleCubemapTexture/README.md b/Samples/0_Introduction/simpleCubemapTexture/README.md index 68f069479..44c3896f1 100644 --- a/Samples/0_Introduction/simpleCubemapTexture/README.md +++ b/Samples/0_Introduction/simpleCubemapTexture/README.md @@ -10,7 +10,7 @@ Texture, Volume Processing ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, 
cudaFreeArray, cudaExtent, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaPitchedPtr, cudaCreateTextureObject, cudaMalloc, cudaCreateChannelDesc, cudaPos, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaCreateChannelDesc, cudaFreeArray, cudaFree, cudaPitchedPtr, cudaPos, cudaDestroyTextureObject, cudaExtent, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMalloc, cudaGetDeviceProperties ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2017.vcxproj b/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2017.vcxproj index 26ae94238..307c5282e 100644 --- a/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleCubemapTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2019.vcxproj b/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2019.vcxproj index bcdaec2f4..709f03620 100644 --- a/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ 
-63,7 +63,7 @@ $(OutDir)/simpleCubemapTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2022.vcxproj b/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2022.vcxproj index 0dd40bfd1..3c332afbe 100644 --- a/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleCubemapTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleDrvRuntime/Makefile b/Samples/0_Introduction/simpleDrvRuntime/Makefile index 3cbc5811b..46593a89b 100644 --- a/Samples/0_Introduction/simpleDrvRuntime/Makefile +++ b/Samples/0_Introduction/simpleDrvRuntime/Makefile @@ -283,9 +283,9 @@ FATBIN_FILE := vectorAdd_kernel${TARGET_SIZE}.fatbin # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git 
a/Samples/0_Introduction/simpleDrvRuntime/README.md b/Samples/0_Introduction/simpleDrvRuntime/README.md index 74ea4ad99..158157e82 100644 --- a/Samples/0_Introduction/simpleDrvRuntime/README.md +++ b/Samples/0_Introduction/simpleDrvRuntime/README.md @@ -10,7 +10,7 @@ CUDA Driver API, CUDA Runtime API, Vector Addition ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuCtxDestroy, cuModuleLoadData, cuCtxCreate, cuLaunchKernel, cuModuleUnload, cuInit, cuModuleGetFunction +cuLaunchKernel, 
cuModuleLoadData, cuCtxDestroy, cuModuleUnload, cuModuleGetFunction, cuCtxCreate, cuInit ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaMallocHost, cudaFreeHost, cudaMalloc, cudaStreamCreateWithFlags, cudaStreamSynchronize, cudaMemcpyAsync +cudaStreamCreateWithFlags, cudaFree, cudaMallocHost, cudaFreeHost, cudaStreamSynchronize, cudaMalloc, cudaMemcpyAsync ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2017.vcxproj b/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2017.vcxproj index 92e427cd0..019fc0cc1 100644 --- a/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleDrvRuntime.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -111,6 +111,6 @@ - + diff --git a/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2019.vcxproj b/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2019.vcxproj index 64f8fab97..727c658dd 100644 --- a/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleDrvRuntime.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2022.vcxproj b/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2022.vcxproj index 7f3d2b016..93b2ffad6 100644 --- a/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleDrvRuntime.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleHyperQ/Makefile b/Samples/0_Introduction/simpleHyperQ/Makefile index 480185113..161406888 100644 --- a/Samples/0_Introduction/simpleHyperQ/Makefile +++ b/Samples/0_Introduction/simpleHyperQ/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleHyperQ/NsightEclipse.xml b/Samples/0_Introduction/simpleHyperQ/NsightEclipse.xml index 8777270a9..1a5038455 100644 
--- a/Samples/0_Introduction/simpleHyperQ/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleHyperQ/NsightEclipse.xml @@ -3,20 +3,20 @@ simpleHyperQ + cudaMemcpy cudaStreamDestroy + cudaMalloc cudaFree - cudaEventRecord cudaMallocHost - cudaStreamCreate - cudaEventCreate - cudaEventElapsedTime cudaEventSynchronize + cudaEventRecord cudaFreeHost - cudaMalloc + cudaGetDevice cudaEventDestroy - cudaMemcpy + cudaEventElapsedTime + cudaStreamCreate cudaGetDeviceProperties - cudaGetDevice + cudaEventCreate whole @@ -62,6 +62,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleHyperQ/README.md b/Samples/0_Introduction/simpleHyperQ/README.md index 8527317e1..467bc4b35 100644 --- a/Samples/0_Introduction/simpleHyperQ/README.md +++ b/Samples/0_Introduction/simpleHyperQ/README.md @@ -10,7 +10,7 @@ CUDA Systems Integration, Performance Strategies ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 
](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaStreamDestroy, cudaFree, cudaEventRecord, cudaMallocHost, cudaStreamCreate, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaEventDestroy, cudaMemcpy, cudaGetDeviceProperties, cudaGetDevice +cudaMemcpy, cudaStreamDestroy, cudaMalloc, cudaFree, cudaMallocHost, cudaEventSynchronize, cudaEventRecord, cudaFreeHost, cudaGetDevice, cudaEventDestroy, cudaEventElapsedTime, cudaStreamCreate, cudaGetDeviceProperties, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2017.vcxproj b/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2017.vcxproj index 1e4bedc30..d2bbd16a2 100644 --- a/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleHyperQ.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2019.vcxproj b/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2019.vcxproj index b9180fa0b..3a6cc72cd 100644 --- a/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleHyperQ.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2022.vcxproj b/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2022.vcxproj index b641b9716..c15d7eec2 100644 --- a/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2022.vcxproj @@ 
-34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleHyperQ.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleIPC/Makefile b/Samples/0_Introduction/simpleIPC/Makefile index 31945c1e5..914f1ab6f 100644 --- a/Samples/0_Introduction/simpleIPC/Makefile +++ b/Samples/0_Introduction/simpleIPC/Makefile @@ -303,9 +303,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleIPC/NsightEclipse.xml b/Samples/0_Introduction/simpleIPC/NsightEclipse.xml index 23e3c5a7c..d25608dfe 100644 --- a/Samples/0_Introduction/simpleIPC/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleIPC/NsightEclipse.xml @@ -3,29 +3,29 @@ simpleIPC - cudaDeviceEnablePeerAccess - cudaIpcOpenEventHandle - cudaOccupancyMaxActiveBlocksPerMultiprocessor - cudaStreamCreateWithFlags - cudaDeviceCanAccessPeer + cudaSetDevice + cudaIpcCloseMemHandle + cudaEventDestroy + cudaGetDeviceCount cudaMemcpyAsync + cudaDeviceCanAccessPeer + cudaStreamCreateWithFlags cudaStreamDestroy - cudaEventCreate - cudaMalloc - cudaEventDestroy - cudaSetDevice + cudaGetLastError + cudaIpcOpenEventHandle cudaIpcOpenMemHandle - cudaGetDeviceProperties - cudaGetDeviceCount cudaIpcGetEventHandle - cudaGetLastError - cudaStreamSynchronize cudaStreamWaitEvent + cudaEventCreate cudaFree - cudaIpcCloseMemHandle + cudaEventSynchronize 
cudaEventRecord cudaIpcGetMemHandle - cudaEventSynchronize + cudaStreamSynchronize + cudaDeviceEnablePeerAccess + cudaMalloc + cudaOccupancyMaxActiveBlocksPerMultiprocessor + cudaGetDeviceProperties whole @@ -71,6 +71,7 @@ sm80 sm86 sm87 + sm90 ../../../Common/helper_multiprocess.cpp ../../../Common/helper_multiprocess.h diff --git a/Samples/0_Introduction/simpleIPC/README.md b/Samples/0_Introduction/simpleIPC/README.md index 1594c529a..a9d3336b1 100644 --- a/Samples/0_Introduction/simpleIPC/README.md +++ b/Samples/0_Introduction/simpleIPC/README.md @@ -10,7 +10,7 @@ CUDA Systems Integration, Peer to Peer, InterProcess Communication ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 
](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaDeviceEnablePeerAccess, cudaIpcOpenEventHandle, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaStreamCreateWithFlags, cudaDeviceCanAccessPeer, cudaMemcpyAsync, cudaStreamDestroy, cudaEventCreate, cudaMalloc, cudaEventDestroy, cudaSetDevice, cudaIpcOpenMemHandle, cudaGetDeviceProperties, cudaGetDeviceCount, cudaIpcGetEventHandle, cudaGetLastError, cudaStreamSynchronize, cudaStreamWaitEvent, cudaFree, cudaIpcCloseMemHandle, cudaEventRecord, cudaIpcGetMemHandle, cudaEventSynchronize +cudaSetDevice, cudaIpcCloseMemHandle, cudaEventDestroy, cudaGetDeviceCount, cudaMemcpyAsync, cudaDeviceCanAccessPeer, cudaStreamCreateWithFlags, cudaStreamDestroy, cudaGetLastError, cudaIpcOpenEventHandle, cudaIpcOpenMemHandle, cudaIpcGetEventHandle, cudaStreamWaitEvent, cudaEventCreate, cudaFree, cudaEventSynchronize, cudaEventRecord, cudaIpcGetMemHandle, cudaStreamSynchronize, cudaDeviceEnablePeerAccess, cudaMalloc, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaGetDeviceProperties ## Dependencies needed to build/run [IPC](../../../README.md#ipc) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/0_Introduction/simpleIPC/simpleIPC_vs2017.vcxproj b/Samples/0_Introduction/simpleIPC/simpleIPC_vs2017.vcxproj index 99d342ac9..037714303 100644 --- a/Samples/0_Introduction/simpleIPC/simpleIPC_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleIPC/simpleIPC_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleIPC.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/0_Introduction/simpleIPC/simpleIPC_vs2019.vcxproj b/Samples/0_Introduction/simpleIPC/simpleIPC_vs2019.vcxproj index 1e507919f..4d8096a2b 100644 --- a/Samples/0_Introduction/simpleIPC/simpleIPC_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleIPC/simpleIPC_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleIPC.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/0_Introduction/simpleIPC/simpleIPC_vs2022.vcxproj b/Samples/0_Introduction/simpleIPC/simpleIPC_vs2022.vcxproj index d9e9f48d8..df3aba1f0 100644 --- a/Samples/0_Introduction/simpleIPC/simpleIPC_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleIPC/simpleIPC_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleIPC.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/0_Introduction/simpleLayeredTexture/Makefile b/Samples/0_Introduction/simpleLayeredTexture/Makefile index bd2660f6e..eeb6d7eae 100644 --- a/Samples/0_Introduction/simpleLayeredTexture/Makefile +++ b/Samples/0_Introduction/simpleLayeredTexture/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleLayeredTexture/NsightEclipse.xml b/Samples/0_Introduction/simpleLayeredTexture/NsightEclipse.xml index 91481c99c..ff2bc6f00 100644 --- a/Samples/0_Introduction/simpleLayeredTexture/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleLayeredTexture/NsightEclipse.xml @@ -3,17 +3,17 @@ simpleLayeredTexture - cudaFree + cudaMemcpy + cudaCreateChannelDesc cudaFreeArray + cudaFree + cudaPitchedPtr + cudaPos + cudaDestroyTextureObject cudaExtent cudaDeviceSynchronize - cudaDestroyTextureObject - cudaPitchedPtr cudaCreateTextureObject cudaMalloc - cudaCreateChannelDesc - cudaPos - cudaMemcpy cudaGetDeviceProperties @@ -52,6 +52,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleLayeredTexture/README.md b/Samples/0_Introduction/simpleLayeredTexture/README.md index 3add67780..5dc0eb715 100644 --- a/Samples/0_Introduction/simpleLayeredTexture/README.md +++ b/Samples/0_Introduction/simpleLayeredTexture/README.md @@ -10,7 
+10,7 @@ Texture, Volume Processing ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaFreeArray, cudaExtent, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaPitchedPtr, cudaCreateTextureObject, cudaMalloc, cudaCreateChannelDesc, cudaPos, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaCreateChannelDesc, cudaFreeArray, cudaFree, cudaPitchedPtr, cudaPos, cudaDestroyTextureObject, cudaExtent, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMalloc, cudaGetDeviceProperties 
## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2017.vcxproj b/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2017.vcxproj index 71e3a84c2..ee1e3e42b 100644 --- a/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleLayeredTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2019.vcxproj b/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2019.vcxproj index fe147d3ec..3ae1a4f9e 100644 --- a/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleLayeredTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2022.vcxproj b/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2022.vcxproj index a99c2ee34..0d29aae5b 100644 --- a/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleLayeredTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleMPI/Makefile b/Samples/0_Introduction/simpleMPI/Makefile index 49fc56c36..8726e03d4 100644 --- a/Samples/0_Introduction/simpleMPI/Makefile +++ b/Samples/0_Introduction/simpleMPI/Makefile @@ -335,9 +335,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleMPI/README.md b/Samples/0_Introduction/simpleMPI/README.md index 6f56a03d6..5e0f97fa3 100644 --- a/Samples/0_Introduction/simpleMPI/README.md +++ b/Samples/0_Introduction/simpleMPI/README.md @@ -10,7 +10,7 @@ CUDA Systems Integration, MPI, Multithreading ## Supported SM 
Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMalloc, cudaGetLastError, cudaFree, cudaMemcpy +cudaMalloc, cudaGetLastError, cudaMemcpy, cudaFree ## Dependencies needed to build/run [MPI](../../../README.md#mpi) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/0_Introduction/simpleMPI/simpleMPI_vs2017.vcxproj b/Samples/0_Introduction/simpleMPI/simpleMPI_vs2017.vcxproj index 4e1777bc4..94e77612e 100644 --- a/Samples/0_Introduction/simpleMPI/simpleMPI_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleMPI/simpleMPI_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleMPI.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/0_Introduction/simpleMPI/simpleMPI_vs2019.vcxproj b/Samples/0_Introduction/simpleMPI/simpleMPI_vs2019.vcxproj index cf5e568bd..978222207 100644 --- a/Samples/0_Introduction/simpleMPI/simpleMPI_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleMPI/simpleMPI_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleMPI.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/0_Introduction/simpleMPI/simpleMPI_vs2022.vcxproj b/Samples/0_Introduction/simpleMPI/simpleMPI_vs2022.vcxproj index 2959c87e1..8f6ea5ae9 100644 --- a/Samples/0_Introduction/simpleMPI/simpleMPI_vs2022.vcxproj +++ 
b/Samples/0_Introduction/simpleMPI/simpleMPI_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleMPI.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/0_Introduction/simpleMultiCopy/Makefile b/Samples/0_Introduction/simpleMultiCopy/Makefile index 26974b350..d6d253c60 100644 --- a/Samples/0_Introduction/simpleMultiCopy/Makefile +++ b/Samples/0_Introduction/simpleMultiCopy/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleMultiCopy/NsightEclipse.xml b/Samples/0_Introduction/simpleMultiCopy/NsightEclipse.xml index ca79562c0..bb76ce8c7 100644 --- a/Samples/0_Introduction/simpleMultiCopy/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleMultiCopy/NsightEclipse.xml @@ -3,22 +3,22 @@ simpleMultiCopy - cudaMemset - cudaFree - cudaStreamDestroy - cudaEventRecord - cudaStreamCreate cudaHostAlloc - cudaEventCreate - cudaEventElapsedTime - cudaDeviceSynchronize + cudaStreamDestroy + cudaMalloc + cudaMemcpyAsync + cudaFree + cudaSetDevice cudaEventSynchronize + cudaDeviceSynchronize + cudaEventRecord cudaFreeHost - cudaMalloc + cudaMemset cudaEventDestroy - cudaSetDevice - cudaMemcpyAsync + cudaEventElapsedTime + cudaStreamCreate cudaGetDeviceProperties + cudaEventCreate whole @@ -66,6 +66,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git 
a/Samples/0_Introduction/simpleMultiCopy/README.md b/Samples/0_Introduction/simpleMultiCopy/README.md index 8f015c263..724042872 100644 --- a/Samples/0_Introduction/simpleMultiCopy/README.md +++ b/Samples/0_Introduction/simpleMultiCopy/README.md @@ -10,7 +10,7 @@ CUDA Streams and Events, Asynchronous Data Transfers, Overlap Compute and Copy, ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaStreamDestroy, cudaEventRecord, cudaStreamCreate, cudaHostAlloc, cudaEventCreate, 
cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaEventDestroy, cudaSetDevice, cudaMemcpyAsync, cudaGetDeviceProperties +cudaHostAlloc, cudaStreamDestroy, cudaMalloc, cudaMemcpyAsync, cudaFree, cudaSetDevice, cudaEventSynchronize, cudaDeviceSynchronize, cudaEventRecord, cudaFreeHost, cudaMemset, cudaEventDestroy, cudaEventElapsedTime, cudaStreamCreate, cudaGetDeviceProperties, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2017.vcxproj b/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2017.vcxproj index 86ccf67a3..8fbcf08cb 100644 --- a/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleMultiCopy.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2019.vcxproj b/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2019.vcxproj index d1a1609f6..1f77866f6 100644 --- a/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleMultiCopy.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2022.vcxproj b/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2022.vcxproj index 5251d5928..447b63311 100644 --- a/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleMultiCopy.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleMultiGPU/Makefile b/Samples/0_Introduction/simpleMultiGPU/Makefile index 6db255e49..15d13ddeb 100644 --- a/Samples/0_Introduction/simpleMultiGPU/Makefile +++ b/Samples/0_Introduction/simpleMultiGPU/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleMultiGPU/NsightEclipse.xml b/Samples/0_Introduction/simpleMultiGPU/NsightEclipse.xml index 500fc9ea8..a1e377e56 
100644 --- a/Samples/0_Introduction/simpleMultiGPU/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleMultiGPU/NsightEclipse.xml @@ -6,13 +6,13 @@ cudaStreamDestroy cudaFree cudaMallocHost - cudaStreamCreate - cudaGetDeviceCount - cudaFreeHost - cudaMalloc cudaSetDevice + cudaFreeHost cudaStreamSynchronize + cudaMalloc cudaMemcpyAsync + cudaStreamCreate + cudaGetDeviceCount whole @@ -53,6 +53,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleMultiGPU/README.md b/Samples/0_Introduction/simpleMultiGPU/README.md index 0f8464c26..284904f8a 100644 --- a/Samples/0_Introduction/simpleMultiGPU/README.md +++ b/Samples/0_Introduction/simpleMultiGPU/README.md @@ -10,7 +10,7 @@ Asynchronous Data Transfers, CUDA Streams and Events, Multithreading, Multi-GPU ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 
](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaStreamDestroy, cudaFree, cudaMallocHost, cudaStreamCreate, cudaGetDeviceCount, cudaFreeHost, cudaMalloc, cudaSetDevice, cudaStreamSynchronize, cudaMemcpyAsync +cudaStreamDestroy, cudaFree, cudaMallocHost, cudaSetDevice, cudaFreeHost, cudaStreamSynchronize, cudaMalloc, cudaMemcpyAsync, cudaStreamCreate, cudaGetDeviceCount ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2017.vcxproj b/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2017.vcxproj index bcc574be1..a025b2ec4 100644 --- a/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleMultiGPU.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2019.vcxproj b/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2019.vcxproj index 30a6f1990..2a6ce2539 100644 
--- a/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleMultiGPU.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2022.vcxproj b/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2022.vcxproj index 6fd4139c3..315059cab 100644 --- a/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleMultiGPU.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleOccupancy/Makefile b/Samples/0_Introduction/simpleOccupancy/Makefile index 85aa3c9f0..b735ec0c8 100644 --- a/Samples/0_Introduction/simpleOccupancy/Makefile +++ b/Samples/0_Introduction/simpleOccupancy/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS 
?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleOccupancy/NsightEclipse.xml b/Samples/0_Introduction/simpleOccupancy/NsightEclipse.xml index 34577846b..e4383b1c5 100644 --- a/Samples/0_Introduction/simpleOccupancy/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleOccupancy/NsightEclipse.xml @@ -3,17 +3,17 @@ simpleOccupancy + cudaMemcpy cudaFree - cudaEventRecord - cudaOccupancyMaxActiveBlocksPerMultiprocessor - cudaEventCreate - cudaOccupancyMaxPotentialBlockSize - cudaEventElapsedTime cudaDeviceSynchronize + cudaEventRecord + cudaGetDevice cudaMalloc - cudaMemcpy + cudaEventElapsedTime + cudaOccupancyMaxActiveBlocksPerMultiprocessor cudaGetDeviceProperties - cudaGetDevice + cudaOccupancyMaxPotentialBlockSize + cudaEventCreate whole @@ -52,6 +52,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleOccupancy/README.md b/Samples/0_Introduction/simpleOccupancy/README.md index 4ca96acf9..ddc12f2a2 100644 --- a/Samples/0_Introduction/simpleOccupancy/README.md +++ b/Samples/0_Introduction/simpleOccupancy/README.md @@ -10,7 +10,7 @@ Occupancy Calculator ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 
](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaEventRecord, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaEventCreate, cudaOccupancyMaxPotentialBlockSize, cudaEventElapsedTime, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties, cudaGetDevice +cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaEventRecord, cudaGetDevice, cudaMalloc, cudaEventElapsedTime, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaGetDeviceProperties, cudaOccupancyMaxPotentialBlockSize, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2017.vcxproj b/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2017.vcxproj index ee3e8ca57..d4d97a025 100644 --- a/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleOccupancy.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2019.vcxproj b/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2019.vcxproj index a0db9b8e0..096cea4af 100644 --- a/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleOccupancy.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2022.vcxproj b/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2022.vcxproj index 312b5e699..57de8a550 100644 --- a/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2022.vcxproj +++ 
b/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleOccupancy.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleP2P/Makefile b/Samples/0_Introduction/simpleP2P/Makefile index 036ff0d85..804aa449d 100644 --- a/Samples/0_Introduction/simpleP2P/Makefile +++ b/Samples/0_Introduction/simpleP2P/Makefile @@ -303,9 +303,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleP2P/NsightEclipse.xml b/Samples/0_Introduction/simpleP2P/NsightEclipse.xml index 69fc274d6..65fe83bb0 100644 --- a/Samples/0_Introduction/simpleP2P/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleP2P/NsightEclipse.xml @@ -3,23 +3,23 @@ simpleP2P - cudaDeviceEnablePeerAccess + cudaMemcpy + cudaMalloc cudaFree - cudaEventRecord cudaMallocHost - cudaGetDeviceCount - cudaEventElapsedTime - cudaDeviceSynchronize - cudaEventSynchronize - cudaFreeHost - cudaMalloc cudaEventCreateWithFlags - cudaDeviceCanAccessPeer - cudaEventDestroy cudaSetDevice + cudaEventSynchronize cudaDeviceDisablePeerAccess - cudaMemcpy + cudaGetDeviceCount + cudaDeviceSynchronize + cudaEventRecord + cudaFreeHost cudaGetDeviceProperties + cudaDeviceEnablePeerAccess + cudaEventDestroy + cudaEventElapsedTime + cudaDeviceCanAccessPeer whole @@ -67,6 +67,7 @@ sm80 sm86 
sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleP2P/README.md b/Samples/0_Introduction/simpleP2P/README.md index cbe3b2524..56b4b8bf1 100644 --- a/Samples/0_Introduction/simpleP2P/README.md +++ b/Samples/0_Introduction/simpleP2P/README.md @@ -10,7 +10,7 @@ Performance Strategies, Asynchronous Data Transfers, Unified Virtual Address Spa ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaDeviceEnablePeerAccess, cudaFree, cudaEventRecord, cudaMallocHost, cudaGetDeviceCount, 
cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaEventCreateWithFlags, cudaDeviceCanAccessPeer, cudaEventDestroy, cudaSetDevice, cudaDeviceDisablePeerAccess, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaMalloc, cudaFree, cudaMallocHost, cudaEventCreateWithFlags, cudaSetDevice, cudaEventSynchronize, cudaDeviceDisablePeerAccess, cudaGetDeviceCount, cudaDeviceSynchronize, cudaEventRecord, cudaFreeHost, cudaGetDeviceProperties, cudaDeviceEnablePeerAccess, cudaEventDestroy, cudaEventElapsedTime, cudaDeviceCanAccessPeer ## Dependencies needed to build/run [only-64-bit](../../../README.md#only-64-bit) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/0_Introduction/simpleP2P/simpleP2P_vs2017.vcxproj b/Samples/0_Introduction/simpleP2P/simpleP2P_vs2017.vcxproj index aea119d94..41efff17b 100644 --- a/Samples/0_Introduction/simpleP2P/simpleP2P_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleP2P/simpleP2P_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleP2P.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleP2P/simpleP2P_vs2019.vcxproj b/Samples/0_Introduction/simpleP2P/simpleP2P_vs2019.vcxproj index af3b80747..d51f6d7bb 100644 --- a/Samples/0_Introduction/simpleP2P/simpleP2P_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleP2P/simpleP2P_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleP2P.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleP2P/simpleP2P_vs2022.vcxproj b/Samples/0_Introduction/simpleP2P/simpleP2P_vs2022.vcxproj index ea28f0709..9ed321645 100644 --- a/Samples/0_Introduction/simpleP2P/simpleP2P_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleP2P/simpleP2P_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleP2P.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simplePitchLinearTexture/Makefile b/Samples/0_Introduction/simplePitchLinearTexture/Makefile index 32d993f2c..98218e83a 100644 --- a/Samples/0_Introduction/simplePitchLinearTexture/Makefile +++ b/Samples/0_Introduction/simplePitchLinearTexture/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simplePitchLinearTexture/NsightEclipse.xml b/Samples/0_Introduction/simplePitchLinearTexture/NsightEclipse.xml index e10820631..be7882b8b 100644 --- a/Samples/0_Introduction/simplePitchLinearTexture/NsightEclipse.xml +++ b/Samples/0_Introduction/simplePitchLinearTexture/NsightEclipse.xml @@ -3,18 +3,18 @@ simplePitchLinearTexture - cudaFree - cudaMemcpyToArray - cudaEventRecord + cudaMallocArray cudaFreeArray - cudaEventCreate - cudaEventElapsedTime + cudaFree + cudaMallocPitch cudaDestroyTextureObject cudaEventSynchronize - cudaMallocPitch + cudaMemcpyToArray + cudaEventRecord cudaCreateTextureObject cudaEventDestroy - cudaMallocArray + cudaEventElapsedTime + cudaEventCreate whole @@ -54,6 +54,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simplePitchLinearTexture/README.md b/Samples/0_Introduction/simplePitchLinearTexture/README.md index dc437cf2e..95944a886 100644 --- 
a/Samples/0_Introduction/simplePitchLinearTexture/README.md +++ b/Samples/0_Introduction/simplePitchLinearTexture/README.md @@ -10,7 +10,7 @@ Texture, Image Processing ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaMemcpyToArray, cudaEventRecord, cudaFreeArray, cudaEventCreate, cudaEventElapsedTime, cudaDestroyTextureObject, cudaEventSynchronize, cudaMallocPitch, cudaCreateTextureObject, cudaEventDestroy, cudaMallocArray +cudaMallocArray, cudaFreeArray, cudaFree, cudaMallocPitch, 
cudaDestroyTextureObject, cudaEventSynchronize, cudaMemcpyToArray, cudaEventRecord, cudaCreateTextureObject, cudaEventDestroy, cudaEventElapsedTime, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2017.vcxproj b/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2017.vcxproj index ae30718fe..f33a061ea 100644 --- a/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2017.vcxproj +++ b/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simplePitchLinearTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2019.vcxproj b/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2019.vcxproj index 60bf7a611..c08e4de30 100644 --- a/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2019.vcxproj +++ b/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simplePitchLinearTexture.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2022.vcxproj b/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2022.vcxproj index e964badc3..441ae1c9d 100644 --- a/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2022.vcxproj +++ b/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simplePitchLinearTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simplePrintf/Makefile b/Samples/0_Introduction/simplePrintf/Makefile index e1297aa6f..3b8cf8a0a 100644 --- a/Samples/0_Introduction/simplePrintf/Makefile +++ b/Samples/0_Introduction/simplePrintf/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simplePrintf/README.md 
b/Samples/0_Introduction/simplePrintf/README.md index fd29cf269..872faf89c 100644 --- a/Samples/0_Introduction/simplePrintf/README.md +++ b/Samples/0_Introduction/simplePrintf/README.md @@ -10,7 +10,7 @@ Debugging ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaDeviceSynchronize, cudaGetDeviceProperties, cudaGetDevice +cudaGetDeviceProperties, cudaDeviceSynchronize, cudaGetDevice ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for 
your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/0_Introduction/simplePrintf/simplePrintf_vs2017.vcxproj b/Samples/0_Introduction/simplePrintf/simplePrintf_vs2017.vcxproj index e06b0f938..861f30b9b 100644 --- a/Samples/0_Introduction/simplePrintf/simplePrintf_vs2017.vcxproj +++ b/Samples/0_Introduction/simplePrintf/simplePrintf_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simplePrintf.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simplePrintf/simplePrintf_vs2019.vcxproj b/Samples/0_Introduction/simplePrintf/simplePrintf_vs2019.vcxproj index 84c7cfbba..6dcb3c5e0 100644 --- a/Samples/0_Introduction/simplePrintf/simplePrintf_vs2019.vcxproj +++ b/Samples/0_Introduction/simplePrintf/simplePrintf_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simplePrintf.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simplePrintf/simplePrintf_vs2022.vcxproj b/Samples/0_Introduction/simplePrintf/simplePrintf_vs2022.vcxproj index 039c5fb25..e45b59538 
100644 --- a/Samples/0_Introduction/simplePrintf/simplePrintf_vs2022.vcxproj +++ b/Samples/0_Introduction/simplePrintf/simplePrintf_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simplePrintf.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleSeparateCompilation/Makefile b/Samples/0_Introduction/simpleSeparateCompilation/Makefile index 59116ae72..4a92e480d 100644 --- a/Samples/0_Introduction/simpleSeparateCompilation/Makefile +++ b/Samples/0_Introduction/simpleSeparateCompilation/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleSeparateCompilation/NsightEclipse.xml b/Samples/0_Introduction/simpleSeparateCompilation/NsightEclipse.xml index 586ab4771..2f1c15ab1 100644 --- a/Samples/0_Introduction/simpleSeparateCompilation/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleSeparateCompilation/NsightEclipse.xml @@ -12,11 +12,11 @@ - cudaFree - cudaMalloc - cudaGetLastError cudaMemcpy cudaMemcpyFromSymbol + cudaFree + cudaGetLastError + cudaMalloc separate @@ -56,6 +56,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleSeparateCompilation/README.md b/Samples/0_Introduction/simpleSeparateCompilation/README.md index b83c4f5c8..a4b54487b 100644 --- a/Samples/0_Introduction/simpleSeparateCompilation/README.md 
+++ b/Samples/0_Introduction/simpleSeparateCompilation/README.md @@ -10,7 +10,7 @@ Separate Compilation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaMemcpyFromSymbol +cudaMemcpy, cudaMemcpyFromSymbol, cudaFree, cudaGetLastError, cudaMalloc ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2017.vcxproj b/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2017.vcxproj index f8293c639..fc05d0f38 100644 --- a/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleSeparateCompilation.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2019.vcxproj b/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2019.vcxproj index db4e0716e..d6f50cf49 100644 --- a/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleSeparateCompilation.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 
./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2022.vcxproj b/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2022.vcxproj index 07e55c2ba..758766ef7 100644 --- a/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleSeparateCompilation.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/0_Introduction/simpleStreams/Makefile b/Samples/0_Introduction/simpleStreams/Makefile index 0c0886863..0e83a3071 100644 --- a/Samples/0_Introduction/simpleStreams/Makefile +++ b/Samples/0_Introduction/simpleStreams/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleStreams/NsightEclipse.xml b/Samples/0_Introduction/simpleStreams/NsightEclipse.xml index b2b5aff62..d7fb6d38d 100644 --- a/Samples/0_Introduction/simpleStreams/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleStreams/NsightEclipse.xml @@ -4,25 +4,25 @@ simpleStreams cudaMemcpy - cudaMemcpyAsync - cudaStreamDestroy - cudaMallocHost - cudaHostAlloc - cudaHostRegister - cudaMalloc - cudaEventCreateWithFlags - 
cudaEventDestroy cudaSetDeviceFlags cudaSetDevice - cudaGetDeviceProperties + cudaEventDestroy + cudaStreamCreate + cudaMallocHost + cudaEventCreateWithFlags + cudaFreeHost + cudaMemcpyAsync cudaGetDeviceCount - cudaEventElapsedTime + cudaStreamDestroy cudaMemset + cudaEventElapsedTime + cudaHostAlloc cudaFree - cudaEventRecord - cudaStreamCreate + cudaHostRegister cudaEventSynchronize - cudaFreeHost + cudaEventRecord + cudaMalloc + cudaGetDeviceProperties cudaHostUnregister @@ -61,6 +61,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleStreams/README.md b/Samples/0_Introduction/simpleStreams/README.md index 7c33b9071..a9de18f80 100644 --- a/Samples/0_Introduction/simpleStreams/README.md +++ b/Samples/0_Introduction/simpleStreams/README.md @@ -10,7 +10,7 @@ Asynchronous Data Transfers, CUDA Streams and Events ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 
](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemcpy, cudaMemcpyAsync, cudaStreamDestroy, cudaMallocHost, cudaHostAlloc, cudaHostRegister, cudaMalloc, cudaEventCreateWithFlags, cudaEventDestroy, cudaSetDeviceFlags, cudaSetDevice, cudaGetDeviceProperties, cudaGetDeviceCount, cudaEventElapsedTime, cudaMemset, cudaFree, cudaEventRecord, cudaStreamCreate, cudaEventSynchronize, cudaFreeHost, cudaHostUnregister +cudaMemcpy, cudaSetDeviceFlags, cudaSetDevice, cudaEventDestroy, cudaStreamCreate, cudaMallocHost, cudaEventCreateWithFlags, cudaFreeHost, cudaMemcpyAsync, cudaGetDeviceCount, cudaStreamDestroy, cudaMemset, cudaEventElapsedTime, cudaHostAlloc, cudaFree, cudaHostRegister, cudaEventSynchronize, cudaEventRecord, cudaMalloc, cudaGetDeviceProperties, cudaHostUnregister ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/0_Introduction/simpleStreams/simpleStreams_vs2017.vcxproj b/Samples/0_Introduction/simpleStreams/simpleStreams_vs2017.vcxproj index 1aae760f8..b0fc51da6 100644 --- a/Samples/0_Introduction/simpleStreams/simpleStreams_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleStreams/simpleStreams_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleStreams.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleStreams/simpleStreams_vs2019.vcxproj b/Samples/0_Introduction/simpleStreams/simpleStreams_vs2019.vcxproj index f950cfd87..6b96b6b69 100644 --- a/Samples/0_Introduction/simpleStreams/simpleStreams_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleStreams/simpleStreams_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleStreams.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleStreams/simpleStreams_vs2022.vcxproj b/Samples/0_Introduction/simpleStreams/simpleStreams_vs2022.vcxproj index 580200404..cf0c0e4d2 100644 --- a/Samples/0_Introduction/simpleStreams/simpleStreams_vs2022.vcxproj +++ 
b/Samples/0_Introduction/simpleStreams/simpleStreams_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleStreams.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleSurfaceWrite/Makefile b/Samples/0_Introduction/simpleSurfaceWrite/Makefile index 024e0d4cd..7440eee76 100644 --- a/Samples/0_Introduction/simpleSurfaceWrite/Makefile +++ b/Samples/0_Introduction/simpleSurfaceWrite/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleSurfaceWrite/NsightEclipse.xml b/Samples/0_Introduction/simpleSurfaceWrite/NsightEclipse.xml index 182fad401..4d9153cd0 100644 --- a/Samples/0_Introduction/simpleSurfaceWrite/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleSurfaceWrite/NsightEclipse.xml @@ -6,18 +6,18 @@ output.pgm + cudaMemcpy + cudaCreateChannelDesc + cudaMallocArray + cudaFreeArray cudaFree + cudaDestroyTextureObject cudaMemcpyToArray - cudaFreeArray cudaDestroySurfaceObject - cudaCreateSurfaceObject - cudaDestroyTextureObject cudaDeviceSynchronize + cudaCreateSurfaceObject cudaCreateTextureObject cudaMalloc - cudaMallocArray - cudaCreateChannelDesc - cudaMemcpy cudaGetDeviceProperties @@ -61,6 +61,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleSurfaceWrite/README.md 
b/Samples/0_Introduction/simpleSurfaceWrite/README.md index a93a9d6a8..944cf2fcf 100644 --- a/Samples/0_Introduction/simpleSurfaceWrite/README.md +++ b/Samples/0_Introduction/simpleSurfaceWrite/README.md @@ -10,7 +10,7 @@ Texture, Surface Writes, Image Processing ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaMemcpyToArray, cudaFreeArray, cudaDestroySurfaceObject, cudaCreateSurfaceObject, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMalloc, 
cudaMallocArray, cudaCreateChannelDesc, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaCreateChannelDesc, cudaMallocArray, cudaFreeArray, cudaFree, cudaDestroyTextureObject, cudaMemcpyToArray, cudaDestroySurfaceObject, cudaDeviceSynchronize, cudaCreateSurfaceObject, cudaCreateTextureObject, cudaMalloc, cudaGetDeviceProperties ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2017.vcxproj b/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2017.vcxproj index c076e2710..8a65d23a6 100644 --- a/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleSurfaceWrite.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2019.vcxproj b/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2019.vcxproj index c9fdbc59f..d61b86899 100644 --- a/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleSurfaceWrite.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2022.vcxproj b/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2022.vcxproj index 1b8e29b38..d5275f35c 100644 --- a/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleSurfaceWrite.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleTemplates/Makefile b/Samples/0_Introduction/simpleTemplates/Makefile index 78ad03d31..beac3d79d 100644 --- a/Samples/0_Introduction/simpleTemplates/Makefile +++ b/Samples/0_Introduction/simpleTemplates/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleTemplates/NsightEclipse.xml 
b/Samples/0_Introduction/simpleTemplates/NsightEclipse.xml index 343057ce0..361492619 100644 --- a/Samples/0_Introduction/simpleTemplates/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleTemplates/NsightEclipse.xml @@ -4,9 +4,9 @@ simpleTemplates cudaMalloc - cudaFree cudaMemcpy cudaGetDeviceProperties + cudaFree whole @@ -42,6 +42,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleTemplates/README.md b/Samples/0_Introduction/simpleTemplates/README.md index 83a671030..0db671510 100644 --- a/Samples/0_Introduction/simpleTemplates/README.md +++ b/Samples/0_Introduction/simpleTemplates/README.md @@ -10,7 +10,7 @@ C++ Templates ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 
9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMalloc, cudaFree, cudaMemcpy, cudaGetDeviceProperties +cudaMalloc, cudaMemcpy, cudaGetDeviceProperties, cudaFree ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2017.vcxproj b/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2017.vcxproj index b78a4c18a..96cca9854 100644 --- a/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleTemplates.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2019.vcxproj b/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2019.vcxproj index 9c5ec6d86..93969038c 100644 --- a/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleTemplates.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2022.vcxproj b/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2022.vcxproj index 0929e9be7..90b222bea 100644 --- a/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleTemplates.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleTemplates_nvrtc/README.md b/Samples/0_Introduction/simpleTemplates_nvrtc/README.md index ae3092616..31c588ee2 100644 --- a/Samples/0_Introduction/simpleTemplates_nvrtc/README.md +++ b/Samples/0_Introduction/simpleTemplates_nvrtc/README.md @@ -10,7 +10,7 @@ C++ Templates, Runtime Compilation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 
](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuModuleGetFunction, cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuMemcpyHtoD +cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction ## Dependencies needed to build/run [NVRTC](../../../README.md#nvrtc) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2017.vcxproj b/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2017.vcxproj index 0c2dfa96d..1655b5369 100644 --- a/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -108,6 +108,6 @@ - + diff --git a/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2019.vcxproj b/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2019.vcxproj index 08f3653a2..6d28d7aa7 100644 --- a/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -104,6 +104,6 @@ - + diff --git a/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2022.vcxproj b/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2022.vcxproj index c139a2742..0c8ae9691 100644 --- a/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -104,6 +104,6 @@ - + diff --git a/Samples/0_Introduction/simpleTexture/Makefile b/Samples/0_Introduction/simpleTexture/Makefile index 046207e9c..e705cef81 100644 --- a/Samples/0_Introduction/simpleTexture/Makefile +++ b/Samples/0_Introduction/simpleTexture/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleTexture/NsightEclipse.xml b/Samples/0_Introduction/simpleTexture/NsightEclipse.xml index 
a4fbab4b0..0f029aeaf 100644 --- a/Samples/0_Introduction/simpleTexture/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleTexture/NsightEclipse.xml @@ -6,16 +6,16 @@ ./data/teapot512_bw_out.pgm + cudaMemcpy + cudaCreateChannelDesc + cudaMallocArray + cudaFreeArray cudaFree + cudaDestroyTextureObject cudaMemcpyToArray - cudaFreeArray cudaDeviceSynchronize - cudaDestroyTextureObject cudaCreateTextureObject cudaMalloc - cudaMallocArray - cudaCreateChannelDesc - cudaMemcpy whole @@ -59,6 +59,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleTexture/README.md b/Samples/0_Introduction/simpleTexture/README.md index 8577f3379..834d4ee84 100644 --- a/Samples/0_Introduction/simpleTexture/README.md +++ b/Samples/0_Introduction/simpleTexture/README.md @@ -10,7 +10,7 @@ CUDA Runtime API, Texture, Image Processing ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 
](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaMemcpyToArray, cudaFreeArray, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaCreateChannelDesc, cudaMemcpy +cudaMemcpy, cudaCreateChannelDesc, cudaMallocArray, cudaFreeArray, cudaFree, cudaDestroyTextureObject, cudaMemcpyToArray, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMalloc ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/0_Introduction/simpleTexture/simpleTexture_vs2017.vcxproj b/Samples/0_Introduction/simpleTexture/simpleTexture_vs2017.vcxproj index 544d7b224..c12f6f171 100644 --- a/Samples/0_Introduction/simpleTexture/simpleTexture_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleTexture/simpleTexture_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleTexture/simpleTexture_vs2019.vcxproj b/Samples/0_Introduction/simpleTexture/simpleTexture_vs2019.vcxproj index 034f28908..6c76b4ab4 100644 --- a/Samples/0_Introduction/simpleTexture/simpleTexture_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleTexture/simpleTexture_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleTexture/simpleTexture_vs2022.vcxproj b/Samples/0_Introduction/simpleTexture/simpleTexture_vs2022.vcxproj index 04f2a44d1..5562ce0bb 100644 --- a/Samples/0_Introduction/simpleTexture/simpleTexture_vs2022.vcxproj +++ 
b/Samples/0_Introduction/simpleTexture/simpleTexture_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleTexture3D/Makefile b/Samples/0_Introduction/simpleTexture3D/Makefile index fa6da461b..f232cfff5 100644 --- a/Samples/0_Introduction/simpleTexture3D/Makefile +++ b/Samples/0_Introduction/simpleTexture3D/Makefile @@ -299,9 +299,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleTexture3D/NsightEclipse.xml b/Samples/0_Introduction/simpleTexture3D/NsightEclipse.xml index de66d67b0..56848594b 100644 --- a/Samples/0_Introduction/simpleTexture3D/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleTexture3D/NsightEclipse.xml @@ -3,20 +3,20 @@ simpleTexture3D - cudaFree - cudaGraphicsMapResources + cudaGraphicsUnmapResources + cudaMemcpy cudaFreeArray - cudaGraphicsGLRegisterBuffer + cudaFree + cudaPitchedPtr cudaGraphicsResourceGetMappedPointer + cudaGraphicsMapResources + cudaDestroyTextureObject cudaExtent cudaDeviceSynchronize - cudaDestroyTextureObject - cudaPitchedPtr cudaCreateTextureObject - cudaMalloc cudaGraphicsUnregisterResource - cudaGraphicsUnmapResources - cudaMemcpy + cudaMalloc + cudaGraphicsGLRegisterBuffer whole @@ -79,6 +79,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git 
a/Samples/0_Introduction/simpleTexture3D/README.md b/Samples/0_Introduction/simpleTexture3D/README.md index 7ea427e74..de889b8bd 100644 --- a/Samples/0_Introduction/simpleTexture3D/README.md +++ b/Samples/0_Introduction/simpleTexture3D/README.md @@ -10,7 +10,7 @@ Graphics Interop, Image Processing, 3D Textures, Surface Writes ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGraphicsMapResources, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaExtent, 
cudaDeviceSynchronize, cudaDestroyTextureObject, cudaPitchedPtr, cudaCreateTextureObject, cudaMalloc, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy +cudaGraphicsUnmapResources, cudaMemcpy, cudaFreeArray, cudaFree, cudaPitchedPtr, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaExtent, cudaDeviceSynchronize, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/0_Introduction/simpleTexture3D/findgllib.mk b/Samples/0_Introduction/simpleTexture3D/findgllib.mk index f0a5c5512..998fcf0f1 100644 --- a/Samples/0_Introduction/simpleTexture3D/findgllib.mk +++ b/Samples/0_Introduction/simpleTexture3D/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) 
+ CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2017.vcxproj b/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2017.vcxproj index 9fefb6012..ed90a63b8 100644 --- a/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleTexture3D.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -118,6 +118,6 @@ - + diff --git 
a/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2019.vcxproj b/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2019.vcxproj index 23518ada3..be0fa981f 100644 --- a/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleTexture3D.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2022.vcxproj b/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2022.vcxproj index dd4ab2af5..1dd427b06 100644 --- a/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleTexture3D.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/0_Introduction/simpleTextureDrv/Makefile b/Samples/0_Introduction/simpleTextureDrv/Makefile index bca148380..95ff9ffe3 100644 --- a/Samples/0_Introduction/simpleTextureDrv/Makefile +++ b/Samples/0_Introduction/simpleTextureDrv/Makefile @@ -283,9 +283,9 @@ FATBIN_FILE := 
simpleTexture_kernel${TARGET_SIZE}.fatbin # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(GENCODE_FLAGS),) diff --git a/Samples/0_Introduction/simpleTextureDrv/README.md b/Samples/0_Introduction/simpleTextureDrv/README.md index 3457bfbe6..ee28ee7f9 100644 --- a/Samples/0_Introduction/simpleTextureDrv/README.md +++ b/Samples/0_Introduction/simpleTextureDrv/README.md @@ -10,7 +10,7 @@ CUDA Driver API, Texture, Image Processing ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 
](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuModuleGetFunction, cuTexObjectDestroy, cuModuleLoadData, cuCtxCreate, cuArrayCreate, cuDeviceGetName, cuLaunchKernel, cuMemAlloc, cuCtxSynchronize, cuArrayDestroy, cuTexObjectCreate, cuMemFree, cuMemcpyDtoH, cuCtxDestroy, cuDeviceGetAttribute +cuMemcpyDtoH, cuLaunchKernel, cuModuleLoadData, cuDeviceGetName, cuDeviceGetAttribute, cuCtxSynchronize, cuMemAlloc, cuArrayCreate, cuMemFree, cuCtxDestroy, cuTexObjectDestroy, cuTexObjectCreate, cuCtxCreate, cuModuleGetFunction, cuArrayDestroy ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2017.vcxproj b/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2017.vcxproj index d674efc69..66dc7b33f 100644 --- a/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleTextureDrv.exe - compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -111,6 +111,6 @@ - + diff --git a/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2019.vcxproj 
b/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2019.vcxproj index 5817eb47a..e1c437169 100644 --- a/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleTextureDrv.exe - compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2022.vcxproj b/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2022.vcxproj index 0cbde7a7f..21cf1f35f 100644 --- a/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleTextureDrv.exe - compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleVoteIntrinsics/Makefile b/Samples/0_Introduction/simpleVoteIntrinsics/Makefile index 82e012793..32edcf723 100644 --- a/Samples/0_Introduction/simpleVoteIntrinsics/Makefile +++ 
b/Samples/0_Introduction/simpleVoteIntrinsics/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleVoteIntrinsics/NsightEclipse.xml b/Samples/0_Introduction/simpleVoteIntrinsics/NsightEclipse.xml index b424859f7..e91b97149 100644 --- a/Samples/0_Introduction/simpleVoteIntrinsics/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleVoteIntrinsics/NsightEclipse.xml @@ -3,10 +3,10 @@ simpleVoteIntrinsics + cudaMemcpy cudaFree cudaDeviceSynchronize cudaMalloc - cudaMemcpy cudaGetDeviceProperties @@ -46,6 +46,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleVoteIntrinsics/README.md b/Samples/0_Introduction/simpleVoteIntrinsics/README.md index e35dece3d..9c86c6358 100644 --- a/Samples/0_Introduction/simpleVoteIntrinsics/README.md +++ b/Samples/0_Introduction/simpleVoteIntrinsics/README.md @@ -10,7 +10,7 @@ Vote Intrinsics ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 
](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaGetDeviceProperties ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2017.vcxproj b/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2017.vcxproj index c49388da7..d9045a581 100644 --- a/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleVoteIntrinsics.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2019.vcxproj b/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2019.vcxproj index 91da2af06..3bcc14725 100644 --- a/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleVoteIntrinsics.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2022.vcxproj b/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2022.vcxproj 
index 980c93b29..3af7fc246 100644 --- a/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleVoteIntrinsics.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/README.md b/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/README.md index 28801cbd4..2f4cdeb49 100644 --- a/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/README.md +++ b/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/README.md @@ -10,7 +10,7 @@ Vote Intrinsics, CUDA Driver API, Runtime Compilation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 
](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuModuleGetFunction, cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuMemcpyHtoD +cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction ## Dependencies needed to build/run [NVRTC](../../../README.md#nvrtc) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2017.vcxproj b/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2017.vcxproj index a0d074920..0d541d12c 100644 --- a/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2019.vcxproj b/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2019.vcxproj index 00d7d2757..2c334e595 100644 --- a/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2022.vcxproj b/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2022.vcxproj index c8c5ff0bd..69dbd9683 100644 --- a/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleZeroCopy/Makefile b/Samples/0_Introduction/simpleZeroCopy/Makefile index 8fe33e69b..3161f90b9 100644 --- a/Samples/0_Introduction/simpleZeroCopy/Makefile +++ b/Samples/0_Introduction/simpleZeroCopy/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git 
a/Samples/0_Introduction/simpleZeroCopy/NsightEclipse.xml b/Samples/0_Introduction/simpleZeroCopy/NsightEclipse.xml index 155731c7c..a5776e59e 100644 --- a/Samples/0_Introduction/simpleZeroCopy/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleZeroCopy/NsightEclipse.xml @@ -4,15 +4,15 @@ simpleZeroCopy cudaHostAlloc - cudaGetDeviceCount - cudaHostRegister - cudaDeviceSynchronize - cudaFreeHost - cudaHostUnregister cudaSetDeviceFlags + cudaHostRegister cudaSetDevice + cudaGetDeviceCount cudaHostGetDevicePointer + cudaDeviceSynchronize + cudaFreeHost cudaGetDeviceProperties + cudaHostUnregister whole @@ -51,6 +51,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleZeroCopy/README.md b/Samples/0_Introduction/simpleZeroCopy/README.md index 640a3096b..a2f5acc82 100644 --- a/Samples/0_Introduction/simpleZeroCopy/README.md +++ b/Samples/0_Introduction/simpleZeroCopy/README.md @@ -10,7 +10,7 @@ Performance Strategies, Pinned System Paged Memory, Vector Addition ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 
](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaHostAlloc, cudaGetDeviceCount, cudaHostRegister, cudaDeviceSynchronize, cudaFreeHost, cudaHostUnregister, cudaSetDeviceFlags, cudaSetDevice, cudaHostGetDevicePointer, cudaGetDeviceProperties +cudaHostAlloc, cudaSetDeviceFlags, cudaHostRegister, cudaSetDevice, cudaGetDeviceCount, cudaHostGetDevicePointer, cudaDeviceSynchronize, cudaFreeHost, cudaGetDeviceProperties, cudaHostUnregister ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2017.vcxproj b/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2017.vcxproj index 46bb04d3d..b7e0b9c04 100644 --- a/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleZeroCopy.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2019.vcxproj b/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2019.vcxproj index 17ea198b2..c7a9daed9 100644 --- a/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleZeroCopy.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2022.vcxproj b/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2022.vcxproj index 6f4d0c204..29709586e 100644 --- a/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2022.vcxproj +++ 
b/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleZeroCopy.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/systemWideAtomics/Makefile b/Samples/0_Introduction/systemWideAtomics/Makefile index e8019879b..6832e615b 100644 --- a/Samples/0_Introduction/systemWideAtomics/Makefile +++ b/Samples/0_Introduction/systemWideAtomics/Makefile @@ -303,9 +303,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 61 70 72 75 80 86 87 +SMS ?= 61 70 72 75 80 86 87 90 else -SMS ?= 60 61 70 75 80 86 +SMS ?= 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/systemWideAtomics/NsightEclipse.xml b/Samples/0_Introduction/systemWideAtomics/NsightEclipse.xml index f9099627d..05284552b 100644 --- a/Samples/0_Introduction/systemWideAtomics/NsightEclipse.xml +++ b/Samples/0_Introduction/systemWideAtomics/NsightEclipse.xml @@ -3,10 +3,10 @@ systemWideAtomics - cudaFree cudaDeviceSynchronize cudaMallocManaged cudaGetDeviceProperties + cudaFree whole @@ -44,6 +44,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/systemWideAtomics/README.md b/Samples/0_Introduction/systemWideAtomics/README.md index c838fcd5b..98f2a0629 100644 --- a/Samples/0_Introduction/systemWideAtomics/README.md +++ b/Samples/0_Introduction/systemWideAtomics/README.md @@ -10,7 +10,7 @@ Atomic Intrinsics, Unified Memory ## Supported SM Architectures -[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 
](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaDeviceSynchronize, cudaMallocManaged, cudaGetDeviceProperties +cudaDeviceSynchronize, cudaMallocManaged, cudaGetDeviceProperties, cudaFree ## Dependencies needed to build/run [UVM](../../../README.md#uvm) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/0_Introduction/template/Makefile b/Samples/0_Introduction/template/Makefile index a462a6374..47f379667 100644 --- a/Samples/0_Introduction/template/Makefile +++ b/Samples/0_Introduction/template/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/template/NsightEclipse.xml b/Samples/0_Introduction/template/NsightEclipse.xml index e043d3891..21fe74fb2 100644 --- a/Samples/0_Introduction/template/NsightEclipse.xml +++ b/Samples/0_Introduction/template/NsightEclipse.xml @@ -4,8 +4,8 @@ template cudaMalloc - cudaFree cudaMemcpy + cudaFree whole @@ -41,6 +41,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/template/README.md b/Samples/0_Introduction/template/README.md index afcf51e14..f22445699 100644 --- a/Samples/0_Introduction/template/README.md +++ b/Samples/0_Introduction/template/README.md @@ -10,7 +10,7 @@ Device Memory Allocation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 
](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMalloc, cudaFree, cudaMemcpy +cudaMalloc, cudaMemcpy, cudaFree ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/0_Introduction/template/template_vs2017.vcxproj b/Samples/0_Introduction/template/template_vs2017.vcxproj index 01694a193..5e4367812 100644 --- a/Samples/0_Introduction/template/template_vs2017.vcxproj +++ b/Samples/0_Introduction/template/template_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/template.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/0_Introduction/template/template_vs2019.vcxproj b/Samples/0_Introduction/template/template_vs2019.vcxproj index 606bb330f..f736fc28e 100644 --- a/Samples/0_Introduction/template/template_vs2019.vcxproj +++ b/Samples/0_Introduction/template/template_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/template.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/0_Introduction/template/template_vs2022.vcxproj b/Samples/0_Introduction/template/template_vs2022.vcxproj index cc552ab45..6a6d8744f 100644 --- a/Samples/0_Introduction/template/template_vs2022.vcxproj +++ b/Samples/0_Introduction/template/template_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/template.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/0_Introduction/vectorAdd/Makefile b/Samples/0_Introduction/vectorAdd/Makefile index 3ecf5e49c..62be14985 100644 --- a/Samples/0_Introduction/vectorAdd/Makefile +++ b/Samples/0_Introduction/vectorAdd/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/vectorAdd/NsightEclipse.xml b/Samples/0_Introduction/vectorAdd/NsightEclipse.xml index 0df66051f..353acee52 100644 --- a/Samples/0_Introduction/vectorAdd/NsightEclipse.xml +++ b/Samples/0_Introduction/vectorAdd/NsightEclipse.xml @@ -3,11 +3,11 @@ vectorAdd - cudaFree - cudaMalloc - cudaGetLastError cudaMemcpy cudaGetErrorString + cudaFree + cudaGetLastError + cudaMalloc whole @@ -46,6 +46,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/vectorAdd/README.md b/Samples/0_Introduction/vectorAdd/README.md index 8a7e44640..99523169e 100644 --- a/Samples/0_Introduction/vectorAdd/README.md +++ b/Samples/0_Introduction/vectorAdd/README.md @@ -10,7 +10,7 @@ CUDA Runtime API, Vector Addition ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 
6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetErrorString +cudaMemcpy, cudaGetErrorString, cudaFree, cudaGetLastError, cudaMalloc ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/0_Introduction/vectorAdd/vectorAdd_vs2017.vcxproj b/Samples/0_Introduction/vectorAdd/vectorAdd_vs2017.vcxproj index 6529e270e..e8af31493 100644 --- a/Samples/0_Introduction/vectorAdd/vectorAdd_vs2017.vcxproj +++ b/Samples/0_Introduction/vectorAdd/vectorAdd_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/vectorAdd.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/vectorAdd/vectorAdd_vs2019.vcxproj b/Samples/0_Introduction/vectorAdd/vectorAdd_vs2019.vcxproj index 295f1945b..a25492e3b 100644 --- a/Samples/0_Introduction/vectorAdd/vectorAdd_vs2019.vcxproj +++ b/Samples/0_Introduction/vectorAdd/vectorAdd_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/vectorAdd.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/vectorAdd/vectorAdd_vs2022.vcxproj b/Samples/0_Introduction/vectorAdd/vectorAdd_vs2022.vcxproj index 4da531469..c983b88bb 100644 --- a/Samples/0_Introduction/vectorAdd/vectorAdd_vs2022.vcxproj +++ b/Samples/0_Introduction/vectorAdd/vectorAdd_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/vectorAdd.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/vectorAddDrv/Makefile b/Samples/0_Introduction/vectorAddDrv/Makefile index c21fa9428..472417b1d 100644 --- a/Samples/0_Introduction/vectorAddDrv/Makefile +++ b/Samples/0_Introduction/vectorAddDrv/Makefile @@ -283,9 +283,9 @@ FATBIN_FILE := vectorAdd_kernel${TARGET_SIZE}.fatbin # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(GENCODE_FLAGS),) diff --git a/Samples/0_Introduction/vectorAddDrv/README.md b/Samples/0_Introduction/vectorAddDrv/README.md index 09612d234..ac26085a3 100644 --- a/Samples/0_Introduction/vectorAddDrv/README.md +++ b/Samples/0_Introduction/vectorAddDrv/README.md @@ -10,7 +10,7 @@ CUDA Driver API, Vector Addition ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 
](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuModuleGetFunction, cuModuleLoadData, cuCtxCreate, cuLaunchKernel, cuMemAlloc, cuMemcpyDtoH, cuCtxSynchronize, cuMemFree, cuInit, cuCtxDestroy, cuMemcpyHtoD +cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuModuleLoadData, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuCtxDestroy, cuModuleGetFunction, cuCtxCreate, cuInit ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2017.vcxproj b/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2017.vcxproj index bfd45966f..57b066935 100644 --- a/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2017.vcxproj +++ b/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/vectorAddDrv.exe - compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -111,6 +111,6 @@ - + diff --git a/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2019.vcxproj b/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2019.vcxproj index d1119c072..c48b9e149 100644 --- a/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2019.vcxproj +++ b/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/vectorAddDrv.exe - compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2022.vcxproj b/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2022.vcxproj index 49a0a5fb3..4f04109bc 100644 --- 
a/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2022.vcxproj +++ b/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/vectorAddDrv.exe - compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/vectorAddMMAP/Makefile b/Samples/0_Introduction/vectorAddMMAP/Makefile index 72b2eaa41..743d2bdd1 100644 --- a/Samples/0_Introduction/vectorAddMMAP/Makefile +++ b/Samples/0_Introduction/vectorAddMMAP/Makefile @@ -307,9 +307,9 @@ FATBIN_FILE := vectorAdd_kernel${TARGET_SIZE}.fatbin # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(GENCODE_FLAGS),) diff --git a/Samples/0_Introduction/vectorAddMMAP/README.md b/Samples/0_Introduction/vectorAddMMAP/README.md index 786e136f4..6dbbcae71 100644 --- a/Samples/0_Introduction/vectorAddMMAP/README.md +++ b/Samples/0_Introduction/vectorAddMMAP/README.md @@ -10,7 +10,7 @@ CUDA Driver API, Vector Addition, MMAP ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 
](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuMemSetAccess, cuInit, cuMemAddressReserve, cuModuleGetFunction, cuCtxDestroy, cuCtxCreate, cuMemAddressFree, cuMemGetAllocationGranularity, cuMemUnmap, cuDeviceGetAttribute, cuMemRelease, cuModuleLoadData, cuMemMap, cuMemCreate, cuMemcpyHtoD, cuDeviceCanAccessPeer, cuDeviceGetCount, cuLaunchKernel, cuMemcpyDtoH +cuMemcpyDtoH, cuDeviceCanAccessPeer, cuModuleGetFunction, cuMemSetAccess, cuMemRelease, cuInit, cuMemcpyHtoD, cuLaunchKernel, cuMemCreate, cuModuleLoadData, cuCtxDestroy, cuDeviceGetCount, cuMemMap, cuDeviceGetAttribute, cuMemGetAllocationGranularity, cuMemAddressFree, cuMemUnmap, cuCtxCreate, cuMemAddressReserve ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2017.vcxproj b/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2017.vcxproj index 6aa6aa9e7..00641d9b0 100644 --- a/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2017.vcxproj +++ b/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/vectorAddMMAP.exe - compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -112,6 +112,6 @@ - + diff --git a/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2019.vcxproj b/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2019.vcxproj index ece800ca5..a4885080d 100644 --- a/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2019.vcxproj +++ b/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/vectorAddMMAP.exe - compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2022.vcxproj 
b/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2022.vcxproj index 81fe95b62..59ad3c829 100644 --- a/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2022.vcxproj +++ b/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/vectorAddMMAP.exe - compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/0_Introduction/vectorAdd_nvrtc/README.md b/Samples/0_Introduction/vectorAdd_nvrtc/README.md index 5e83b512e..03ed2a744 100644 --- a/Samples/0_Introduction/vectorAdd_nvrtc/README.md +++ b/Samples/0_Introduction/vectorAdd_nvrtc/README.md @@ -10,7 +10,7 @@ CUDA Driver API, Vector Addition, Runtime Compilation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 
](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,7 +23,7 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuModuleGetFunction, cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuMemcpyHtoD +cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) cudaBlockSize, cudaGridSize @@ -33,7 +33,7 @@ cudaBlockSize, cudaGridSize ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2017.vcxproj b/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2017.vcxproj index c83312456..1ad04a6d4 100644 --- a/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2017.vcxproj +++ b/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2019.vcxproj b/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2019.vcxproj index 27cd03de3..e140f5eb4 100644 --- a/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2019.vcxproj +++ b/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2022.vcxproj b/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2022.vcxproj index db61b4750..a575fb820 100644 --- a/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2022.vcxproj +++ b/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/1_Utilities/bandwidthTest/Makefile b/Samples/1_Utilities/bandwidthTest/Makefile index 31f60de59..8699a8bcb 100644 --- a/Samples/1_Utilities/bandwidthTest/Makefile +++ b/Samples/1_Utilities/bandwidthTest/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/1_Utilities/bandwidthTest/NsightEclipse.xml b/Samples/1_Utilities/bandwidthTest/NsightEclipse.xml index 840ab2736..6078a765d 100644 --- a/Samples/1_Utilities/bandwidthTest/NsightEclipse.xml +++ b/Samples/1_Utilities/bandwidthTest/NsightEclipse.xml @@ -3,22 +3,22 @@ 
bandwidthTest - cudaFree - cudaEventRecord - cudaMallocHost cudaHostAlloc - cudaEventCreate - cudaGetDeviceCount - cudaEventElapsedTime - cudaDeviceSynchronize - cudaFreeHost + cudaMemcpy cudaMalloc - cudaEventDestroy - cudaSetDevice cudaMemcpyAsync - cudaMemcpy + cudaFree cudaGetErrorString + cudaMallocHost + cudaSetDevice cudaGetDeviceProperties + cudaDeviceSynchronize + cudaEventRecord + cudaFreeHost + cudaEventDestroy + cudaEventElapsedTime + cudaGetDeviceCount + cudaEventCreate whole @@ -58,6 +58,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/1_Utilities/bandwidthTest/README.md b/Samples/1_Utilities/bandwidthTest/README.md index 32e4f7796..2cf0bad72 100644 --- a/Samples/1_Utilities/bandwidthTest/README.md +++ b/Samples/1_Utilities/bandwidthTest/README.md @@ -10,7 +10,7 @@ CUDA Streams and Events, Performance Strategies ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 
](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaEventRecord, cudaMallocHost, cudaHostAlloc, cudaEventCreate, cudaGetDeviceCount, cudaEventElapsedTime, cudaDeviceSynchronize, cudaFreeHost, cudaMalloc, cudaEventDestroy, cudaSetDevice, cudaMemcpyAsync, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties +cudaHostAlloc, cudaMemcpy, cudaMalloc, cudaMemcpyAsync, cudaFree, cudaGetErrorString, cudaMallocHost, cudaSetDevice, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaEventRecord, cudaFreeHost, cudaEventDestroy, cudaEventElapsedTime, cudaGetDeviceCount, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2017.vcxproj b/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2017.vcxproj index 38f1b77fa..ad862bff4 100644 --- a/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2017.vcxproj +++ b/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/bandwidthTest.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2019.vcxproj b/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2019.vcxproj index 59300fc11..e6f5f3049 100644 --- a/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2019.vcxproj +++ b/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/bandwidthTest.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2022.vcxproj b/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2022.vcxproj index 9faf45d5f..676302bd9 100644 --- a/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2022.vcxproj +++ b/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2022.vcxproj @@ -34,7 +34,7 
@@ - + @@ -63,7 +63,7 @@ $(OutDir)/bandwidthTest.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/1_Utilities/deviceQuery/Makefile b/Samples/1_Utilities/deviceQuery/Makefile index ea0d25c1e..44dd2fbc9 100644 --- a/Samples/1_Utilities/deviceQuery/Makefile +++ b/Samples/1_Utilities/deviceQuery/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/1_Utilities/deviceQuery/NsightEclipse.xml b/Samples/1_Utilities/deviceQuery/NsightEclipse.xml index 842dea350..dda30eb74 100644 --- a/Samples/1_Utilities/deviceQuery/NsightEclipse.xml +++ b/Samples/1_Utilities/deviceQuery/NsightEclipse.xml @@ -5,12 +5,12 @@ cuDeviceGetAttribute cuSafeCallNoSync - cudaGetDeviceCount - cudaDriverGetVersion - cudaDeviceCanAccessPeer - cudaSetDevice cudaRuntimeGetVersion cudaGetErrorString + cudaDeviceCanAccessPeer + cudaSetDevice + cudaGetDeviceCount + cudaDriverGetVersion cudaGetDeviceProperties @@ -48,6 +48,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/1_Utilities/deviceQuery/README.md b/Samples/1_Utilities/deviceQuery/README.md index 794c5f76a..4f4a647d1 100644 --- a/Samples/1_Utilities/deviceQuery/README.md +++ b/Samples/1_Utilities/deviceQuery/README.md @@ -10,7 +10,7 @@ CUDA Runtime API, Device Query ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 
](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -26,11 +26,11 @@ x86_64, ppc64le, armv7l, aarch64 cuDeviceGetAttribute, cuSafeCallNoSync ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaGetDeviceCount, cudaDriverGetVersion, cudaDeviceCanAccessPeer, cudaSetDevice, cudaRuntimeGetVersion, cudaGetErrorString, cudaGetDeviceProperties +cudaRuntimeGetVersion, cudaGetErrorString, cudaDeviceCanAccessPeer, cudaSetDevice, cudaGetDeviceCount, cudaDriverGetVersion, cudaGetDeviceProperties ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/1_Utilities/deviceQuery/deviceQuery_vs2017.vcxproj b/Samples/1_Utilities/deviceQuery/deviceQuery_vs2017.vcxproj index e52b7e5f7..87cca12f6 100644 --- a/Samples/1_Utilities/deviceQuery/deviceQuery_vs2017.vcxproj +++ b/Samples/1_Utilities/deviceQuery/deviceQuery_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/deviceQuery.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/1_Utilities/deviceQuery/deviceQuery_vs2019.vcxproj b/Samples/1_Utilities/deviceQuery/deviceQuery_vs2019.vcxproj index 3b4b1f75c..41b5bebd3 100644 --- a/Samples/1_Utilities/deviceQuery/deviceQuery_vs2019.vcxproj +++ b/Samples/1_Utilities/deviceQuery/deviceQuery_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/deviceQuery.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/1_Utilities/deviceQuery/deviceQuery_vs2022.vcxproj b/Samples/1_Utilities/deviceQuery/deviceQuery_vs2022.vcxproj index 4d9684d97..4ba036e5e 100644 --- a/Samples/1_Utilities/deviceQuery/deviceQuery_vs2022.vcxproj +++ 
b/Samples/1_Utilities/deviceQuery/deviceQuery_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/deviceQuery.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/1_Utilities/deviceQueryDrv/NsightEclipse.xml b/Samples/1_Utilities/deviceQueryDrv/NsightEclipse.xml index 999dc70ed..7a56e6975 100644 --- a/Samples/1_Utilities/deviceQueryDrv/NsightEclipse.xml +++ b/Samples/1_Utilities/deviceQueryDrv/NsightEclipse.xml @@ -3,13 +3,14 @@ deviceQueryDrv - cuDeviceCanAccessPeer - cuDriverGetVersion - cuDeviceGetCount cuDeviceGetName + cuDeviceGetAttribute cuDeviceTotalMem + cuDeviceCanAccessPeer + cuDeviceGetCount + cuDriverGetVersion cuInit - cuDeviceGetAttribute + cudaSetDevice whole @@ -49,6 +50,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/1_Utilities/deviceQueryDrv/README.md b/Samples/1_Utilities/deviceQueryDrv/README.md index 5d80066c1..92d02352f 100644 --- a/Samples/1_Utilities/deviceQueryDrv/README.md +++ b/Samples/1_Utilities/deviceQueryDrv/README.md @@ -10,7 +10,7 @@ CUDA Driver API, Device Query ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 
](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuDeviceCanAccessPeer, cuDriverGetVersion, cuDeviceGetCount, cuDeviceGetName, cuDeviceTotalMem, cuInit, cuDeviceGetAttribute +cuDeviceGetName, cuDeviceGetAttribute, cuDeviceTotalMem, cuDeviceCanAccessPeer, cuDeviceGetCount, cuDriverGetVersion, cuInit + +### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) +cudaSetDevice ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2017.vcxproj b/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2017.vcxproj index a94caee2f..59f77b9b5 100644 --- a/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2017.vcxproj +++ b/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -107,6 +107,6 @@ - + diff --git a/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2019.vcxproj b/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2019.vcxproj index 282fef957..629a2e6eb 100644 --- a/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2019.vcxproj +++ b/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2022.vcxproj b/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2022.vcxproj index 30aaeef46..5c8aab7e5 100644 --- a/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2022.vcxproj +++ b/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/1_Utilities/topologyQuery/Makefile b/Samples/1_Utilities/topologyQuery/Makefile index ea5621746..9a48838ca 100644 --- a/Samples/1_Utilities/topologyQuery/Makefile +++ b/Samples/1_Utilities/topologyQuery/Makefile @@ -297,9 +297,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/1_Utilities/topologyQuery/NsightEclipse.xml b/Samples/1_Utilities/topologyQuery/NsightEclipse.xml index 722261a6d..8bfd757da 100644 --- a/Samples/1_Utilities/topologyQuery/NsightEclipse.xml +++ b/Samples/1_Utilities/topologyQuery/NsightEclipse.xml @@ -46,6 +46,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git 
a/Samples/1_Utilities/topologyQuery/README.md b/Samples/1_Utilities/topologyQuery/README.md index 4f4093fac..e08fa339d 100644 --- a/Samples/1_Utilities/topologyQuery/README.md +++ b/Samples/1_Utilities/topologyQuery/README.md @@ -10,7 +10,7 @@ Performance Strategies, Multi-GPU ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -27,7 +27,7 @@ cudaGetDeviceCount, cudaDeviceGetAttribute ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/1_Utilities/topologyQuery/topologyQuery_vs2017.vcxproj b/Samples/1_Utilities/topologyQuery/topologyQuery_vs2017.vcxproj index df094c222..feecd32fa 100644 --- a/Samples/1_Utilities/topologyQuery/topologyQuery_vs2017.vcxproj +++ b/Samples/1_Utilities/topologyQuery/topologyQuery_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/topologyQuery.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/1_Utilities/topologyQuery/topologyQuery_vs2019.vcxproj b/Samples/1_Utilities/topologyQuery/topologyQuery_vs2019.vcxproj index 62f21c12f..245f929bd 100644 --- a/Samples/1_Utilities/topologyQuery/topologyQuery_vs2019.vcxproj +++ b/Samples/1_Utilities/topologyQuery/topologyQuery_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/topologyQuery.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/1_Utilities/topologyQuery/topologyQuery_vs2022.vcxproj b/Samples/1_Utilities/topologyQuery/topologyQuery_vs2022.vcxproj index 40ba0ca7a..1d81b9337 100644 --- 
a/Samples/1_Utilities/topologyQuery/topologyQuery_vs2022.vcxproj +++ b/Samples/1_Utilities/topologyQuery/topologyQuery_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/topologyQuery.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/Makefile b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/Makefile index 0200b2356..dabf05164 100644 --- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/Makefile +++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/Makefile @@ -301,9 +301,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/NsightEclipse.xml index 0ab4b349c..6df81e060 100644 --- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/NsightEclipse.xml @@ -3,42 +3,42 @@ EGLStream_CUDA_CrossGPU + cuDeviceGetName + cuEGLStreamConsumerReleaseFrame cuEGLStreamConsumerConnect - cuMemFree - cuInit + cuEGLStreamConsumerDisconnect + cuCtxPushCurrent + cuEGLStreamProducerReturnFrame cuStreamCreate - cuCtxCreate + cuEGLStreamProducerPresentFrame + cuMemFree cuGraphicsResourceGetMappedEglFrame - cuDeviceGetName - 
cuCtxSynchronize - cuEGLStreamConsumerAcquireFrame + cuInit + cuMemcpyHtoD cuDeviceGet - cuDeviceGetAttribute - cuMemAlloc - cuEGLStreamConsumerReleaseFrame + cuEGLStreamConsumerAcquireFrame cuEGLStreamProducerDisconnect cuEGLStreamProducerConnect - cuEGLStreamConsumerDisconnect - cuMemcpyHtoD - cuEGLStreamProducerReturnFrame - cuCtxPushCurrent + cuDeviceGetAttribute + cuCtxSynchronize + cuMemAlloc cuCtxPopCurrent - cuEGLStreamProducerPresentFrame - cudaDeviceCreateConsumer + cuCtxCreate + cudaMemcpy + cudaMalloc + cudaProducerPresentFrame cudaFree + cudaGetErrorString cudaConsumerReleaseFrame + cudaProducerReturnFrame cudaDeviceSynchronize - cudaGetValueMismatch + cudaDeviceCreateProducer cudaProducerDeinit - cudaProducerPresentFrame - cudaMalloc - cudaProducerInit - cudaProducerReturnFrame cudaProducerPrepareFrame + cudaGetValueMismatch cudaConsumerAcquireFrame - cudaMemcpy - cudaGetErrorString - cudaDeviceCreateProducer + cudaProducerInit + cudaDeviceCreateConsumer whole @@ -81,6 +81,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/README.md b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/README.md index 2178db665..b559583b8 100644 --- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/README.md +++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/README.md @@ -10,7 +10,7 @@ EGLStreams Interop ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 
](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,17 +23,17 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuEGLStreamConsumerConnect, cuMemFree, cuInit, cuStreamCreate, cuCtxCreate, cuGraphicsResourceGetMappedEglFrame, cuDeviceGetName, cuCtxSynchronize, cuEGLStreamConsumerAcquireFrame, cuDeviceGet, cuDeviceGetAttribute, cuMemAlloc, cuEGLStreamConsumerReleaseFrame, cuEGLStreamProducerDisconnect, cuEGLStreamProducerConnect, cuEGLStreamConsumerDisconnect, cuMemcpyHtoD, cuEGLStreamProducerReturnFrame, cuCtxPushCurrent, cuCtxPopCurrent, cuEGLStreamProducerPresentFrame +cuDeviceGetName, cuEGLStreamConsumerReleaseFrame, cuEGLStreamConsumerConnect, cuEGLStreamConsumerDisconnect, cuCtxPushCurrent, cuEGLStreamProducerReturnFrame, cuStreamCreate, cuEGLStreamProducerPresentFrame, cuMemFree, cuGraphicsResourceGetMappedEglFrame, cuInit, cuMemcpyHtoD, cuDeviceGet, cuEGLStreamConsumerAcquireFrame, cuEGLStreamProducerDisconnect, cuEGLStreamProducerConnect, cuDeviceGetAttribute, cuCtxSynchronize, cuMemAlloc, cuCtxPopCurrent, cuCtxCreate ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaDeviceCreateConsumer, cudaFree, 
cudaConsumerReleaseFrame, cudaDeviceSynchronize, cudaGetValueMismatch, cudaProducerDeinit, cudaProducerPresentFrame, cudaMalloc, cudaProducerInit, cudaProducerReturnFrame, cudaProducerPrepareFrame, cudaConsumerAcquireFrame, cudaMemcpy, cudaGetErrorString, cudaDeviceCreateProducer +cudaMemcpy, cudaMalloc, cudaProducerPresentFrame, cudaFree, cudaGetErrorString, cudaConsumerReleaseFrame, cudaProducerReturnFrame, cudaDeviceSynchronize, cudaDeviceCreateProducer, cudaProducerDeinit, cudaProducerPrepareFrame, cudaGetValueMismatch, cudaConsumerAcquireFrame, cudaProducerInit, cudaDeviceCreateConsumer ## Dependencies needed to build/run [EGL](../../../README.md#egl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/findegl.mk b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/findegl.mk index cfeee8998..33ec1a961 100644 --- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/findegl.mk +++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/findegl.mk @@ -58,6 +58,7 @@ ifeq ("$(TARGET_OS)","linux") RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -84,22 +85,12 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif - ifeq ("$(SUSE)","0") + + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/NsightEclipse.xml index d84d93136..ce22364ae 100644 --- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/NsightEclipse.xml @@ -3,23 +3,36 @@ EGLStream_CUDA_Interop - cuDeviceGet - cuDeviceGetAttribute - cuDeviceComputeCapability - cuDeviceGetCount + cuMemcpyDtoH cuDeviceGetName - cuGraphicsResourceGetMappedEglFrame - cuEGLStreamConsumerAcquireFrame cuEGLStreamConsumerReleaseFrame + cuEGLStreamConsumerConnect + cuEGLStreamConsumerDisconnect + cuCtxPushCurrent + cuArrayDestroy + cuEGLStreamProducerReturnFrame cuEGLStreamProducerPresentFrame - cuCtxCreate - cuMemAlloc cuMemFree - cuMemcpy3D - cuStreamCreate - cuCtxPushCurrent + cuGraphicsResourceGetMappedEglFrame + cuInit + cuEGLStreamConsumerAcquireFrame + cuEGLStreamProducerDisconnect + cuDeviceGetCount + cuEGLStreamProducerConnect + cuDeviceGetAttribute + cuCtxSynchronize + cuMemAlloc cuCtxPopCurrent - cuCtxDestroy + cuCtxCreate + cuMemcpy + cudaProducerReadYUVFrame + cudaProducerTest + cudaProducerDeinit + cudaDeviceCreateProducer + cudaProducerReadARGBFrame + cudaDeviceCreateConsumer + cudaConsumerTest + 
cudaProducerInit whole @@ -53,6 +66,7 @@ sm37 sm50 sm52 + sm53 sm60 sm61 sm70 @@ -60,6 +74,8 @@ sm75 sm80 sm86 + sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/README.md b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/README.md index 3ccef85d4..5cee12b0b 100644 --- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/README.md +++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/README.md @@ -10,7 +10,7 @@ EGLStreams Interop ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,17 +23,17 @@ x86_64, aarch64 ## CUDA APIs involved ### [CUDA Driver 
API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuEGLStreamConsumerConnect, cuArrayDestroy, cuMemFree, cuInit, cuCtxCreate, cuGraphicsResourceGetMappedEglFrame, cuDeviceGetName, cuCtxSynchronize, cuEGLStreamConsumerAcquireFrame, cuDeviceGetAttribute, cuMemcpy, cuMemAlloc, cuEGLStreamConsumerReleaseFrame, cuEGLStreamProducerDisconnect, cuEGLStreamProducerConnect, cuEGLStreamConsumerDisconnect, cuDeviceGetCount, cuEGLStreamProducerReturnFrame, cuCtxPushCurrent, cuCtxPopCurrent, cuMemcpyDtoH, cuEGLStreamProducerPresentFrame +cuMemcpyDtoH, cuDeviceGetName, cuEGLStreamConsumerReleaseFrame, cuEGLStreamConsumerConnect, cuEGLStreamConsumerDisconnect, cuCtxPushCurrent, cuArrayDestroy, cuEGLStreamProducerReturnFrame, cuEGLStreamProducerPresentFrame, cuMemFree, cuGraphicsResourceGetMappedEglFrame, cuInit, cuEGLStreamConsumerAcquireFrame, cuEGLStreamProducerDisconnect, cuDeviceGetCount, cuEGLStreamProducerConnect, cuDeviceGetAttribute, cuCtxSynchronize, cuMemAlloc, cuCtxPopCurrent, cuCtxCreate, cuMemcpy ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaDeviceCreateConsumer, cudaConsumerTest, cudaProducerDeinit, cudaProducerInit, cudaProducerReadYUVFrame, cudaProducerTest, cudaProducerReadARGBFrame, cudaDeviceCreateProducer +cudaProducerReadYUVFrame, cudaProducerTest, cudaProducerDeinit, cudaDeviceCreateProducer, cudaProducerReadARGBFrame, cudaDeviceCreateConsumer, cudaConsumerTest, cudaProducerInit ## Dependencies needed to build/run [EGL](../../../README.md#egl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_producer.cpp b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_producer.cpp index f15c5cd1e..ef3adab2a 100644 --- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_producer.cpp +++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_producer.cpp @@ -301,7 +301,7 @@ CUresult cudaDeviceCreateProducer(test_cuda_producer_s *cudaProducer, if (major < 6) { printf( - "EGLStreams_CUDA_Interop requires SM 6.0 or higher arch GPU. " + "EGLStream_CUDA_Interop requires SM 6.0 or higher arch GPU. " "Exiting...\n"); exit(2); // EXIT_WAIVED } diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/findegl.mk b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/findegl.mk index cfeee8998..33ec1a961 100644 --- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/findegl.mk +++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/findegl.mk @@ -58,6 +58,7 @@ ifeq ("$(TARGET_OS)","linux") RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -84,22 +85,12 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif - ifeq ("$(SUSE)","0") + + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 diff --git a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/Makefile b/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/Makefile index 9a670c78c..86cfb9287 100644 --- a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/Makefile +++ b/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/Makefile @@ -321,9 +321,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/NsightEclipse.xml index fbb91f1ff..639955253 100644 --- a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/NsightEclipse.xml @@ -3,27 +3,27 @@ EGLSync_CUDAEvent_Interop - cuGraphicsEGLRegisterImage - cuStreamCreate + cuEventRecord + cuDeviceGetAttribute cuEventCreate - cuCtxCreate - cuGraphicsSubResourceGetMappedArray - cuGraphicsUnregisterResource cuCtxSynchronize - cuEventCreateFromEGLSync cuEventDestroy + cuGraphicsEGLRegisterImage + 
cuGraphicsSubResourceGetMappedArray + cuStreamCreate cuStreamWaitEvent - cuCtxPushCurrent + cuGraphicsUnregisterResource + cuCtxCreate cuSurfObjectCreate + cuEventCreateFromEGLSync + cuCtxPushCurrent cuInit - cuEventRecord - cuDeviceGetAttribute + cudaMemcpy + cudaGetErrorString cudaFree cudaDeviceSynchronize cudaGetValueMismatch cudaMalloc - cudaMemcpy - cudaGetErrorString whole @@ -72,6 +72,7 @@ sm80 sm86 sm87 + sm90 arm diff --git a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/README.md b/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/README.md index 655881d14..8c980b525 100644 --- a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/README.md +++ b/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/README.md @@ -10,7 +10,7 @@ EGLSync-CUDAEvent Interop, EGLImage-CUDA Interop ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 
](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,17 +23,17 @@ armv7l ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuGraphicsEGLRegisterImage, cuStreamCreate, cuEventCreate, cuCtxCreate, cuGraphicsSubResourceGetMappedArray, cuGraphicsUnregisterResource, cuCtxSynchronize, cuEventCreateFromEGLSync, cuEventDestroy, cuStreamWaitEvent, cuCtxPushCurrent, cuSurfObjectCreate, cuInit, cuEventRecord, cuDeviceGetAttribute +cuEventRecord, cuDeviceGetAttribute, cuEventCreate, cuCtxSynchronize, cuEventDestroy, cuGraphicsEGLRegisterImage, cuGraphicsSubResourceGetMappedArray, cuStreamCreate, cuStreamWaitEvent, cuGraphicsUnregisterResource, cuCtxCreate, cuSurfObjectCreate, cuEventCreateFromEGLSync, cuCtxPushCurrent, cuInit ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaDeviceSynchronize, cudaGetValueMismatch, cudaMalloc, cudaMemcpy, cudaGetErrorString +cudaMemcpy, cudaGetErrorString, cudaFree, cudaDeviceSynchronize, cudaGetValueMismatch, cudaMalloc ## Dependencies needed to build/run [EGL](../../../README.md#egl), [EGLSync](../../../README.md#eglsync), [X11](../../../README.md#x11), [GLES](../../../README.md#gles) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/findegl.mk b/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/findegl.mk index cfeee8998..33ec1a961 100644 --- a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/findegl.mk +++ b/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/findegl.mk @@ -58,6 +58,7 @@ ifeq ("$(TARGET_OS)","linux") RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -84,22 +85,12 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif - ifeq ("$(SUSE)","0") + + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2017.vcxproj index 93f0c4b64..e80efd5bd 100644 --- a/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/FunctionPointers.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -118,6 +118,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2019.vcxproj index a67cbbcd9..d5cbccdca 100644 --- a/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/FunctionPointers.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2022.vcxproj index 45617a678..ca5eb33b5 100644 --- a/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/FunctionPointers.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/Makefile b/Samples/2_Concepts_and_Techniques/FunctionPointers/Makefile index 47c204fe6..651a4f81e 100644 --- a/Samples/2_Concepts_and_Techniques/FunctionPointers/Makefile +++ b/Samples/2_Concepts_and_Techniques/FunctionPointers/Makefile @@ -299,9 +299,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/FunctionPointers/NsightEclipse.xml index ecd90f9df..f90f7b346 100644 --- a/Samples/2_Concepts_and_Techniques/FunctionPointers/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/FunctionPointers/NsightEclipse.xml @@ -3,21 +3,21 @@ FunctionPointers - cudaMemcpyToSymbol - cudaFree - cudaGraphicsMapResources + cudaGraphicsUnmapResources + cudaMemcpy + cudaMallocArray cudaFreeArray - cudaGraphicsGLRegisterBuffer + cudaFree + cudaMemcpyFromSymbol cudaGraphicsResourceGetMappedPointer - cudaDeviceSynchronize + cudaGraphicsMapResources cudaDestroyTextureObject + cudaDeviceSynchronize cudaCreateTextureObject - cudaMalloc - cudaMallocArray + cudaMemcpyToSymbol cudaGraphicsUnregisterResource - cudaGraphicsUnmapResources - cudaMemcpy - cudaMemcpyFromSymbol + cudaMalloc + cudaGraphicsGLRegisterBuffer whole @@ -88,6 +88,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/README.md b/Samples/2_Concepts_and_Techniques/FunctionPointers/README.md index a95ba59dc..83e16413b 
100644 --- a/Samples/2_Concepts_and_Techniques/FunctionPointers/README.md +++ b/Samples/2_Concepts_and_Techniques/FunctionPointers/README.md @@ -10,7 +10,7 @@ Graphics Interop, Image Processing ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemcpyToSymbol, cudaFree, cudaGraphicsMapResources, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, 
cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy, cudaMemcpyFromSymbol +cudaGraphicsUnmapResources, cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaMemcpyFromSymbol, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMemcpyToSymbol, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/findgllib.mk b/Samples/2_Concepts_and_Techniques/FunctionPointers/findgllib.mk index f0a5c5512..998fcf0f1 100644 --- a/Samples/2_Concepts_and_Techniques/FunctionPointers/findgllib.mk +++ b/Samples/2_Concepts_and_Techniques/FunctionPointers/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) 
+ CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2017.vcxproj index 7826468d6..9b6616f67 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/MC_EstimatePiInlineP.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 
./;../../../Common WIN32 @@ -111,6 +111,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2019.vcxproj index accf4529d..fd17c3e2a 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/MC_EstimatePiInlineP.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2022.vcxproj index 6ba155315..9d5110d54 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/MC_EstimatePiInlineP.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git 
a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/Makefile b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/Makefile index 6bc2ee938..c4a3fa536 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/Makefile +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/NsightEclipse.xml index 5c63395b9..bf9c24d2d 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/NsightEclipse.xml @@ -3,13 +3,13 @@ MC_EstimatePiInlineP + cudaMemcpy + cudaGetErrorString cudaFree - cudaFuncGetAttributes + cudaSetDevice cudaGetDeviceCount cudaMalloc - cudaSetDevice - cudaMemcpy - cudaGetErrorString + cudaFuncGetAttributes cudaGetDeviceProperties @@ -60,6 +60,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/README.md b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/README.md index 53b09b57d..a7d406785 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/README.md +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/README.md @@ -10,7 +10,7 @@ Random Number Generator, Computational Finance, CURAND Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 
](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaFuncGetAttributes, cudaGetDeviceCount, cudaMalloc, cudaSetDevice, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties +cudaMemcpy, cudaGetErrorString, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaMalloc, cudaFuncGetAttributes, cudaGetDeviceProperties ## Dependencies needed to build/run [CURAND](../../../README.md#curand) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2017.vcxproj index d5ad649be..9ac218442 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/MC_EstimatePiInlineQ.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -111,6 +111,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2019.vcxproj index bdf88dd5c..b8246a1a7 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/MC_EstimatePiInlineQ.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git 
a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2022.vcxproj index 4a8efc239..da748e1f7 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/MC_EstimatePiInlineQ.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/Makefile b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/Makefile index 20a15720a..5d8b086b2 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/Makefile +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/NsightEclipse.xml index ed2f8f7ac..f087e82b1 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/NsightEclipse.xml @@ -3,13 +3,13 @@ MC_EstimatePiInlineQ + cudaMemcpy + cudaGetErrorString cudaFree - 
cudaFuncGetAttributes + cudaSetDevice cudaGetDeviceCount cudaMalloc - cudaSetDevice - cudaMemcpy - cudaGetErrorString + cudaFuncGetAttributes cudaGetDeviceProperties @@ -60,6 +60,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/README.md b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/README.md index 96e19b727..485c16aaa 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/README.md +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/README.md @@ -10,7 +10,7 @@ Random Number Generator, Computational Finance, CURAND Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## 
Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaFuncGetAttributes, cudaGetDeviceCount, cudaMalloc, cudaSetDevice, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties +cudaMemcpy, cudaGetErrorString, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaMalloc, cudaFuncGetAttributes, cudaGetDeviceProperties ## Dependencies needed to build/run [CURAND](../../../README.md#curand) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2017.vcxproj index a628995cb..c64188719 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/MC_EstimatePiP.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -111,6 +111,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2019.vcxproj index fb20f5f1d..c188ae7dd 100644 --- 
a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/MC_EstimatePiP.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2022.vcxproj index ec7b8f28c..c9893f39d 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/MC_EstimatePiP.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/Makefile b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/Makefile index fba6735f2..0e5b42370 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/Makefile +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 
70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/NsightEclipse.xml index 6578c3f4d..56a07dea5 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/NsightEclipse.xml @@ -3,13 +3,13 @@ MC_EstimatePiP + cudaMemcpy + cudaGetErrorString cudaFree - cudaFuncGetAttributes + cudaSetDevice cudaGetDeviceCount cudaMalloc - cudaSetDevice - cudaMemcpy - cudaGetErrorString + cudaFuncGetAttributes cudaGetDeviceProperties @@ -60,6 +60,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/README.md b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/README.md index 098e77f86..4390385f0 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/README.md +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/README.md @@ -10,7 +10,7 @@ Random Number Generator, Computational Finance, CURAND Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 
](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaFuncGetAttributes, cudaGetDeviceCount, cudaMalloc, cudaSetDevice, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties +cudaMemcpy, cudaGetErrorString, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaMalloc, cudaFuncGetAttributes, cudaGetDeviceProperties ## Dependencies needed to build/run [CURAND](../../../README.md#curand) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2017.vcxproj index 83aba18ad..95b72fe5c 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/MC_EstimatePiQ.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -111,6 +111,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2019.vcxproj index ae4a67118..ff9316333 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/MC_EstimatePiQ.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2022.vcxproj index 
9e11377dc..5714f941b 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/MC_EstimatePiQ.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/Makefile b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/Makefile index 98d071f93..61ae97d2e 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/Makefile +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/NsightEclipse.xml index 45497eeaf..71f9c101d 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/NsightEclipse.xml @@ -3,13 +3,13 @@ MC_EstimatePiQ + cudaMemcpy + cudaGetErrorString cudaFree - cudaFuncGetAttributes + cudaSetDevice cudaGetDeviceCount cudaMalloc - cudaSetDevice - cudaMemcpy - cudaGetErrorString + cudaFuncGetAttributes cudaGetDeviceProperties @@ -60,6 +60,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/README.md 
b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/README.md index dd73101e1..c6bac7b71 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/README.md +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/README.md @@ -10,7 +10,7 @@ Random Number Generator, Computational Finance, CURAND Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaFuncGetAttributes, cudaGetDeviceCount, cudaMalloc, cudaSetDevice, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties 
+cudaMemcpy, cudaGetErrorString, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaMalloc, cudaFuncGetAttributes, cudaGetDeviceProperties ## Dependencies needed to build/run [CURAND](../../../README.md#curand) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2017.vcxproj index df94f0e1c..c9d46e459 100644 --- a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/MC_SingleAsianOptionP.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -112,6 +112,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2019.vcxproj index c6830be4b..1a31f284a 100644 --- a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 
+63,7 @@ $(OutDir)/MC_SingleAsianOptionP.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2022.vcxproj index ab727ab07..a97d1d42e 100644 --- a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/MC_SingleAsianOptionP.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/Makefile b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/Makefile index c2e3b080d..97baec61f 100644 --- a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/Makefile +++ b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/NsightEclipse.xml index 52a3844dc..e11b104e4 100644 --- a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/NsightEclipse.xml @@ -3,13 +3,13 @@ MC_SingleAsianOptionP + cudaMemcpy + cudaGetErrorString cudaFree - cudaFuncGetAttributes + cudaSetDevice cudaGetDeviceCount cudaMalloc - cudaSetDevice - cudaMemcpy - cudaGetErrorString + cudaFuncGetAttributes cudaGetDeviceProperties @@ -61,6 +61,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/README.md b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/README.md index f2a986d5e..f6f6cd1ac 100644 --- a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/README.md +++ b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/README.md @@ -10,7 +10,7 @@ Random Number Generator, Computational Finance, CURAND Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 
](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaFuncGetAttributes, cudaGetDeviceCount, cudaMalloc, cudaSetDevice, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties +cudaMemcpy, cudaGetErrorString, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaMalloc, cudaFuncGetAttributes, cudaGetDeviceProperties ## Dependencies needed to build/run [CURAND](../../../README.md#curand) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/README.md b/Samples/2_Concepts_and_Techniques/README.md index 00265194e..b407d80b2 100644 --- a/Samples/2_Concepts_and_Techniques/README.md +++ b/Samples/2_Concepts_and_Techniques/README.md @@ -19,7 +19,7 @@ This sample demonstrates how Discrete Cosine Transform (DCT) for blocks of 8 by ### [EGLStream_CUDA_CrossGPU](./EGLStream_CUDA_CrossGPU) Demonstrates CUDA and EGL Streams interop, where consumer's EGL Stream is on one GPU and producer's on other and both consumer-producer are different processes. 
-### [EGLStreams_CUDA_Interop](./EGLStreams_CUDA_Interop) +### [EGLStream_CUDA_Interop](./EGLStream_CUDA_Interop) Demonstrates data exchange between CUDA and EGL Streams. ### [EGLSync_CUDAEvent_Interop](./EGLSync_CUDAEvent_Interop) diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/Makefile b/Samples/2_Concepts_and_Techniques/boxFilter/Makefile index baec273e1..ef50006af 100644 --- a/Samples/2_Concepts_and_Techniques/boxFilter/Makefile +++ b/Samples/2_Concepts_and_Techniques/boxFilter/Makefile @@ -299,9 +299,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/boxFilter/NsightEclipse.xml index 60ada986e..5dc277901 100644 --- a/Samples/2_Concepts_and_Techniques/boxFilter/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/boxFilter/NsightEclipse.xml @@ -3,21 +3,21 @@ boxFilter - cudaFree - cudaGraphicsMapResources + cudaGraphicsUnmapResources + cudaCreateChannelDesc + cudaMallocArray cudaFreeArray - cudaGraphicsGLRegisterBuffer + cudaFree + cudaGetErrorString + cudaMemcpy cudaGraphicsResourceGetMappedPointer - cudaDeviceSynchronize + cudaGraphicsMapResources cudaDestroyTextureObject + cudaDeviceSynchronize cudaCreateTextureObject - cudaMalloc - cudaMallocArray - cudaCreateChannelDesc cudaGraphicsUnregisterResource - cudaGraphicsUnmapResources - cudaMemcpy - cudaGetErrorString + cudaMalloc + cudaGraphicsGLRegisterBuffer whole @@ -84,6 +84,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/README.md b/Samples/2_Concepts_and_Techniques/boxFilter/README.md index f4d1299df..5be86cee5 100644 --- a/Samples/2_Concepts_and_Techniques/boxFilter/README.md +++ 
b/Samples/2_Concepts_and_Techniques/boxFilter/README.md @@ -10,7 +10,7 @@ Graphics Interop, Image Processing ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGraphicsMapResources, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaCreateChannelDesc, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetErrorString 
+cudaGraphicsUnmapResources, cudaCreateChannelDesc, cudaMallocArray, cudaFreeArray, cudaFree, cudaGetErrorString, cudaMemcpy, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2017.vcxproj index f5cef6565..e37e53620 100644 --- a/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/boxFilter.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -119,6 +119,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2019.vcxproj index 5e2b348e9..08573611b 100644 --- a/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2019.vcxproj @@ -34,7 +34,7 
@@ - + @@ -63,7 +63,7 @@ $(OutDir)/boxFilter.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -115,6 +115,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2022.vcxproj index 635ab0f1b..113b623b8 100644 --- a/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/boxFilter.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -115,6 +115,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/findgllib.mk b/Samples/2_Concepts_and_Techniques/boxFilter/findgllib.mk index f0a5c5512..998fcf0f1 100644 --- a/Samples/2_Concepts_and_Techniques/boxFilter/findgllib.mk +++ b/Samples/2_Concepts_and_Techniques/boxFilter/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) 
- RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/2_Concepts_and_Techniques/convolutionSeparable/Makefile b/Samples/2_Concepts_and_Techniques/convolutionSeparable/Makefile index c189aba90..dd13e54b5 100644 --- a/Samples/2_Concepts_and_Techniques/convolutionSeparable/Makefile +++ b/Samples/2_Concepts_and_Techniques/convolutionSeparable/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else 
-SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/convolutionSeparable/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/convolutionSeparable/NsightEclipse.xml index a76eb5690..6471a4450 100644 --- a/Samples/2_Concepts_and_Techniques/convolutionSeparable/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/convolutionSeparable/NsightEclipse.xml @@ -3,11 +3,11 @@ convolutionSeparable - cudaMemcpyToSymbol + cudaMemcpy cudaFree cudaDeviceSynchronize + cudaMemcpyToSymbol cudaMalloc - cudaMemcpy whole @@ -53,6 +53,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/convolutionSeparable/README.md b/Samples/2_Concepts_and_Techniques/convolutionSeparable/README.md index defbd7ea4..8afcf1770 100644 --- a/Samples/2_Concepts_and_Techniques/convolutionSeparable/README.md +++ b/Samples/2_Concepts_and_Techniques/convolutionSeparable/README.md @@ -10,7 +10,7 @@ Image Processing, Data Parallel Algorithms ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 
](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemcpyToSymbol, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy +cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaMemcpyToSymbol, cudaMalloc ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2017.vcxproj index 4cbaabb27..12140d617 100644 --- a/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/convolutionSeparable.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2019.vcxproj index c231867ed..882b180ca 100644 --- a/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/convolutionSeparable.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git 
a/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2022.vcxproj index e1ae239aa..0bfe92ac0 100644 --- a/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/convolutionSeparable.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/convolutionTexture/Makefile b/Samples/2_Concepts_and_Techniques/convolutionTexture/Makefile index 5eaca29fd..e06312117 100644 --- a/Samples/2_Concepts_and_Techniques/convolutionTexture/Makefile +++ b/Samples/2_Concepts_and_Techniques/convolutionTexture/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/convolutionTexture/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/convolutionTexture/NsightEclipse.xml index 622b0ebc2..c2a9e145b 100644 --- a/Samples/2_Concepts_and_Techniques/convolutionTexture/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/convolutionTexture/NsightEclipse.xml @@ -3,15 +3,15 @@ convolutionTexture - cudaMemcpyToSymbol + cudaMemcpy + cudaMallocArray + cudaFreeArray cudaFree 
cudaMemcpyToArray - cudaFreeArray cudaDeviceSynchronize cudaCreateTextureObject + cudaMemcpyToSymbol cudaMalloc - cudaMallocArray - cudaMemcpy whole @@ -59,6 +59,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/convolutionTexture/README.md b/Samples/2_Concepts_and_Techniques/convolutionTexture/README.md index 11ef179a7..b54a396c6 100644 --- a/Samples/2_Concepts_and_Techniques/convolutionTexture/README.md +++ b/Samples/2_Concepts_and_Techniques/convolutionTexture/README.md @@ -10,7 +10,7 @@ Image Processing, Texture, Data Parallel Algorithms ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ 
x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemcpyToSymbol, cudaFree, cudaMemcpyToArray, cudaFreeArray, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaMemcpy +cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaMemcpyToArray, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMemcpyToSymbol, cudaMalloc ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2017.vcxproj index 442fa53b3..1769a595b 100644 --- a/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/convolutionTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2019.vcxproj index 936b6fe5b..1a367ce1e 100644 --- a/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2019.vcxproj +++ 
b/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/convolutionTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2022.vcxproj index 24511a34e..d9ff12c8b 100644 --- a/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/convolutionTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/cuHook/Makefile b/Samples/2_Concepts_and_Techniques/cuHook/Makefile index 81ea73bed..5986c7b7a 100644 --- a/Samples/2_Concepts_and_Techniques/cuHook/Makefile +++ b/Samples/2_Concepts_and_Techniques/cuHook/Makefile @@ -329,9 +329,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 
52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/cuHook/README.md b/Samples/2_Concepts_and_Techniques/cuHook/README.md index 05833d89c..39ec1cbe4 100644 --- a/Samples/2_Concepts_and_Techniques/cuHook/README.md +++ b/Samples/2_Concepts_and_Techniques/cuHook/README.md @@ -12,7 +12,7 @@ Debugging ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -25,14 +25,14 @@ x86_64, ppc64le ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuDeviceGetCount, cuCtxCreate, cuMemAlloc, cuHookInfo, cuHookRegisterCallback, cuHook, 
cuMemFree, cuInit, cuCtxDestroy +cuHook, cuMemAlloc, cuHookInfo, cuHookRegisterCallback, cuCtxDestroy, cuMemFree, cuDeviceGetCount, cuCtxCreate, cuInit ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaDeviceReset +cudaDeviceReset, cudaFree ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/Makefile b/Samples/2_Concepts_and_Techniques/dct8x8/Makefile index b00e4d359..0540bc005 100644 --- a/Samples/2_Concepts_and_Techniques/dct8x8/Makefile +++ b/Samples/2_Concepts_and_Techniques/dct8x8/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/dct8x8/NsightEclipse.xml index 3bbe52f96..adb067568 100644 --- a/Samples/2_Concepts_and_Techniques/dct8x8/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/dct8x8/NsightEclipse.xml @@ -3,13 +3,13 @@ dct8x8 - cudaFree + cudaMallocArray cudaFreeArray - cudaDeviceSynchronize - cudaDestroyTextureObject + cudaFree cudaMallocPitch + cudaDestroyTextureObject + cudaDeviceSynchronize cudaCreateTextureObject - cudaMallocArray whole @@ -57,6 +57,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/README.md b/Samples/2_Concepts_and_Techniques/dct8x8/README.md index 337b2ba29..7e0e24f35 100644 --- a/Samples/2_Concepts_and_Techniques/dct8x8/README.md +++ b/Samples/2_Concepts_and_Techniques/dct8x8/README.md @@ -10,7 +10,7 @@ Image 
Processing, Video Compression ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaFreeArray, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaMallocPitch, cudaCreateTextureObject, cudaMallocArray +cudaMallocArray, cudaFreeArray, cudaFree, cudaMallocPitch, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2017.vcxproj index 11ef09af3..5044890e9 100644 --- a/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/dct8x8.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -115,6 +115,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2019.vcxproj index 7eace209d..8452c9db6 100644 --- a/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/dct8x8.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -111,6 +111,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2022.vcxproj index f71108411..b0fd73786 100644 --- 
a/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/dct8x8.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -111,6 +111,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/Makefile b/Samples/2_Concepts_and_Techniques/eigenvalues/Makefile index 93e54441e..54c371eae 100644 --- a/Samples/2_Concepts_and_Techniques/eigenvalues/Makefile +++ b/Samples/2_Concepts_and_Techniques/eigenvalues/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/eigenvalues/NsightEclipse.xml index 0fce11299..74a135157 100644 --- a/Samples/2_Concepts_and_Techniques/eigenvalues/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/eigenvalues/NsightEclipse.xml @@ -4,9 +4,9 @@ eigenvalues cudaMalloc - cudaFree cudaDeviceSynchronize cudaMemcpy + cudaFree whole @@ -52,6 +52,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/README.md b/Samples/2_Concepts_and_Techniques/eigenvalues/README.md index 7d217766f..bef2e951c 100644 --- a/Samples/2_Concepts_and_Techniques/eigenvalues/README.md +++ b/Samples/2_Concepts_and_Techniques/eigenvalues/README.md @@ -10,7 +10,7 @@ Linear Algebra ## 
Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMalloc, cudaFree, cudaDeviceSynchronize, cudaMemcpy +cudaMalloc, cudaDeviceSynchronize, cudaMemcpy, cudaFree ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2017.vcxproj index 3f2ba5edd..e5a31279b 100644 --- a/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/eigenvalues.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -122,6 +122,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2019.vcxproj index 4f6a81526..8f846835d 100644 --- a/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/eigenvalues.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -118,6 +118,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2022.vcxproj index 0fd7a89ab..32faad5ed 100644 --- 
a/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/eigenvalues.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -118,6 +118,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/histogram/Makefile b/Samples/2_Concepts_and_Techniques/histogram/Makefile index d35c575cf..c73f8a988 100644 --- a/Samples/2_Concepts_and_Techniques/histogram/Makefile +++ b/Samples/2_Concepts_and_Techniques/histogram/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/histogram/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/histogram/NsightEclipse.xml index 6147ce832..72e7c05ba 100644 --- a/Samples/2_Concepts_and_Techniques/histogram/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/histogram/NsightEclipse.xml @@ -3,10 +3,10 @@ histogram + cudaMemcpy cudaFree cudaDeviceSynchronize cudaMalloc - cudaMemcpy cudaGetDeviceProperties @@ -52,6 +52,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/histogram/README.md b/Samples/2_Concepts_and_Techniques/histogram/README.md index 4ec0ce474..8ddf8e58f 100644 --- a/Samples/2_Concepts_and_Techniques/histogram/README.md +++ b/Samples/2_Concepts_and_Techniques/histogram/README.md @@ -10,7 +10,7 
@@ Image Processing, Data Parallel Algorithms ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaGetDeviceProperties ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2017.vcxproj index f4cff9b55..39dd83783 100644 --- a/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/histogram.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -110,6 +110,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2019.vcxproj index d2f26cb23..5bde10fc2 100644 --- a/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/histogram.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -106,6 +106,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2022.vcxproj index 
22496c8f9..a3e05c1a3 100644 --- a/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/histogram.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -106,6 +106,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/Makefile b/Samples/2_Concepts_and_Techniques/imageDenoising/Makefile index d452de665..37895080e 100644 --- a/Samples/2_Concepts_and_Techniques/imageDenoising/Makefile +++ b/Samples/2_Concepts_and_Techniques/imageDenoising/Makefile @@ -299,9 +299,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/imageDenoising/NsightEclipse.xml index 0fbb86314..bef88766a 100644 --- a/Samples/2_Concepts_and_Techniques/imageDenoising/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/imageDenoising/NsightEclipse.xml @@ -3,19 +3,19 @@ imageDenoising - cudaFree - cudaGraphicsMapResources - cudaGraphicsUnregisterResource + cudaGraphicsUnmapResources + cudaMemcpy + cudaMallocArray cudaFreeArray - cudaGraphicsGLRegisterBuffer + cudaFree cudaGraphicsResourceGetMappedPointer + cudaGraphicsMapResources cudaDeviceSynchronize cudaCreateTextureObject - cudaMalloc - cudaMallocArray cudaGLRegisterBufferObject - 
cudaGraphicsUnmapResources - cudaMemcpy + cudaGraphicsUnregisterResource + cudaMalloc + cudaGraphicsGLRegisterBuffer whole @@ -83,6 +83,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/README.md b/Samples/2_Concepts_and_Techniques/imageDenoising/README.md index e0ef23db7..d5741a773 100644 --- a/Samples/2_Concepts_and_Techniques/imageDenoising/README.md +++ b/Samples/2_Concepts_and_Techniques/imageDenoising/README.md @@ -10,7 +10,7 @@ Image Processing ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime 
API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaGLRegisterBufferObject, cudaGraphicsUnmapResources, cudaMemcpy +cudaGraphicsUnmapResources, cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaCreateTextureObject, cudaGLRegisterBufferObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/findgllib.mk b/Samples/2_Concepts_and_Techniques/imageDenoising/findgllib.mk index f0a5c5512..998fcf0f1 100644 --- a/Samples/2_Concepts_and_Techniques/imageDenoising/findgllib.mk +++ b/Samples/2_Concepts_and_Techniques/imageDenoising/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) 
+ UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2017.vcxproj index 866d8b333..bf03ff5bb 100644 --- a/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/imageDenoising.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -123,6 +123,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2019.vcxproj index c924e1a5b..facb985e5 100644 --- a/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/imageDenoising.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -119,6 +119,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2022.vcxproj index 4af74bf37..ec0b7c631 100644 --- a/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/imageDenoising.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" 
--threads 0 ./;../../../Common WIN32 @@ -119,6 +119,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX/Makefile b/Samples/2_Concepts_and_Techniques/inlinePTX/Makefile index 481f8a366..6b06a65e2 100644 --- a/Samples/2_Concepts_and_Techniques/inlinePTX/Makefile +++ b/Samples/2_Concepts_and_Techniques/inlinePTX/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/inlinePTX/NsightEclipse.xml index dd8e554c2..b32981020 100644 --- a/Samples/2_Concepts_and_Techniques/inlinePTX/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/inlinePTX/NsightEclipse.xml @@ -3,15 +3,15 @@ inlinePTX + cudaMemcpy cudaFree cudaMallocHost + cudaGetLastError cudaGridSize - cudaDeviceSynchronize cudaBlockSize + cudaDeviceSynchronize cudaFreeHost cudaMalloc - cudaGetLastError - cudaMemcpy whole @@ -51,6 +51,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX/README.md b/Samples/2_Concepts_and_Techniques/inlinePTX/README.md index 61009f1d8..60d100d84 100644 --- a/Samples/2_Concepts_and_Techniques/inlinePTX/README.md +++ b/Samples/2_Concepts_and_Techniques/inlinePTX/README.md @@ -10,7 +10,7 @@ Performance Strategies, PTX Assembly, CUDA Driver API ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 
](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaMallocHost, cudaGridSize, cudaDeviceSynchronize, cudaBlockSize, cudaFreeHost, cudaMalloc, cudaGetLastError, cudaMemcpy +cudaMemcpy, cudaFree, cudaMallocHost, cudaGetLastError, cudaGridSize, cudaBlockSize, cudaDeviceSynchronize, cudaFreeHost, cudaMalloc ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2017.vcxproj index ef5647697..b54f0ecac 100644 --- a/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/inlinePTX.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2019.vcxproj index c843d4c87..59b6d3357 100644 --- a/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/inlinePTX.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2022.vcxproj index 6e0f9bbd3..955dd6cad 100644 --- a/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2022.vcxproj +++ 
b/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/inlinePTX.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/README.md b/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/README.md index 8b06db79f..3d99e87e1 100644 --- a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/README.md +++ b/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/README.md @@ -10,7 +10,7 @@ Performance Strategies, PTX Assembly, CUDA Driver API, Runtime Compilation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 
](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,7 +23,7 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuModuleGetFunction +cuMemcpyDtoH, cuLaunchKernel, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) cudaBlockSize, cudaGridSize @@ -33,7 +33,7 @@ cudaBlockSize, cudaGridSize ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2017.vcxproj index 7b167dbec..3d8dcc9f1 100644 --- a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -107,6 +107,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2019.vcxproj index 4e9539a9d..be53ad45d 100644 --- a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2022.vcxproj index 3eb36cb75..87de14f3c 100644 --- a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/interval/Makefile b/Samples/2_Concepts_and_Techniques/interval/Makefile index bb0e46f95..69cf0f083 100644 --- a/Samples/2_Concepts_and_Techniques/interval/Makefile +++ b/Samples/2_Concepts_and_Techniques/interval/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/interval/NsightEclipse.xml 
b/Samples/2_Concepts_and_Techniques/interval/NsightEclipse.xml index 5b7f3004f..33d957a09 100644 --- a/Samples/2_Concepts_and_Techniques/interval/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/interval/NsightEclipse.xml @@ -3,19 +3,19 @@ interval - cudaDeviceSetLimit - cudaFree + cudaMemcpy cudaFuncSetCacheConfig - cudaEventRecord - cudaEventCreate - cudaEventElapsedTime - cudaDeviceSynchronize - cudaSetDevice cudaMalloc - cudaEventDestroy + cudaFree cudaGetLastError - cudaMemcpy + cudaSetDevice + cudaDeviceSynchronize + cudaEventRecord + cudaDeviceSetLimit + cudaEventDestroy + cudaEventElapsedTime cudaGetDeviceProperties + cudaEventCreate whole @@ -55,6 +55,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/interval/README.md b/Samples/2_Concepts_and_Techniques/interval/README.md index 2d336b2e3..d13b6e9aa 100644 --- a/Samples/2_Concepts_and_Techniques/interval/README.md +++ b/Samples/2_Concepts_and_Techniques/interval/README.md @@ -10,7 +10,7 @@ Recursion, Templates ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 
](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaDeviceSetLimit, cudaFree, cudaFuncSetCacheConfig, cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaDeviceSynchronize, cudaSetDevice, cudaMalloc, cudaEventDestroy, cudaGetLastError, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaFuncSetCacheConfig, cudaMalloc, cudaFree, cudaGetLastError, cudaSetDevice, cudaDeviceSynchronize, cudaEventRecord, cudaDeviceSetLimit, cudaEventDestroy, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/interval/interval_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/interval/interval_vs2017.vcxproj index 937345f70..2c71346d3 100644 --- a/Samples/2_Concepts_and_Techniques/interval/interval_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/interval/interval_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/interval.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -213,6 +213,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/interval/interval_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/interval/interval_vs2019.vcxproj index 0b54fff8b..43bea2307 100644 --- a/Samples/2_Concepts_and_Techniques/interval/interval_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/interval/interval_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/interval.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -209,6 +209,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/interval/interval_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/interval/interval_vs2022.vcxproj index 4f5e0b4c3..ff04d5b61 100644 --- a/Samples/2_Concepts_and_Techniques/interval/interval_vs2022.vcxproj +++ 
b/Samples/2_Concepts_and_Techniques/interval/interval_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/interval.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -209,6 +209,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/particles/Makefile b/Samples/2_Concepts_and_Techniques/particles/Makefile index f322ac641..62b19bfb3 100644 --- a/Samples/2_Concepts_and_Techniques/particles/Makefile +++ b/Samples/2_Concepts_and_Techniques/particles/Makefile @@ -324,9 +324,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/particles/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/particles/NsightEclipse.xml index 0b16f690f..b3fcd5add 100644 --- a/Samples/2_Concepts_and_Techniques/particles/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/particles/NsightEclipse.xml @@ -6,19 +6,19 @@ --std=c++14 - cudaMemcpyToSymbol - cudaMemset + cudaGraphicsUnmapResources + cudaMemcpy cudaFree - cudaGraphicsMapResources - cudaGLInit - cudaGraphicsGLRegisterBuffer cudaGraphicsResourceGetMappedPointer + cudaGraphicsMapResources cudaDeviceSynchronize - cudaInit - cudaMalloc + cudaMemset + cudaMemcpyToSymbol + cudaGraphicsGLRegisterBuffer cudaGraphicsUnregisterResource - cudaGraphicsUnmapResources - cudaMemcpy + cudaMalloc + cudaInit + cudaGLInit " to the command line will allow users to set # of particles for simulation. 
This example implements a uniform grid data structure using either atomic operations or a fast radix sort from the Thrust library]]> whole @@ -83,6 +83,7 @@ sm80 sm86 sm87 + sm90 ..\..\..\Common\param.h ..\..\..\Common\paramgl.h diff --git a/Samples/2_Concepts_and_Techniques/particles/README.md b/Samples/2_Concepts_and_Techniques/particles/README.md index 0e1b6134c..3b1a6974a 100644 --- a/Samples/2_Concepts_and_Techniques/particles/README.md +++ b/Samples/2_Concepts_and_Techniques/particles/README.md @@ -10,7 +10,7 @@ Graphics Interop, Data Parallel Algorithms, Physically-Based Simulation, Perform ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported 
OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemcpyToSymbol, cudaMemset, cudaFree, cudaGraphicsMapResources, cudaGLInit, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaInit, cudaMalloc, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy +cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaMemset, cudaMemcpyToSymbol, cudaGraphicsGLRegisterBuffer, cudaGraphicsUnregisterResource, cudaMalloc, cudaInit, cudaGLInit ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/particles/findgllib.mk b/Samples/2_Concepts_and_Techniques/particles/findgllib.mk index f0a5c5512..998fcf0f1 100644 --- a/Samples/2_Concepts_and_Techniques/particles/findgllib.mk +++ b/Samples/2_Concepts_and_Techniques/particles/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) 
+ UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/2_Concepts_and_Techniques/particles/particles_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/particles/particles_vs2017.vcxproj index 8f28c7642..d2c2a6f80 100644 --- a/Samples/2_Concepts_and_Techniques/particles/particles_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/particles/particles_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/particles.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -129,6 +129,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/particles/particles_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/particles/particles_vs2019.vcxproj index fba7fdcc9..c739cc74a 100644 --- a/Samples/2_Concepts_and_Techniques/particles/particles_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/particles/particles_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/particles.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -125,6 +125,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/particles/particles_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/particles/particles_vs2022.vcxproj index a915f4c0d..77b78a23b 100644 --- a/Samples/2_Concepts_and_Techniques/particles/particles_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/particles/particles_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/particles.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -125,6 +125,6 @@ - + diff --git 
a/Samples/2_Concepts_and_Techniques/radixSortThrust/Makefile b/Samples/2_Concepts_and_Techniques/radixSortThrust/Makefile index df5ee47fb..459e9a21a 100644 --- a/Samples/2_Concepts_and_Techniques/radixSortThrust/Makefile +++ b/Samples/2_Concepts_and_Techniques/radixSortThrust/Makefile @@ -304,9 +304,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/radixSortThrust/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/radixSortThrust/NsightEclipse.xml index c29e9f175..62fd4d555 100644 --- a/Samples/2_Concepts_and_Techniques/radixSortThrust/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/radixSortThrust/NsightEclipse.xml @@ -6,13 +6,13 @@ --std=c++14 - cudaEventRecord - cudaEventCreate - cudaEventElapsedTime cudaEventSynchronize + cudaEventRecord + cudaGetDevice cudaEventDestroy + cudaEventElapsedTime cudaGetDeviceProperties - cudaGetDevice + cudaEventCreate whole @@ -58,6 +58,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/radixSortThrust/README.md b/Samples/2_Concepts_and_Techniques/radixSortThrust/README.md index 835bfd17b..4914b5d20 100644 --- a/Samples/2_Concepts_and_Techniques/radixSortThrust/README.md +++ b/Samples/2_Concepts_and_Techniques/radixSortThrust/README.md @@ -10,7 +10,7 @@ Data-Parallel Algorithms, Performance Strategies ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) 
[SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaEventDestroy, cudaGetDeviceProperties, cudaGetDevice +cudaEventSynchronize, cudaEventRecord, cudaGetDevice, cudaEventDestroy, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2017.vcxproj index 159d441eb..a9c39d2e3 100644 --- a/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/radixSortThrust.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2019.vcxproj index 244e1889d..9ac46af9d 100644 --- a/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/radixSortThrust.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2022.vcxproj 
index edef2e2dd..30b2d9bb3 100644 --- a/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/radixSortThrust.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/reduction/Makefile b/Samples/2_Concepts_and_Techniques/reduction/Makefile index a46a5bb38..eed9f8012 100644 --- a/Samples/2_Concepts_and_Techniques/reduction/Makefile +++ b/Samples/2_Concepts_and_Techniques/reduction/Makefile @@ -310,9 +310,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/reduction/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/reduction/NsightEclipse.xml index e7fa89ac2..629ec3ff4 100644 --- a/Samples/2_Concepts_and_Techniques/reduction/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/reduction/NsightEclipse.xml @@ -6,13 +6,13 @@ --std=c++11 + cudaMemcpy cudaFree + cudaSetDevice cudaDeviceSynchronize + cudaGetDevice cudaMalloc - cudaSetDevice - cudaMemcpy cudaGetDeviceProperties - cudaGetDevice whole @@ -67,6 +67,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/reduction/README.md b/Samples/2_Concepts_and_Techniques/reduction/README.md index 1fde9b55d..65024c63a 100644 --- 
a/Samples/2_Concepts_and_Techniques/reduction/README.md +++ b/Samples/2_Concepts_and_Techniques/reduction/README.md @@ -10,7 +10,7 @@ Data-Parallel Algorithms, Performance Strategies ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaSetDevice, cudaMemcpy, cudaGetDeviceProperties, cudaGetDevice +cudaMemcpy, cudaFree, cudaSetDevice, cudaDeviceSynchronize, cudaGetDevice, cudaMalloc, cudaGetDeviceProperties ## Dependencies needed to build/run 
[CPP11](../../../README.md#cpp11) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2017.vcxproj index 65d31a5d4..f80b1766e 100644 --- a/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/reduction.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2019.vcxproj index b5b673a37..dea43eabb 100644 --- a/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/reduction.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2022.vcxproj index a3f39519f..bbc6826a8 100644 --- a/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/reduction.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/Makefile b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/Makefile index 32bdaeb71..2f62c73ee 100644 --- a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/Makefile +++ b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/Makefile @@ -322,9 +322,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 61 70 72 75 80 86 87 +SMS ?= 61 70 72 75 80 86 87 90 else -SMS ?= 60 61 70 75 80 86 +SMS ?= 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/NsightEclipse.xml index bc231be2e..09decc91e 100644 --- a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/NsightEclipse.xml +++ 
b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/NsightEclipse.xml @@ -6,15 +6,15 @@ --std=c++11 + cudaMemcpy cudaFree - cudaOccupancyMaxActiveBlocksPerMultiprocessor - cudaOccupancyMaxPotentialBlockSize - cudaDeviceSynchronize cudaSetDevice - cudaMalloc + cudaDeviceSynchronize cudaLaunchCooperativeKernel - cudaMemcpy + cudaMalloc + cudaOccupancyMaxActiveBlocksPerMultiprocessor cudaGetDeviceProperties + cudaOccupancyMaxPotentialBlockSize @@ -51,6 +51,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/README.md b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/README.md index b0d5e58f6..7473bae9a 100644 --- a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/README.md +++ b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/README.md @@ -10,7 +10,7 @@ Cooperative Groups, MultiBlock Cooperative Groups ## Supported SM Architectures -[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaOccupancyMaxActiveBlocksPerMultiprocessor, 
cudaOccupancyMaxPotentialBlockSize, cudaDeviceSynchronize, cudaSetDevice, cudaMalloc, cudaLaunchCooperativeKernel, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaFree, cudaSetDevice, cudaDeviceSynchronize, cudaLaunchCooperativeKernel, cudaMalloc, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaGetDeviceProperties, cudaOccupancyMaxPotentialBlockSize ## Dependencies needed to build/run [MBCG](../../../README.md#mbcg), [CPP11](../../../README.md#cpp11) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2017.vcxproj index 4a505b432..c467625a7 100644 --- a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/reductionMultiBlockCG.exe - compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2019.vcxproj index 1e0a9cd1e..306b8c6d2 100644 --- a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2019.vcxproj +++ 
b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/reductionMultiBlockCG.exe - compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2022.vcxproj index e5b2eb9f8..a7261ba7a 100644 --- a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/reductionMultiBlockCG.exe - compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/scalarProd/Makefile b/Samples/2_Concepts_and_Techniques/scalarProd/Makefile index e20a38107..9cacc538f 100644 --- a/Samples/2_Concepts_and_Techniques/scalarProd/Makefile +++ b/Samples/2_Concepts_and_Techniques/scalarProd/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/scalarProd/NsightEclipse.xml 
b/Samples/2_Concepts_and_Techniques/scalarProd/NsightEclipse.xml index 6d36f7644..79a458f33 100644 --- a/Samples/2_Concepts_and_Techniques/scalarProd/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/scalarProd/NsightEclipse.xml @@ -4,9 +4,9 @@ scalarProd cudaMalloc - cudaFree cudaDeviceSynchronize cudaMemcpy + cudaFree whole @@ -46,6 +46,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/scalarProd/README.md b/Samples/2_Concepts_and_Techniques/scalarProd/README.md index 3b54a9662..47ff8e57b 100644 --- a/Samples/2_Concepts_and_Techniques/scalarProd/README.md +++ b/Samples/2_Concepts_and_Techniques/scalarProd/README.md @@ -10,7 +10,7 @@ Linear Algebra ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 
](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMalloc, cudaFree, cudaDeviceSynchronize, cudaMemcpy +cudaMalloc, cudaDeviceSynchronize, cudaMemcpy, cudaFree ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2017.vcxproj index 087bc7dfc..d404cd613 100644 --- a/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/scalarProd.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2019.vcxproj index bfb89fa12..72e9579a5 100644 --- a/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/scalarProd.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2022.vcxproj index 0aabdbf46..cb130eaf4 100644 --- a/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/scalarProd.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/scan/Makefile b/Samples/2_Concepts_and_Techniques/scan/Makefile index 5d371f2f1..8ce4ab790 100644 --- a/Samples/2_Concepts_and_Techniques/scan/Makefile +++ b/Samples/2_Concepts_and_Techniques/scan/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/scan/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/scan/NsightEclipse.xml index 
239b68b14..11e6c2f39 100644 --- a/Samples/2_Concepts_and_Techniques/scan/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/scan/NsightEclipse.xml @@ -4,9 +4,9 @@ scan cudaMalloc - cudaFree cudaDeviceSynchronize cudaMemcpy + cudaFree whole @@ -50,6 +50,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/scan/README.md b/Samples/2_Concepts_and_Techniques/scan/README.md index 8a0563f4d..e10d0a389 100644 --- a/Samples/2_Concepts_and_Techniques/scan/README.md +++ b/Samples/2_Concepts_and_Techniques/scan/README.md @@ -10,7 +10,7 @@ Data-Parallel Algorithms, Performance Strategies ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) 
## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMalloc, cudaFree, cudaDeviceSynchronize, cudaMemcpy +cudaMalloc, cudaDeviceSynchronize, cudaMemcpy, cudaFree ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/scan/scan_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/scan/scan_vs2017.vcxproj index 37db85698..ac2bd4bd7 100644 --- a/Samples/2_Concepts_and_Techniques/scan/scan_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/scan/scan_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/scan.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/scan/scan_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/scan/scan_vs2019.vcxproj index b6da5f75d..fddb8498c 100644 --- a/Samples/2_Concepts_and_Techniques/scan/scan_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/scan/scan_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/scan.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/scan/scan_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/scan/scan_vs2022.vcxproj index cb3e2bed2..4f238933c 100644 --- a/Samples/2_Concepts_and_Techniques/scan/scan_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/scan/scan_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/scan.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/Makefile b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/Makefile index 9f15915f0..fe1a04ef2 100644 --- a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/Makefile +++ b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/Makefile @@ -304,9 +304,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/NsightEclipse.xml index dd3363eaf..bfabb713b 100644 --- a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/NsightEclipse.xml +++ 
b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/NsightEclipse.xml @@ -7,13 +7,13 @@ --threads 1 - cudaMemset + cudaMemcpy + cudaMemGetInfo + cudaEventSynchronize cudaEventRecord - cudaEventCreate + cudaMemset cudaEventElapsedTime - cudaEventSynchronize - cudaMemGetInfo - cudaMemcpy + cudaEventCreate whole @@ -60,6 +60,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/README.md b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/README.md index cd6cbc5ae..b62923534 100644 --- a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/README.md +++ b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/README.md @@ -10,7 +10,7 @@ Data-Parallel Algorithms, Performance Strategies ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 
8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaMemGetInfo, cudaMemcpy +cudaMemcpy, cudaMemGetInfo, cudaEventSynchronize, cudaEventRecord, cudaMemset, cudaEventElapsedTime, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2017.vcxproj index 14b777a67..d72fc3b5e 100644 --- a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/segmentationTreeThrust.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 1 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2019.vcxproj 
index 591302fec..96fd37606 100644 --- a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/segmentationTreeThrust.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 1 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2022.vcxproj index e82eceb40..18e37f1fd 100644 --- a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/segmentationTreeThrust.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 1 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/shfl_scan/Makefile b/Samples/2_Concepts_and_Techniques/shfl_scan/Makefile index 256e10fd9..4f210b3ef 100644 --- a/Samples/2_Concepts_and_Techniques/shfl_scan/Makefile +++ b/Samples/2_Concepts_and_Techniques/shfl_scan/Makefile @@ 
-310,9 +310,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/shfl_scan/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/shfl_scan/NsightEclipse.xml index 8410c3253..d60600637 100644 --- a/Samples/2_Concepts_and_Techniques/shfl_scan/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/shfl_scan/NsightEclipse.xml @@ -7,18 +7,18 @@ -O3 - cudaMemset + cudaMemcpy cudaFree - cudaEventRecord cudaMallocHost - cudaEventCreate - cudaEventElapsedTime cudaEventSynchronize + cudaEventRecord cudaFreeHost + cudaGetDevice + cudaMemset cudaMalloc - cudaMemcpy + cudaEventElapsedTime cudaGetDeviceProperties - cudaGetDevice + cudaEventCreate whole @@ -66,6 +66,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/shfl_scan/README.md b/Samples/2_Concepts_and_Techniques/shfl_scan/README.md index f042bb793..5afaefead 100644 --- a/Samples/2_Concepts_and_Techniques/shfl_scan/README.md +++ b/Samples/2_Concepts_and_Techniques/shfl_scan/README.md @@ -10,7 +10,7 @@ Data-Parallel Algorithms, Performance Strategies ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 
](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaEventRecord, cudaMallocHost, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties, cudaGetDevice +cudaMemcpy, cudaFree, cudaMallocHost, cudaEventSynchronize, cudaEventRecord, cudaFreeHost, cudaGetDevice, cudaMemset, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate ## Dependencies needed to build/run [CPP11](../../../README.md#cpp11) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2017.vcxproj index ee4c6348f..9e8017119 100644 --- a/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/shfl_scan.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2019.vcxproj index c99b3fe4f..72076c473 100644 --- a/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/shfl_scan.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2022.vcxproj index ec328ba67..3ad9db48c 100644 --- a/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2022.vcxproj +++ 
b/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/shfl_scan.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/sortingNetworks/Makefile b/Samples/2_Concepts_and_Techniques/sortingNetworks/Makefile index a2de7cb98..7d715f267 100644 --- a/Samples/2_Concepts_and_Techniques/sortingNetworks/Makefile +++ b/Samples/2_Concepts_and_Techniques/sortingNetworks/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/sortingNetworks/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/sortingNetworks/NsightEclipse.xml index bec0501ad..3ef627213 100644 --- a/Samples/2_Concepts_and_Techniques/sortingNetworks/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/sortingNetworks/NsightEclipse.xml @@ -4,9 +4,9 @@ sortingNetworks cudaMalloc - cudaFree cudaDeviceSynchronize cudaMemcpy + cudaFree whole @@ -46,6 +46,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/sortingNetworks/README.md b/Samples/2_Concepts_and_Techniques/sortingNetworks/README.md index 905e698ff..707ed30dc 100644 --- a/Samples/2_Concepts_and_Techniques/sortingNetworks/README.md +++ b/Samples/2_Concepts_and_Techniques/sortingNetworks/README.md @@ -10,7 +10,7 @@ Data-Parallel Algorithms 
## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMalloc, cudaFree, cudaDeviceSynchronize, cudaMemcpy +cudaMalloc, cudaDeviceSynchronize, cudaMemcpy, cudaFree ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2017.vcxproj index 0012d67a6..7c018efb1 100644 --- a/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/sortingNetworks.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -111,6 +111,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2019.vcxproj index 90052760a..3181de728 100644 --- a/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/sortingNetworks.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2022.vcxproj 
index 626d389a3..dc83a7b72 100644 --- a/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/sortingNetworks.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/Makefile b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/Makefile index c2e55c399..c672f7074 100644 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/Makefile +++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/Makefile @@ -285,9 +285,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/NsightEclipse.xml index 63511e063..8ca780e91 100644 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/NsightEclipse.xml @@ -3,20 +3,20 @@ streamOrderedAllocation + cudaDeviceGetDefaultMemPool + cudaFreeAsync + cudaStreamCreateWithFlags cudaStreamDestroy - cudaEventRecord - cudaEventCreate - cudaEventElapsedTime - cudaEventSynchronize cudaDeviceGetAttribute cudaMallocAsync - cudaStreamCreateWithFlags - 
cudaDeviceGetDefaultMemPool cudaSetDevice - cudaMemPoolSetAttribute + cudaEventSynchronize + cudaEventRecord cudaStreamSynchronize + cudaMemPoolSetAttribute + cudaEventElapsedTime cudaMemcpyAsync - cudaFreeAsync + cudaEventCreate whole @@ -48,6 +48,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/README.md b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/README.md index 2eaa6d1a2..4af372ec2 100644 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/README.md +++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/README.md @@ -10,7 +10,7 @@ Performance Strategies ## Supported SM Architectures -[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaStreamDestroy, cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaDeviceGetAttribute, cudaMallocAsync, cudaStreamCreateWithFlags, cudaDeviceGetDefaultMemPool, cudaSetDevice, cudaMemPoolSetAttribute, cudaStreamSynchronize, cudaMemcpyAsync, cudaFreeAsync 
+cudaDeviceGetDefaultMemPool, cudaFreeAsync, cudaStreamCreateWithFlags, cudaStreamDestroy, cudaDeviceGetAttribute, cudaMallocAsync, cudaSetDevice, cudaEventSynchronize, cudaEventRecord, cudaStreamSynchronize, cudaMemPoolSetAttribute, cudaEventElapsedTime, cudaMemcpyAsync, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2017.vcxproj index 376ea1fa0..8f4dc7c00 100644 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/streamOrderedAllocation.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2019.vcxproj index f81e896cb..9cd3baae7 100644 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2019.vcxproj @@ 
-34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/streamOrderedAllocation.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2022.vcxproj index b61e42f3f..6f2d5040c 100644 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/streamOrderedAllocation.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/Makefile b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/Makefile index b71befbe7..418451617 100644 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/Makefile +++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/Makefile @@ -305,9 +305,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 
61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/NsightEclipse.xml index d583eaaad..b357595e0 100644 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/NsightEclipse.xml @@ -8,28 +8,28 @@ cuDeviceGetAttribute cuDeviceGet - cudaDeviceEnablePeerAccess - cudaMemPoolImportPointer - cudaOccupancyMaxActiveBlocksPerMultiprocessor + cudaDeviceGetAttribute + cudaMemPoolImportFromShareableHandle + cudaSetDevice + cudaMemPoolExportPointer + cudaMemPoolGetAccess cudaMemPoolDestroy + cudaMemPoolSetAccess cudaMallocAsync - cudaStreamCreateWithFlags - cudaDeviceCanAccessPeer + cudaMemPoolImportPointer + cudaGetDeviceCount cudaMemcpyAsync + cudaDeviceCanAccessPeer + cudaFreeAsync + cudaStreamCreateWithFlags cudaStreamDestroy - cudaSetDevice - cudaGetDeviceProperties - cudaMemPoolSetAccess - cudaGetDeviceCount - cudaDeviceGetAttribute - cudaMemPoolExportPointer - cudaMemPoolImportFromShareableHandle - cudaMemPoolCreate cudaGetLastError - cudaStreamSynchronize - cudaMemPoolGetAccess + cudaMemPoolCreate cudaMemPoolExportToShareableHandle - cudaFreeAsync + cudaStreamSynchronize + cudaDeviceEnablePeerAccess + cudaOccupancyMaxActiveBlocksPerMultiprocessor + cudaGetDeviceProperties whole @@ -63,6 +63,7 @@ sm80 sm86 sm87 + sm90 ../../../Common/helper_multiprocess.cpp ../../../Common/helper_multiprocess.h diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/README.md b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/README.md index 73a90c5e6..b3eff96a3 100644 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/README.md +++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/README.md @@ -10,7 +10,7 
@@ Performance Strategies ## Supported SM Architectures -[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -26,11 +26,11 @@ x86_64 cuDeviceGetAttribute, cuDeviceGet ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaDeviceEnablePeerAccess, cudaMemPoolImportPointer, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaMemPoolDestroy, cudaMallocAsync, cudaStreamCreateWithFlags, cudaDeviceCanAccessPeer, cudaMemcpyAsync, cudaStreamDestroy, cudaSetDevice, cudaGetDeviceProperties, cudaMemPoolSetAccess, cudaGetDeviceCount, cudaDeviceGetAttribute, cudaMemPoolExportPointer, cudaMemPoolImportFromShareableHandle, cudaMemPoolCreate, cudaGetLastError, cudaStreamSynchronize, cudaMemPoolGetAccess, cudaMemPoolExportToShareableHandle, cudaFreeAsync +cudaDeviceGetAttribute, cudaMemPoolImportFromShareableHandle, cudaSetDevice, cudaMemPoolExportPointer, cudaMemPoolGetAccess, cudaMemPoolDestroy, cudaMemPoolSetAccess, cudaMallocAsync, cudaMemPoolImportPointer, cudaGetDeviceCount, cudaMemcpyAsync, cudaDeviceCanAccessPeer, cudaFreeAsync, cudaStreamCreateWithFlags, cudaStreamDestroy, cudaGetLastError, cudaMemPoolCreate, 
cudaMemPoolExportToShareableHandle, cudaStreamSynchronize, cudaDeviceEnablePeerAccess, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaGetDeviceProperties ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/Makefile b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/Makefile index 792db21e7..75bf6386c 100644 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/Makefile +++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/Makefile @@ -285,9 +285,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/NsightEclipse.xml index 3e51c0ff0..f64444425 100644 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/NsightEclipse.xml @@ -6,21 +6,21 @@ --std=c++11 - cudaStreamWaitEvent - cudaStreamDestroy + cudaDeviceGetDefaultMemPool + cudaFreeAsync + cudaStreamCreateWithFlags cudaMemPoolSetAccess - cudaEventRecord - cudaEventCreate - cudaGetDeviceCount - cudaMallocAsync + cudaStreamDestroy cudaDeviceGetAttribute - cudaStreamCreateWithFlags - cudaDeviceCanAccessPeer - cudaDeviceGetDefaultMemPool + cudaMallocAsync cudaSetDevice + cudaGetDeviceCount + cudaEventRecord cudaStreamSynchronize + cudaStreamWaitEvent cudaMemcpyAsync - cudaFreeAsync + cudaDeviceCanAccessPeer + 
cudaEventCreate whole @@ -52,6 +52,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/README.md b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/README.md index 9ef3fa179..0b2a83d95 100644 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/README.md +++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/README.md @@ -10,7 +10,7 @@ Performance Strategies ## Supported SM Architectures -[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaStreamWaitEvent, cudaStreamDestroy, cudaMemPoolSetAccess, cudaEventRecord, cudaEventCreate, cudaGetDeviceCount, cudaMallocAsync, cudaDeviceGetAttribute, cudaStreamCreateWithFlags, cudaDeviceCanAccessPeer, cudaDeviceGetDefaultMemPool, cudaSetDevice, cudaStreamSynchronize, cudaMemcpyAsync, cudaFreeAsync +cudaDeviceGetDefaultMemPool, cudaFreeAsync, cudaStreamCreateWithFlags, cudaMemPoolSetAccess, cudaStreamDestroy, cudaDeviceGetAttribute, cudaMallocAsync, cudaSetDevice, cudaGetDeviceCount, 
cudaEventRecord, cudaStreamSynchronize, cudaStreamWaitEvent, cudaMemcpyAsync, cudaDeviceCanAccessPeer, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2017.vcxproj index 5b9ec3209..5ad486fcf 100644 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/streamOrderedAllocationP2P.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2019.vcxproj index 79b1ce851..a7248a6e3 100644 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/streamOrderedAllocationP2P.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2022.vcxproj index 6b468f18d..0ea721452 100644 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/streamOrderedAllocationP2P.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/threadFenceReduction/Makefile b/Samples/2_Concepts_and_Techniques/threadFenceReduction/Makefile index e3f4586be..37b8a9b46 100644 --- a/Samples/2_Concepts_and_Techniques/threadFenceReduction/Makefile +++ b/Samples/2_Concepts_and_Techniques/threadFenceReduction/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 
35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/threadFenceReduction/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/threadFenceReduction/NsightEclipse.xml index e997b187f..8436fef3e 100644 --- a/Samples/2_Concepts_and_Techniques/threadFenceReduction/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/threadFenceReduction/NsightEclipse.xml @@ -3,10 +3,10 @@ threadFenceReduction + cudaMemcpy cudaFree cudaDeviceSynchronize cudaMalloc - cudaMemcpy cudaGetDeviceProperties @@ -48,6 +48,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/threadFenceReduction/README.md b/Samples/2_Concepts_and_Techniques/threadFenceReduction/README.md index 4ce80f5c4..0156a5f12 100644 --- a/Samples/2_Concepts_and_Techniques/threadFenceReduction/README.md +++ b/Samples/2_Concepts_and_Techniques/threadFenceReduction/README.md @@ -10,7 +10,7 @@ Cooperative Groups, Data-Parallel Algorithms, Performance Strategies ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 
](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaGetDeviceProperties ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2017.vcxproj index 4b0cf87e5..0d547054e 100644 --- a/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/threadFenceReduction.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git 
a/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2019.vcxproj index b15f148f5..ca4df79f1 100644 --- a/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/threadFenceReduction.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2022.vcxproj index 157d11dcc..2e5fa2f9d 100644 --- a/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/threadFenceReduction.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/threadMigration/Makefile 
b/Samples/2_Concepts_and_Techniques/threadMigration/Makefile index 3acd84547..6c4d542dd 100644 --- a/Samples/2_Concepts_and_Techniques/threadMigration/Makefile +++ b/Samples/2_Concepts_and_Techniques/threadMigration/Makefile @@ -283,9 +283,9 @@ FATBIN_FILE := threadMigration_kernel${TARGET_SIZE}.fatbin # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(GENCODE_FLAGS),) diff --git a/Samples/2_Concepts_and_Techniques/threadMigration/README.md b/Samples/2_Concepts_and_Techniques/threadMigration/README.md index 22a095da6..801305ce0 100644 --- a/Samples/2_Concepts_and_Techniques/threadMigration/README.md +++ b/Samples/2_Concepts_and_Techniques/threadMigration/README.md @@ -10,7 +10,7 @@ CUDA Driver API ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 
](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuModuleGetFunction, cuModuleLoadData, cuDeviceGetCount, cuCtxCreate, cuCtxPopCurrent, cuDeviceGetName, cuLaunchKernel, cuMemAlloc, cuMemcpyDtoH, cuModuleUnload, cuCtxPushCurrent, cuDeviceGet, cuMemFree, cuInit, cuCtxDestroy, cuDeviceGetAttribute +cuMemcpyDtoH, cuLaunchKernel, cuModuleLoadData, cuDeviceGetName, cuDeviceGet, cuDeviceGetAttribute, cuMemAlloc, cuMemFree, cuCtxDestroy, cuCtxPopCurrent, cuModuleUnload, cuDeviceGetCount, cuModuleGetFunction, cuCtxCreate, cuCtxPushCurrent, cuInit ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2017.vcxproj index 8a38efdd2..ba53a46ac 100644 --- a/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/threadMigration.exe - compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -111,6 +111,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2019.vcxproj index 05df36d85..6f0f8a904 100644 --- a/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/threadMigration.exe - compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git 
a/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2022.vcxproj index cb4ebcc86..b81a2b07e 100644 --- a/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/threadMigration.exe - compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/3_CUDA_Features/StreamPriorities/Makefile b/Samples/3_CUDA_Features/StreamPriorities/Makefile index 9b1734f5f..6faeb198a 100644 --- a/Samples/3_CUDA_Features/StreamPriorities/Makefile +++ b/Samples/3_CUDA_Features/StreamPriorities/Makefile @@ -297,9 +297,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/StreamPriorities/NsightEclipse.xml b/Samples/3_CUDA_Features/StreamPriorities/NsightEclipse.xml index b3e3aab7e..0cb59d58a 100644 --- a/Samples/3_CUDA_Features/StreamPriorities/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/StreamPriorities/NsightEclipse.xml @@ -3,15 +3,15 @@ StreamPriorities + cudaMemcpy + cudaStreamCreateWithPriority cudaDeviceGetStreamPriorityRange - cudaEventRecord - cudaEventCreate - cudaEventElapsedTime cudaEventSynchronize + cudaEventRecord 
cudaMalloc - cudaStreamCreateWithPriority - cudaMemcpy + cudaEventElapsedTime cudaGetDeviceProperties + cudaEventCreate whole @@ -54,6 +54,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/StreamPriorities/README.md b/Samples/3_CUDA_Features/StreamPriorities/README.md index 52150773a..0b616b490 100644 --- a/Samples/3_CUDA_Features/StreamPriorities/README.md +++ b/Samples/3_CUDA_Features/StreamPriorities/README.md @@ -10,7 +10,7 @@ CUDA Streams and Events ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le ## CUDA APIs involved ### [CUDA Runtime 
API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaDeviceGetStreamPriorityRange, cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaMalloc, cudaStreamCreateWithPriority, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaStreamCreateWithPriority, cudaDeviceGetStreamPriorityRange, cudaEventSynchronize, cudaEventRecord, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate ## Dependencies needed to build/run [Stream-Priorities](../../../README.md#stream-priorities) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/3_CUDA_Features/bf16TensorCoreGemm/Makefile b/Samples/3_CUDA_Features/bf16TensorCoreGemm/Makefile index 3d8b8dec9..29fdcbaec 100644 --- a/Samples/3_CUDA_Features/bf16TensorCoreGemm/Makefile +++ b/Samples/3_CUDA_Features/bf16TensorCoreGemm/Makefile @@ -322,9 +322,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 80 86 87 +SMS ?= 80 86 87 90 else -SMS ?= 80 86 +SMS ?= 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/bf16TensorCoreGemm/NsightEclipse.xml b/Samples/3_CUDA_Features/bf16TensorCoreGemm/NsightEclipse.xml index a93136960..1eac8e5c0 100644 --- a/Samples/3_CUDA_Features/bf16TensorCoreGemm/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/bf16TensorCoreGemm/NsightEclipse.xml @@ -6,18 +6,18 @@ --std=c++11 - cudaMemset + cudaMemcpy cudaFree - cudaEventRecord - cudaEventCreate - cudaFuncSetAttribute - cudaEventElapsedTime + cudaGetErrorString + cudaGetLastError cudaEventSynchronize + cudaFuncSetAttribute + cudaEventRecord + cudaMemset cudaMalloc - cudaGetLastError - cudaMemcpy - 
cudaGetErrorString + cudaEventElapsedTime cudaGetDeviceProperties + cudaEventCreate whole @@ -52,6 +52,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/bf16TensorCoreGemm/README.md b/Samples/3_CUDA_Features/bf16TensorCoreGemm/README.md index 7a8b729b7..30f4eece5 100644 --- a/Samples/3_CUDA_Features/bf16TensorCoreGemm/README.md +++ b/Samples/3_CUDA_Features/bf16TensorCoreGemm/README.md @@ -10,7 +10,7 @@ Matrix Multiply, WMMA, Tensor Cores ## Supported SM Architectures -[SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaEventRecord, cudaEventCreate, cudaFuncSetAttribute, cudaEventElapsedTime, cudaEventSynchronize, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties +cudaMemcpy, cudaFree, cudaGetErrorString, cudaGetLastError, cudaEventSynchronize, cudaFuncSetAttribute, cudaEventRecord, cudaMemset, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate ## Dependencies needed to build/run [CPP11](../../../README.md#cpp11) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2017.vcxproj b/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2017.vcxproj index bfab31dcf..b8cb9fb27 100644 --- a/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/bf16TensorCoreGemm.exe - compute_80,sm_80;compute_86,sm_86; + compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2019.vcxproj b/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2019.vcxproj index 4240024c6..c2c1f9202 100644 --- a/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/bf16TensorCoreGemm.exe - compute_80,sm_80;compute_86,sm_86; + compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2022.vcxproj b/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2022.vcxproj index ecbfc6d54..774b45bf7 100644 --- a/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/bf16TensorCoreGemm.exe - compute_80,sm_80;compute_86,sm_86; + compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/binaryPartitionCG/Makefile b/Samples/3_CUDA_Features/binaryPartitionCG/Makefile index 
d1bdbc6d3..ace0b3ae0 100644 --- a/Samples/3_CUDA_Features/binaryPartitionCG/Makefile +++ b/Samples/3_CUDA_Features/binaryPartitionCG/Makefile @@ -310,9 +310,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/binaryPartitionCG/NsightEclipse.xml b/Samples/3_CUDA_Features/binaryPartitionCG/NsightEclipse.xml index b611d7c1c..96982217e 100644 --- a/Samples/3_CUDA_Features/binaryPartitionCG/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/binaryPartitionCG/NsightEclipse.xml @@ -6,15 +6,15 @@ --std=c++11 + cudaStreamCreateWithFlags cudaFree cudaMallocHost - cudaOccupancyMaxPotentialBlockSize - cudaMemsetAsync cudaFreeHost - cudaMalloc - cudaStreamCreateWithFlags cudaStreamSynchronize + cudaMalloc + cudaMemsetAsync cudaMemcpyAsync + cudaOccupancyMaxPotentialBlockSize whole @@ -54,6 +54,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/binaryPartitionCG/README.md b/Samples/3_CUDA_Features/binaryPartitionCG/README.md index c40ff6bdc..d2c296826 100644 --- a/Samples/3_CUDA_Features/binaryPartitionCG/README.md +++ b/Samples/3_CUDA_Features/binaryPartitionCG/README.md @@ -10,7 +10,7 @@ Cooperative Groups ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 
](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaMallocHost, cudaOccupancyMaxPotentialBlockSize, cudaMemsetAsync, cudaFreeHost, cudaMalloc, cudaStreamCreateWithFlags, cudaStreamSynchronize, cudaMemcpyAsync +cudaStreamCreateWithFlags, cudaFree, cudaMallocHost, cudaFreeHost, cudaStreamSynchronize, cudaMalloc, cudaMemsetAsync, cudaMemcpyAsync, cudaOccupancyMaxPotentialBlockSize ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2017.vcxproj b/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2017.vcxproj index cd3fccf0a..be9569bba 100644 --- a/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/binaryPartitionCG.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2019.vcxproj b/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2019.vcxproj index 5b7320cec..c3c283623 100644 --- a/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/binaryPartitionCG.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2022.vcxproj b/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2022.vcxproj index df9ea16fd..594d4bd3c 100644 --- 
a/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/binaryPartitionCG.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/bindlessTexture/Makefile b/Samples/3_CUDA_Features/bindlessTexture/Makefile index 4310ee3ee..9e9c3369d 100644 --- a/Samples/3_CUDA_Features/bindlessTexture/Makefile +++ b/Samples/3_CUDA_Features/bindlessTexture/Makefile @@ -299,9 +299,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/bindlessTexture/NsightEclipse.xml b/Samples/3_CUDA_Features/bindlessTexture/NsightEclipse.xml index 247f12559..25b63ad86 100644 --- a/Samples/3_CUDA_Features/bindlessTexture/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/bindlessTexture/NsightEclipse.xml @@ -3,28 +3,28 @@ bindlessTexture - cudaGraphicsMapResources - cudaGetMipmappedArrayLevel - cudaGraphicsResourceGetMappedPointer - cudaArrayGetInfo cudaMemcpy - cudaFreeMipmappedArray + cudaGetMipmappedArrayLevel + cudaGraphicsMapResources cudaDestroySurfaceObject - cudaPitchedPtr - cudaMalloc - cudaGraphicsUnregisterResource - cudaDeviceSynchronize - cudaDestroyTextureObject - cudaGetLastError - cudaFree - cudaFreeArray - cudaGraphicsGLRegisterBuffer cudaExtent + 
cudaDeviceSynchronize cudaCreateSurfaceObject cudaMallocMipmappedArray + cudaPitchedPtr + cudaGraphicsResourceGetMappedPointer cudaCreateTextureObject - cudaMallocArray cudaGraphicsUnmapResources + cudaMallocArray + cudaFreeArray + cudaArrayGetInfo + cudaGetLastError + cudaDestroyTextureObject + cudaGraphicsGLRegisterBuffer + cudaFreeMipmappedArray + cudaFree + cudaGraphicsUnregisterResource + cudaMalloc whole @@ -85,6 +85,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/bindlessTexture/README.md b/Samples/3_CUDA_Features/bindlessTexture/README.md index 4047f08aa..cf14ba1ae 100644 --- a/Samples/3_CUDA_Features/bindlessTexture/README.md +++ b/Samples/3_CUDA_Features/bindlessTexture/README.md @@ -10,7 +10,7 @@ Graphics Interop, Texture ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) 
[SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaGraphicsMapResources, cudaGetMipmappedArrayLevel, cudaGraphicsResourceGetMappedPointer, cudaArrayGetInfo, cudaMemcpy, cudaFreeMipmappedArray, cudaDestroySurfaceObject, cudaPitchedPtr, cudaMalloc, cudaGraphicsUnregisterResource, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaGetLastError, cudaFree, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaExtent, cudaCreateSurfaceObject, cudaMallocMipmappedArray, cudaCreateTextureObject, cudaMallocArray, cudaGraphicsUnmapResources +cudaMemcpy, cudaGetMipmappedArrayLevel, cudaGraphicsMapResources, cudaDestroySurfaceObject, cudaExtent, cudaDeviceSynchronize, cudaCreateSurfaceObject, cudaMallocMipmappedArray, cudaPitchedPtr, cudaGraphicsResourceGetMappedPointer, cudaCreateTextureObject, cudaGraphicsUnmapResources, cudaMallocArray, cudaFreeArray, cudaArrayGetInfo, cudaGetLastError, cudaDestroyTextureObject, cudaGraphicsGLRegisterBuffer, cudaFreeMipmappedArray, cudaFree, cudaGraphicsUnregisterResource, cudaMalloc ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2017.vcxproj b/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2017.vcxproj index c74343151..bcc1990a3 100644 --- a/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/bindlessTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -118,6 +118,6 @@ - + diff --git a/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2019.vcxproj b/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2019.vcxproj index d18b1ac48..110d990fb 100644 --- a/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/bindlessTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2022.vcxproj b/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2022.vcxproj index e08edee01..f9bcc8ae7 100644 --- 
a/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/bindlessTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/3_CUDA_Features/bindlessTexture/findgllib.mk b/Samples/3_CUDA_Features/bindlessTexture/findgllib.mk index f0a5c5512..998fcf0f1 100644 --- a/Samples/3_CUDA_Features/bindlessTexture/findgllib.mk +++ b/Samples/3_CUDA_Features/bindlessTexture/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/Makefile b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/Makefile index 85498be38..5eab1414e 100644 --- a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/Makefile +++ b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/Makefile @@ -310,9 +310,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 61 70 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/NsightEclipse.xml b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/NsightEclipse.xml index 714aab28a..383410d36 100644 --- a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/NsightEclipse.xml @@ -8,20 +8,20 @@ --std=c++14 - cudaMemset - cudaFree - cudaEventRecord - cudaEventCreate - cudaEventElapsedTime - cudaDeviceSynchronize - cudaPeekAtLastError - cudaMalloc cudaStreamCreateWithFlags - cudaGetLastError - cudaMemcpyAsync cudaMemcpy + cudaMemcpyAsync + 
cudaFree cudaGetErrorString + cudaGetLastError + cudaPeekAtLastError + cudaDeviceSynchronize + cudaEventRecord + cudaMemset + cudaMalloc + cudaEventElapsedTime cudaGetDeviceProperties + cudaEventCreate @@ -63,6 +63,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/README.md b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/README.md index b3f334194..1314b89b4 100644 --- a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/README.md +++ b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/README.md @@ -10,7 +10,7 @@ Cooperative Groups, CUDA Dynamic Parallelism ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ 
-23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaDeviceSynchronize, cudaPeekAtLastError, cudaMalloc, cudaStreamCreateWithFlags, cudaGetLastError, cudaMemcpyAsync, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties +cudaStreamCreateWithFlags, cudaMemcpy, cudaMemcpyAsync, cudaFree, cudaGetErrorString, cudaGetLastError, cudaPeekAtLastError, cudaDeviceSynchronize, cudaEventRecord, cudaMemset, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate ## Dependencies needed to build/run [CDP](../../../README.md#cdp) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2017.vcxproj b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2017.vcxproj index 6244cf4fd..5c76d1c7d 100644 --- a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/cdpAdvancedQuicksort.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2019.vcxproj b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2019.vcxproj index 68071f8a3..4779bb514 100644 --- a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cdpAdvancedQuicksort.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2022.vcxproj 
b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2022.vcxproj index 462ed63c6..39da54075 100644 --- a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cdpAdvancedQuicksort.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/3_CUDA_Features/cdpBezierTessellation/Makefile b/Samples/3_CUDA_Features/cdpBezierTessellation/Makefile index 4f89e84df..23ef29e87 100644 --- a/Samples/3_CUDA_Features/cdpBezierTessellation/Makefile +++ b/Samples/3_CUDA_Features/cdpBezierTessellation/Makefile @@ -285,9 +285,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 61 70 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/cdpBezierTessellation/NsightEclipse.xml b/Samples/3_CUDA_Features/cdpBezierTessellation/NsightEclipse.xml index 5566265e5..26392f7b9 100644 --- a/Samples/3_CUDA_Features/cdpBezierTessellation/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/cdpBezierTessellation/NsightEclipse.xml @@ -6,10 +6,10 @@ -dc + cudaMemcpy cudaFree cudaGetDeviceCount cudaMalloc - cudaMemcpy cudaGetDeviceProperties @@ -50,6 +50,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/cdpBezierTessellation/README.md b/Samples/3_CUDA_Features/cdpBezierTessellation/README.md index 
30ba33753..bb2d6e636 100644 --- a/Samples/3_CUDA_Features/cdpBezierTessellation/README.md +++ b/Samples/3_CUDA_Features/cdpBezierTessellation/README.md @@ -10,7 +10,7 @@ CUDA Dynamic Parallelism ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGetDeviceCount, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaFree, cudaGetDeviceCount, cudaMalloc, cudaGetDeviceProperties ## Dependencies needed to build/run [CDP](../../../README.md#cdp) ## Prerequisites -Download and 
install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2017.vcxproj b/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2017.vcxproj index 6436a8ad2..e7733bf88 100644 --- a/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/cdpBezierTessellation.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2019.vcxproj b/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2019.vcxproj index 1452f3312..aa2fefe0a 100644 --- a/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cdpBezierTessellation.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2022.vcxproj b/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2022.vcxproj index b1eb0d98a..577c5e196 100644 --- a/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cdpBezierTessellation.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/3_CUDA_Features/cdpQuadtree/Makefile b/Samples/3_CUDA_Features/cdpQuadtree/Makefile index fc39f0e0b..9b4e08f87 100644 --- a/Samples/3_CUDA_Features/cdpQuadtree/Makefile +++ b/Samples/3_CUDA_Features/cdpQuadtree/Makefile @@ -310,9 +310,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 61 70 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/cdpQuadtree/NsightEclipse.xml b/Samples/3_CUDA_Features/cdpQuadtree/NsightEclipse.xml index 63b022306..6d867f7f1 100644 --- a/Samples/3_CUDA_Features/cdpQuadtree/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/cdpQuadtree/NsightEclipse.xml @@ -7,11 +7,11 
@@ --std=c++14 - cudaDeviceSetLimit + cudaMemcpy cudaFree - cudaMalloc cudaGetLastError - cudaMemcpy + cudaDeviceSetLimit + cudaMalloc cudaGetDeviceProperties @@ -54,6 +54,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/cdpQuadtree/README.md b/Samples/3_CUDA_Features/cdpQuadtree/README.md index a170e47e4..dc96c3c5a 100644 --- a/Samples/3_CUDA_Features/cdpQuadtree/README.md +++ b/Samples/3_CUDA_Features/cdpQuadtree/README.md @@ -10,7 +10,7 @@ Cooperative Groups, CUDA Dynamic Parallelism ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime 
API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaDeviceSetLimit, cudaFree, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaFree, cudaGetLastError, cudaDeviceSetLimit, cudaMalloc, cudaGetDeviceProperties ## Dependencies needed to build/run [CDP](../../../README.md#cdp) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2017.vcxproj b/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2017.vcxproj index b0cd83761..15110cd3a 100644 --- a/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/cdpQuadtree.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2019.vcxproj b/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2019.vcxproj index 99bfdc1d0..3ec1b1364 100644 --- a/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cdpQuadtree.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2022.vcxproj b/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2022.vcxproj index d2e082999..181408c39 100644 --- a/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cdpQuadtree.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/3_CUDA_Features/cdpSimplePrint/Makefile b/Samples/3_CUDA_Features/cdpSimplePrint/Makefile index d64d69da8..1ea644281 100644 --- a/Samples/3_CUDA_Features/cdpSimplePrint/Makefile +++ b/Samples/3_CUDA_Features/cdpSimplePrint/Makefile @@ -285,9 +285,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 61 70 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/cdpSimplePrint/NsightEclipse.xml b/Samples/3_CUDA_Features/cdpSimplePrint/NsightEclipse.xml index d1d5fd2fb..cfe32d190 100644 --- 
a/Samples/3_CUDA_Features/cdpSimplePrint/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/cdpSimplePrint/NsightEclipse.xml @@ -6,10 +6,10 @@ -dc - cudaDeviceSetLimit - cudaGetLastError cudaDeviceSynchronize + cudaGetLastError cudaGetDeviceProperties + cudaDeviceSetLimit @@ -51,6 +51,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/cdpSimplePrint/README.md b/Samples/3_CUDA_Features/cdpSimplePrint/README.md index ab8682711..c872f4bbb 100644 --- a/Samples/3_CUDA_Features/cdpSimplePrint/README.md +++ b/Samples/3_CUDA_Features/cdpSimplePrint/README.md @@ -10,7 +10,7 @@ CUDA Dynamic Parallelism ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) 
## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaDeviceSetLimit, cudaGetLastError, cudaDeviceSynchronize, cudaGetDeviceProperties +cudaDeviceSynchronize, cudaGetLastError, cudaGetDeviceProperties, cudaDeviceSetLimit ## Dependencies needed to build/run [CDP](../../../README.md#cdp) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2017.vcxproj b/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2017.vcxproj index 63502de30..b26134875 100644 --- a/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/cdpSimplePrint.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2019.vcxproj b/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2019.vcxproj index d87192480..ae105c0af 100644 --- a/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ 
$(OutDir)/cdpSimplePrint.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2022.vcxproj b/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2022.vcxproj index bc59636b0..7f1b73f1b 100644 --- a/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cdpSimplePrint.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/3_CUDA_Features/cdpSimpleQuicksort/Makefile b/Samples/3_CUDA_Features/cdpSimpleQuicksort/Makefile index 5771acad3..1bf519d50 100644 --- a/Samples/3_CUDA_Features/cdpSimpleQuicksort/Makefile +++ b/Samples/3_CUDA_Features/cdpSimpleQuicksort/Makefile @@ -285,9 +285,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 61 70 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/cdpSimpleQuicksort/NsightEclipse.xml 
b/Samples/3_CUDA_Features/cdpSimpleQuicksort/NsightEclipse.xml index 827146192..9c1960819 100644 --- a/Samples/3_CUDA_Features/cdpSimpleQuicksort/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/cdpSimpleQuicksort/NsightEclipse.xml @@ -6,13 +6,13 @@ -dc - cudaDeviceSetLimit - cudaFree + cudaStreamCreateWithFlags + cudaMemcpy cudaStreamDestroy + cudaFree cudaDeviceSynchronize + cudaDeviceSetLimit cudaMalloc - cudaStreamCreateWithFlags - cudaMemcpy cudaGetDeviceProperties @@ -55,6 +55,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/cdpSimpleQuicksort/README.md b/Samples/3_CUDA_Features/cdpSimpleQuicksort/README.md index 0805d3800..5a765c0a8 100644 --- a/Samples/3_CUDA_Features/cdpSimpleQuicksort/README.md +++ b/Samples/3_CUDA_Features/cdpSimpleQuicksort/README.md @@ -10,7 +10,7 @@ CUDA Dynamic Parallelism ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 
](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaDeviceSetLimit, cudaFree, cudaStreamDestroy, cudaDeviceSynchronize, cudaMalloc, cudaStreamCreateWithFlags, cudaMemcpy, cudaGetDeviceProperties +cudaStreamCreateWithFlags, cudaMemcpy, cudaStreamDestroy, cudaFree, cudaDeviceSynchronize, cudaDeviceSetLimit, cudaMalloc, cudaGetDeviceProperties ## Dependencies needed to build/run [CDP](../../../README.md#cdp) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2017.vcxproj b/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2017.vcxproj index 1f396df63..701ef7dfd 100644 --- a/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/cdpSimpleQuicksort.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2019.vcxproj b/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2019.vcxproj index e926d0d1a..be79af0a7 100644 --- a/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cdpSimpleQuicksort.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2022.vcxproj b/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2022.vcxproj index bf24f8f3e..601d665a4 100644 --- 
a/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cdpSimpleQuicksort.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/3_CUDA_Features/cudaCompressibleMemory/Makefile b/Samples/3_CUDA_Features/cudaCompressibleMemory/Makefile index 7118ad8c4..9d7f9adfc 100644 --- a/Samples/3_CUDA_Features/cudaCompressibleMemory/Makefile +++ b/Samples/3_CUDA_Features/cudaCompressibleMemory/Makefile @@ -293,9 +293,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/cudaCompressibleMemory/README.md b/Samples/3_CUDA_Features/cudaCompressibleMemory/README.md index e95e49045..cd28b6f24 100644 --- a/Samples/3_CUDA_Features/cudaCompressibleMemory/README.md +++ b/Samples/3_CUDA_Features/cudaCompressibleMemory/README.md @@ -10,7 +10,7 @@ CUDA Driver API, Compressible Memory, MMAP ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 
](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuMemRelease, cuCtxGetDevice, cuMemGetAllocationPropertiesFromHandle, cuMemSetAccess, cuMemMap, cuMemCreate, cuMemAddressFree, cuMemGetAllocationGranularity, cuMemUnmap, cuMemAddressReserve, cuDeviceGetAttribute +cuMemGetAllocationPropertiesFromHandle, cuMemCreate, cuDeviceGetAttribute, cuCtxGetDevice, cuMemGetAllocationGranularity, cuMemAddressFree, cuMemUnmap, cuMemMap, cuMemRelease, cuMemAddressReserve, cuMemSetAccess ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaEventRecord, cudaEventCreate, cudaOccupancyMaxPotentialBlockSize, cudaEventElapsedTime, cudaEventSynchronize, cudaMemcpy +cudaMemcpy, cudaEventSynchronize, cudaEventRecord, cudaEventElapsedTime, cudaOccupancyMaxPotentialBlockSize, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 
11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2017.vcxproj b/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2017.vcxproj index cb21616cd..5fd82a2d2 100644 --- a/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/cudaCompressibleMemory.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2019.vcxproj b/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2019.vcxproj index f4763a3e8..f01382784 100644 --- a/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cudaCompressibleMemory.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; 
-Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2022.vcxproj b/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2022.vcxproj index 1a116a853..5c5b989cb 100644 --- a/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cudaCompressibleMemory.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/3_CUDA_Features/cudaTensorCoreGemm/Makefile b/Samples/3_CUDA_Features/cudaTensorCoreGemm/Makefile index cd5ed03c6..a949034f5 100644 --- a/Samples/3_CUDA_Features/cudaTensorCoreGemm/Makefile +++ b/Samples/3_CUDA_Features/cudaTensorCoreGemm/Makefile @@ -291,9 +291,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 70 72 75 80 86 87 +SMS ?= 70 72 75 80 86 87 90 else -SMS ?= 70 75 80 86 +SMS ?= 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/cudaTensorCoreGemm/NsightEclipse.xml b/Samples/3_CUDA_Features/cudaTensorCoreGemm/NsightEclipse.xml index 25dc757d5..e4306062b 100644 --- a/Samples/3_CUDA_Features/cudaTensorCoreGemm/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/cudaTensorCoreGemm/NsightEclipse.xml @@ -6,18 +6,18 @@ -maxrregcount=255 - cudaMemset + cudaMemcpy cudaFree - cudaEventRecord - cudaEventCreate - cudaFuncSetAttribute - cudaEventElapsedTime + cudaGetErrorString 
+ cudaGetLastError cudaEventSynchronize + cudaFuncSetAttribute + cudaEventRecord + cudaMemset cudaMalloc - cudaGetLastError - cudaMemcpy - cudaGetErrorString + cudaEventElapsedTime cudaGetDeviceProperties + cudaEventCreate sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/cudaTensorCoreGemm/README.md b/Samples/3_CUDA_Features/cudaTensorCoreGemm/README.md index 5c7a02ca7..ed9ca03e2 100644 --- a/Samples/3_CUDA_Features/cudaTensorCoreGemm/README.md +++ b/Samples/3_CUDA_Features/cudaTensorCoreGemm/README.md @@ -14,7 +14,7 @@ Matrix Multiply, WMMA, Tensor Cores ## Supported SM Architectures -[SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -27,11 +27,11 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaEventRecord, cudaEventCreate, cudaFuncSetAttribute, cudaEventElapsedTime, cudaEventSynchronize, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties +cudaMemcpy, cudaFree, cudaGetErrorString, cudaGetLastError, cudaEventSynchronize, cudaFuncSetAttribute, cudaEventRecord, cudaMemset, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2017.vcxproj b/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2017.vcxproj index 6f38472db..a5854708c 100644 --- a/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/cudaTensorCoreGemm.exe - compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2019.vcxproj b/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2019.vcxproj index 32c37bb49..5a7700ba1 100644 --- a/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cudaTensorCoreGemm.exe - compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2022.vcxproj b/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2022.vcxproj index 24801f678..3a4f102a2 100644 --- a/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cudaTensorCoreGemm.exe - 
compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/Makefile b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/Makefile index e8ed96d9b..a8731ad23 100644 --- a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/Makefile +++ b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/Makefile @@ -322,9 +322,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 80 86 87 +SMS ?= 80 86 87 90 else -SMS ?= 80 86 +SMS ?= 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/NsightEclipse.xml b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/NsightEclipse.xml index 2cd658145..f28f86b30 100644 --- a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/NsightEclipse.xml @@ -6,18 +6,18 @@ --std=c++11 - cudaMemset + cudaMemcpy cudaFree - cudaEventRecord - cudaEventCreate - cudaFuncSetAttribute - cudaEventElapsedTime + cudaGetErrorString + cudaGetLastError cudaEventSynchronize + cudaFuncSetAttribute + cudaEventRecord + cudaMemset cudaMalloc - cudaGetLastError - cudaMemcpy - cudaGetErrorString + cudaEventElapsedTime cudaGetDeviceProperties + cudaEventCreate whole @@ -52,6 +52,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/README.md b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/README.md index 13b8e8e40..8699aa21f 100644 --- a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/README.md +++ b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/README.md @@ -10,7 +10,7 @@ Matrix Multiply, WMMA, Tensor Cores ## Supported SM Architectures -[SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 8.0 
](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaEventRecord, cudaEventCreate, cudaFuncSetAttribute, cudaEventElapsedTime, cudaEventSynchronize, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties +cudaMemcpy, cudaFree, cudaGetErrorString, cudaGetLastError, cudaEventSynchronize, cudaFuncSetAttribute, cudaEventRecord, cudaMemset, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate ## Dependencies needed to build/run [CPP11](../../../README.md#cpp11) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2017.vcxproj b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2017.vcxproj index 8e82e63ea..1dbcff5ee 100644 --- a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/dmmaTensorCoreGemm.exe - compute_80,sm_80;compute_86,sm_86; + compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2019.vcxproj b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2019.vcxproj index 7c6849bde..0f024a266 100644 --- a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/dmmaTensorCoreGemm.exe - compute_80,sm_80;compute_86,sm_86; + compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2022.vcxproj b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2022.vcxproj index 480cc0b81..6dcc0232a 100644 --- a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/dmmaTensorCoreGemm.exe - compute_80,sm_80;compute_86,sm_86; + compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/Makefile b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/Makefile index 
7a7fd2fa1..6fdd9aab7 100644 --- a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/Makefile +++ b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/Makefile @@ -310,9 +310,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 70 72 75 80 86 87 +SMS ?= 70 72 75 80 86 87 90 else -SMS ?= 70 75 80 86 +SMS ?= 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/NsightEclipse.xml b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/NsightEclipse.xml index 60eb92c8a..83bc6d5c5 100644 --- a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/NsightEclipse.xml @@ -6,20 +6,20 @@ --std=c++11 + cudaStreamCreateWithFlags + cudaMalloc + cudaDeviceGetAttribute cudaFree - cudaEventRecord cudaMallocHost - cudaEventCreate - cudaMemsetAsync - cudaEventElapsedTime cudaEventSynchronize - cudaDeviceGetAttribute + cudaEventRecord cudaFreeHost - cudaMalloc - cudaStreamCreateWithFlags - cudaEventDestroy cudaStreamSynchronize + cudaEventDestroy + cudaEventElapsedTime + cudaMemsetAsync cudaMemcpyAsync + cudaEventCreate whole @@ -59,6 +59,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/README.md b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/README.md index b5adb76f4..a2d7d6a34 100644 --- a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/README.md +++ b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/README.md @@ -10,7 +10,7 @@ CUDA Runtime API, Linear Algebra, CPP11 CUDA ## Supported SM Architectures -[SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) 
[SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaEventRecord, cudaMallocHost, cudaEventCreate, cudaMemsetAsync, cudaEventElapsedTime, cudaEventSynchronize, cudaDeviceGetAttribute, cudaFreeHost, cudaMalloc, cudaStreamCreateWithFlags, cudaEventDestroy, cudaStreamSynchronize, cudaMemcpyAsync +cudaStreamCreateWithFlags, cudaMalloc, cudaDeviceGetAttribute, cudaFree, cudaMallocHost, cudaEventSynchronize, cudaEventRecord, cudaFreeHost, cudaStreamSynchronize, cudaEventDestroy, cudaEventElapsedTime, cudaMemsetAsync, cudaMemcpyAsync, cudaEventCreate ## Dependencies needed to build/run [CPP11](../../../README.md#cpp11) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2017.vcxproj b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2017.vcxproj index 100834d71..41bedad12 100644 --- a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/globalToShmemAsyncCopy.exe - compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2019.vcxproj b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2019.vcxproj index 5f1c0721b..7db3f2315 100644 --- a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/globalToShmemAsyncCopy.exe - compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2022.vcxproj b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2022.vcxproj index 2b82306f1..5351130d8 100644 --- a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/globalToShmemAsyncCopy.exe - 
compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/graphMemoryFootprint/Makefile b/Samples/3_CUDA_Features/graphMemoryFootprint/Makefile index f4647da1f..0233718bf 100644 --- a/Samples/3_CUDA_Features/graphMemoryFootprint/Makefile +++ b/Samples/3_CUDA_Features/graphMemoryFootprint/Makefile @@ -285,9 +285,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/graphMemoryFootprint/NsightEclipse.xml b/Samples/3_CUDA_Features/graphMemoryFootprint/NsightEclipse.xml index 72be4775f..7f7d842b7 100644 --- a/Samples/3_CUDA_Features/graphMemoryFootprint/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/graphMemoryFootprint/NsightEclipse.xml @@ -3,23 +3,23 @@ graphMemoryFootprint - cudaGraphAddMemFreeNode + cudaGraphAddMemAllocNode + cudaStreamCreateWithFlags + cudaGraphInstantiate cudaStreamDestroy cudaFree - cudaGraphExecDestroy - cudaGraphInstantiate cudaDeviceGetAttribute - cudaDriverGetVersion - cudaGraphCreate cudaGraphAddKernelNode - cudaGraphAddMemAllocNode - cudaStreamCreateWithFlags - cudaDeviceGraphMemTrim - cudaStreamSynchronize + cudaGraphAddMemFreeNode cudaDeviceGetGraphMemAttribute + cudaGraphCreate cudaGraphDestroy - cudaGetDeviceProperties + cudaDriverGetVersion cudaGraphLaunch + cudaStreamSynchronize + cudaDeviceGraphMemTrim + cudaGetDeviceProperties + cudaGraphExecDestroy whole @@ -56,6 +56,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/graphMemoryFootprint/README.md b/Samples/3_CUDA_Features/graphMemoryFootprint/README.md index 76c04cf75..6286fa0df 100644 --- 
a/Samples/3_CUDA_Features/graphMemoryFootprint/README.md +++ b/Samples/3_CUDA_Features/graphMemoryFootprint/README.md @@ -10,7 +10,7 @@ CUDA Runtime API, Performance Strategies, CUDA Graphs ## Supported SM Architectures -[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaGraphAddMemFreeNode, cudaStreamDestroy, cudaFree, cudaGraphExecDestroy, cudaGraphInstantiate, cudaDeviceGetAttribute, cudaDriverGetVersion, cudaGraphCreate, cudaGraphAddKernelNode, cudaGraphAddMemAllocNode, cudaStreamCreateWithFlags, cudaDeviceGraphMemTrim, cudaStreamSynchronize, cudaDeviceGetGraphMemAttribute, cudaGraphDestroy, cudaGetDeviceProperties, cudaGraphLaunch +cudaGraphAddMemAllocNode, cudaStreamCreateWithFlags, cudaGraphInstantiate, cudaStreamDestroy, cudaFree, cudaDeviceGetAttribute, cudaGraphAddKernelNode, cudaGraphAddMemFreeNode, cudaDeviceGetGraphMemAttribute, cudaGraphCreate, cudaGraphDestroy, cudaDriverGetVersion, cudaGraphLaunch, cudaStreamSynchronize, cudaDeviceGraphMemTrim, cudaGetDeviceProperties, cudaGraphExecDestroy ## 
Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2017.vcxproj b/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2017.vcxproj index 412492cbd..e65d0b667 100644 --- a/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/graphMemoryFootprint.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2019.vcxproj b/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2019.vcxproj index 28c073169..82b981424 100644 --- a/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/graphMemoryFootprint.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2022.vcxproj b/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2022.vcxproj index 5eaef34bb..cbf3dd312 100644 --- a/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/graphMemoryFootprint.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/graphMemoryNodes/Makefile b/Samples/3_CUDA_Features/graphMemoryNodes/Makefile index a233d14bb..b760fc44b 100644 --- a/Samples/3_CUDA_Features/graphMemoryNodes/Makefile +++ b/Samples/3_CUDA_Features/graphMemoryNodes/Makefile @@ -285,9 +285,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/graphMemoryNodes/NsightEclipse.xml b/Samples/3_CUDA_Features/graphMemoryNodes/NsightEclipse.xml index 5fd9f688d..a2cc3608d 100644 --- a/Samples/3_CUDA_Features/graphMemoryNodes/NsightEclipse.xml +++ 
b/Samples/3_CUDA_Features/graphMemoryNodes/NsightEclipse.xml @@ -3,32 +3,32 @@ graphMemoryNodes - cudaMallocAsync - cudaStreamCreateWithFlags cudaMemcpy - cudaMemcpyAsync - cudaStreamDestroy - cudaMallocManaged - cudaEventCreate + cudaDeviceGetAttribute cudaDriverGetVersion - cudaGraphCreate - cudaGraphAddMemAllocNode - cudaMalloc + cudaGraphLaunch cudaEventDestroy + cudaMallocAsync cudaStreamEndCapture - cudaGraphExecDestroy + cudaMallocManaged + cudaGraphCreate + cudaMemcpyAsync + cudaFreeAsync + cudaStreamCreateWithFlags + cudaGraphInstantiate + cudaStreamDestroy cudaStreamBeginCapture - cudaDeviceGetAttribute - cudaStreamSynchronize - cudaGraphDestroy - cudaGraphLaunch - cudaGraphAddMemFreeNode cudaStreamWaitEvent + cudaEventCreate + cudaGraphAddMemAllocNode cudaFree - cudaEventRecord - cudaGraphInstantiate cudaGraphAddKernelNode - cudaFreeAsync + cudaGraphAddMemFreeNode + cudaGraphDestroy + cudaEventRecord + cudaStreamSynchronize + cudaMalloc + cudaGraphExecDestroy whole @@ -67,6 +67,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/graphMemoryNodes/README.md b/Samples/3_CUDA_Features/graphMemoryNodes/README.md index f3e934e10..7bf467a4d 100644 --- a/Samples/3_CUDA_Features/graphMemoryNodes/README.md +++ b/Samples/3_CUDA_Features/graphMemoryNodes/README.md @@ -10,7 +10,7 @@ CUDA Graphs, Stream Capture ## Supported SM Architectures -[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 
](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMallocAsync, cudaStreamCreateWithFlags, cudaMemcpy, cudaMemcpyAsync, cudaStreamDestroy, cudaMallocManaged, cudaEventCreate, cudaDriverGetVersion, cudaGraphCreate, cudaGraphAddMemAllocNode, cudaMalloc, cudaEventDestroy, cudaStreamEndCapture, cudaGraphExecDestroy, cudaStreamBeginCapture, cudaDeviceGetAttribute, cudaStreamSynchronize, cudaGraphDestroy, cudaGraphLaunch, cudaGraphAddMemFreeNode, cudaStreamWaitEvent, cudaFree, cudaEventRecord, cudaGraphInstantiate, cudaGraphAddKernelNode, cudaFreeAsync +cudaMemcpy, cudaDeviceGetAttribute, cudaDriverGetVersion, cudaGraphLaunch, cudaEventDestroy, cudaMallocAsync, cudaStreamEndCapture, cudaMallocManaged, cudaGraphCreate, cudaMemcpyAsync, cudaFreeAsync, cudaStreamCreateWithFlags, cudaGraphInstantiate, cudaStreamDestroy, cudaStreamBeginCapture, cudaStreamWaitEvent, cudaEventCreate, cudaGraphAddMemAllocNode, cudaFree, cudaGraphAddKernelNode, cudaGraphAddMemFreeNode, cudaGraphDestroy, cudaEventRecord, cudaStreamSynchronize, cudaMalloc, cudaGraphExecDestroy ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2017.vcxproj b/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2017.vcxproj index 54629b898..f025d778b 100644 --- a/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/graphMemoryNodes.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2019.vcxproj b/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2019.vcxproj index ce007363f..df298580f 100644 --- a/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/graphMemoryNodes.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2022.vcxproj b/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2022.vcxproj index 70d59c50e..5f123dc96 100644 --- 
a/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/graphMemoryNodes.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/immaTensorCoreGemm/Makefile b/Samples/3_CUDA_Features/immaTensorCoreGemm/Makefile index 0018823dd..840e9399c 100644 --- a/Samples/3_CUDA_Features/immaTensorCoreGemm/Makefile +++ b/Samples/3_CUDA_Features/immaTensorCoreGemm/Makefile @@ -291,9 +291,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 72 75 80 86 87 +SMS ?= 72 75 80 86 87 90 else -SMS ?= 75 80 86 +SMS ?= 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/immaTensorCoreGemm/NsightEclipse.xml b/Samples/3_CUDA_Features/immaTensorCoreGemm/NsightEclipse.xml index ce92f2df5..35a48fe6a 100644 --- a/Samples/3_CUDA_Features/immaTensorCoreGemm/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/immaTensorCoreGemm/NsightEclipse.xml @@ -6,18 +6,18 @@ -maxrregcount=255 - cudaMemset + cudaMemcpy cudaFree - cudaEventRecord - cudaEventCreate - cudaFuncSetAttribute - cudaEventElapsedTime + cudaGetErrorString + cudaGetLastError cudaEventSynchronize + cudaFuncSetAttribute + cudaEventRecord + cudaMemset cudaMalloc - cudaGetLastError - cudaMemcpy - cudaGetErrorString + cudaEventElapsedTime cudaGetDeviceProperties + cudaEventCreate whole @@ -47,6 +47,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/immaTensorCoreGemm/README.md 
b/Samples/3_CUDA_Features/immaTensorCoreGemm/README.md index 61b407100..db9d48029 100644 --- a/Samples/3_CUDA_Features/immaTensorCoreGemm/README.md +++ b/Samples/3_CUDA_Features/immaTensorCoreGemm/README.md @@ -10,7 +10,7 @@ Matrix Multiply, WMMA, Tensor Cores ## Supported SM Architectures -[SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaEventRecord, cudaEventCreate, cudaFuncSetAttribute, cudaEventElapsedTime, cudaEventSynchronize, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties +cudaMemcpy, cudaFree, cudaGetErrorString, cudaGetLastError, cudaEventSynchronize, cudaFuncSetAttribute, cudaEventRecord, cudaMemset, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2017.vcxproj b/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2017.vcxproj index 79b1a3195..b48ad38fa 100644 --- a/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/immaTensorCoreGemm.exe - compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2019.vcxproj b/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2019.vcxproj index 9351844dc..b5931f576 100644 --- a/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/immaTensorCoreGemm.exe - compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2022.vcxproj b/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2022.vcxproj index 17495837d..90ae33904 100644 --- a/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/immaTensorCoreGemm.exe - compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git 
a/Samples/3_CUDA_Features/jacobiCudaGraphs/Makefile b/Samples/3_CUDA_Features/jacobiCudaGraphs/Makefile index 35fadaa55..fa42ed87f 100644 --- a/Samples/3_CUDA_Features/jacobiCudaGraphs/Makefile +++ b/Samples/3_CUDA_Features/jacobiCudaGraphs/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/jacobiCudaGraphs/NsightEclipse.xml b/Samples/3_CUDA_Features/jacobiCudaGraphs/NsightEclipse.xml index a752218c7..e1572b437 100644 --- a/Samples/3_CUDA_Features/jacobiCudaGraphs/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/jacobiCudaGraphs/NsightEclipse.xml @@ -3,28 +3,28 @@ jacobiCudaGraphs - cudaGraphAddMemsetNode - cudaStreamCreateWithFlags - cudaMemcpyAsync + cudaExtent + cudaGraphLaunch + cudaGraphAddMemcpyNode cudaMallocHost cudaPitchedPtr - cudaGraphCreate - cudaMalloc - cudaPos - cudaGraphAddMemcpyNode cudaStreamEndCapture - cudaGraphExecDestroy - cudaStreamBeginCapture + cudaGraphCreate + cudaFreeHost + cudaMemsetAsync + cudaMemcpyAsync cudaGraphExecKernelNodeSetParams - cudaStreamSynchronize - cudaGraphLaunch - cudaFree + cudaStreamCreateWithFlags cudaGraphInstantiate - cudaExtent - cudaMemsetAsync - cudaFreeHost - cudaGraphAddKernelNode + cudaStreamBeginCapture + cudaFree cudaGraphExecUpdate + cudaGraphAddKernelNode + cudaPos + cudaStreamSynchronize + cudaGraphAddMemsetNode + cudaMalloc + cudaGraphExecDestroy whole @@ -66,6 +66,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/jacobiCudaGraphs/README.md b/Samples/3_CUDA_Features/jacobiCudaGraphs/README.md index b9d76e69e..687221875 100644 --- a/Samples/3_CUDA_Features/jacobiCudaGraphs/README.md +++ b/Samples/3_CUDA_Features/jacobiCudaGraphs/README.md @@ -10,7 +10,7 @@ CUDA Graphs, Stream Capture, Instantiated CUDA 
Graph Update, Cooperative Groups ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaGraphAddMemsetNode, cudaStreamCreateWithFlags, cudaMemcpyAsync, cudaMallocHost, cudaPitchedPtr, cudaGraphCreate, cudaMalloc, cudaPos, cudaGraphAddMemcpyNode, cudaStreamEndCapture, cudaGraphExecDestroy, cudaStreamBeginCapture, cudaGraphExecKernelNodeSetParams, cudaStreamSynchronize, cudaGraphLaunch, cudaFree, cudaGraphInstantiate, cudaExtent, cudaMemsetAsync, cudaFreeHost, cudaGraphAddKernelNode, 
cudaGraphExecUpdate +cudaExtent, cudaGraphLaunch, cudaGraphAddMemcpyNode, cudaMallocHost, cudaPitchedPtr, cudaStreamEndCapture, cudaGraphCreate, cudaFreeHost, cudaMemsetAsync, cudaMemcpyAsync, cudaGraphExecKernelNodeSetParams, cudaStreamCreateWithFlags, cudaGraphInstantiate, cudaStreamBeginCapture, cudaFree, cudaGraphExecUpdate, cudaGraphAddKernelNode, cudaPos, cudaStreamSynchronize, cudaGraphAddMemsetNode, cudaMalloc, cudaGraphExecDestroy ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2017.vcxproj b/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2017.vcxproj index e8a5153bc..489735bcc 100644 --- a/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/jacobiCudaGraphs.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2019.vcxproj b/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2019.vcxproj index be8464540..b6440eb15 100644 --- a/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ 
$(OutDir)/jacobiCudaGraphs.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2022.vcxproj b/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2022.vcxproj index e7b1ea437..2d37b0879 100644 --- a/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/jacobiCudaGraphs.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/Makefile b/Samples/3_CUDA_Features/memMapIPCDrv/Makefile index fd7d6ba76..ae7b17d1e 100644 --- a/Samples/3_CUDA_Features/memMapIPCDrv/Makefile +++ b/Samples/3_CUDA_Features/memMapIPCDrv/Makefile @@ -274,24 +274,6 @@ ifeq ($(TARGET_OS),darwin) SAMPLE_ENABLED := 0 endif -# This sample is not supported on ARMv7 -ifeq ($(TARGET_ARCH),armv7l) - $(info >>> WARNING - memMapIPCDrv is not supported on ARMv7 - waiving sample <<<) - SAMPLE_ENABLED := 0 -endif - -# This sample is not supported on aarch64 -ifeq ($(TARGET_ARCH),aarch64) - $(info >>> WARNING - memMapIPCDrv is not supported on aarch64 - waiving 
sample <<<) - SAMPLE_ENABLED := 0 -endif - -# This sample is not supported on sbsa -ifeq ($(TARGET_ARCH),sbsa) - $(info >>> WARNING - memMapIPCDrv is not supported on sbsa - waiving sample <<<) - SAMPLE_ENABLED := 0 -endif - ALL_LDFLAGS := ALL_LDFLAGS += $(ALL_CCFLAGS) ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS)) diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/README.md b/Samples/3_CUDA_Features/memMapIPCDrv/README.md index 435af2edd..bace5c4f1 100644 --- a/Samples/3_CUDA_Features/memMapIPCDrv/README.md +++ b/Samples/3_CUDA_Features/memMapIPCDrv/README.md @@ -10,27 +10,27 @@ CUDA Driver API, cuMemMap IPC, MMAP ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 
](https://developer.nvidia.com/cuda-gpus) ## Supported OSes -Linux, Windows +Linux, Windows, QNX ## Supported CPU Architecture -x86_64, ppc64le +x86_64, ppc64le, armv7l, aarch64 ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuCtxSetCurrent, cuMemSetAccess, cuMemcpyDtoHAsync, cuStreamDestroy, cuInit, cuMemAddressReserve, cuCtxDestroy, cuModuleGetFunction, cuModuleLoad, cuStreamCreate, cuCtxCreate, cuMemExportToShareableHandle, cuMemAddressFree, cuMemGetAllocationGranularity, cuModuleLoadDataEx, cuDeviceGet, cuMemUnmap, cuDeviceGetAttribute, cuMemRelease, cuCtxEnablePeerAccess, cuMemMap, cuMemImportFromShareableHandle, cuMemCreate, cuStreamSynchronize, cuDeviceCanAccessPeer, cuDeviceGetCount, cuLaunchKernel, cuOccupancyMaxActiveBlocksPerMultiprocessor +cuDeviceCanAccessPeer, cuMemImportFromShareableHandle, cuModuleLoadDataEx, cuModuleGetFunction, cuMemSetAccess, cuModuleLoad, cuStreamCreate, cuMemRelease, cuInit, cuLaunchKernel, cuMemcpyDtoHAsync, cuMemCreate, cuDeviceGet, cuCtxDestroy, cuDeviceGetCount, cuMemMap, cuMemExportToShareableHandle, cuStreamSynchronize, cuCtxEnablePeerAccess, cuDeviceGetAttribute, cuOccupancyMaxActiveBlocksPerMultiprocessor, cuCtxSetCurrent, cuMemGetAllocationGranularity, cuMemAddressFree, cuMemUnmap, cuCtxCreate, cuStreamDestroy, cuMemAddressReserve ## Dependencies needed to build/run [IPC](../../../README.md#ipc) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run @@ -52,9 +52,9 @@ $ cd $ make ``` The samples makefiles can take advantage of certain options: -* **TARGET_ARCH=** - cross-compile targeting a specific architecture. Allowed architectures are x86_64, ppc64le. 
+* **TARGET_ARCH=** - cross-compile targeting a specific architecture. Allowed architectures are x86_64, ppc64le, armv7l, aarch64. By default, TARGET_ARCH is set to HOST_ARCH. On a x86_64 machine, not setting TARGET_ARCH is the equivalent of setting TARGET_ARCH=x86_64.
-`$ make TARGET_ARCH=x86_64`
`$ make TARGET_ARCH=ppc64le`
+`$ make TARGET_ARCH=x86_64`
`$ make TARGET_ARCH=ppc64le`
`$ make TARGET_ARCH=armv7l`
`$ make TARGET_ARCH=aarch64`
See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details. * **dbg=1** - build with debug symbols ``` diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2017.vcxproj b/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2017.vcxproj index 02b0d7ead..4d7d058a2 100644 --- a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -112,6 +112,6 @@ - + diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2019.vcxproj b/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2019.vcxproj index 3abc66c66..287fbc927 100644 --- a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -108,6 +108,6 @@ - + diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2022.vcxproj b/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2022.vcxproj index 8da6a6cdc..d6bc39c60 100644 --- a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -108,6 +108,6 @@ - + diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIpc.cpp b/Samples/3_CUDA_Features/memMapIPCDrv/memMapIpc.cpp index ba275b64d..19d6aa608 100644 --- a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIpc.cpp +++ b/Samples/3_CUDA_Features/memMapIPCDrv/memMapIpc.cpp @@ -595,10 +595,6 @@ static void parentProcess(char *app) { // Host code int main(int argc, char **argv) { -#if defined(__arm__) || defined(__aarch64__) - printf("Not supported on ARM\n"); - return EXIT_WAIVED; -#else // Initialize checkCudaErrors(cuInit(0)); @@ -608,7 +604,6 @@ int main(int argc, char **argv) { childProcess(atoi(argv[1]), atoi(argv[2]), argv); } return EXIT_SUCCESS; -#endif } bool inline findModulePath(const char *module_file, string &module_path, @@ -643,4 +638,4 @@ bool 
inline findModulePath(const char *module_file, string &module_path, return true; } -} \ No newline at end of file +} diff --git a/Samples/3_CUDA_Features/newdelete/Makefile b/Samples/3_CUDA_Features/newdelete/Makefile index 9e4ba3b66..48c352f31 100644 --- a/Samples/3_CUDA_Features/newdelete/Makefile +++ b/Samples/3_CUDA_Features/newdelete/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/newdelete/NsightEclipse.xml b/Samples/3_CUDA_Features/newdelete/NsightEclipse.xml index edcd52704..ae7639e44 100644 --- a/Samples/3_CUDA_Features/newdelete/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/newdelete/NsightEclipse.xml @@ -3,11 +3,11 @@ newdelete - cudaDeviceSetLimit + cudaMemcpy cudaFree cudaDeviceSynchronize + cudaDeviceSetLimit cudaMalloc - cudaMemcpy whole @@ -46,6 +46,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/newdelete/README.md b/Samples/3_CUDA_Features/newdelete/README.md index a976004d4..1df54ae29 100644 --- a/Samples/3_CUDA_Features/newdelete/README.md +++ b/Samples/3_CUDA_Features/newdelete/README.md @@ -10,7 +10,7 @@ Device Memory Allocation, C++ Templates ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 
](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaDeviceSetLimit, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy +cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaDeviceSetLimit, cudaMalloc ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/3_CUDA_Features/newdelete/newdelete_vs2017.vcxproj b/Samples/3_CUDA_Features/newdelete/newdelete_vs2017.vcxproj index 04f1a3b28..f5546f277 100644 --- a/Samples/3_CUDA_Features/newdelete/newdelete_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/newdelete/newdelete_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/newdelete.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/3_CUDA_Features/newdelete/newdelete_vs2019.vcxproj b/Samples/3_CUDA_Features/newdelete/newdelete_vs2019.vcxproj index 75e28c88f..4f6a09f44 100644 --- a/Samples/3_CUDA_Features/newdelete/newdelete_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/newdelete/newdelete_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/newdelete.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/newdelete/newdelete_vs2022.vcxproj b/Samples/3_CUDA_Features/newdelete/newdelete_vs2022.vcxproj index 20c428e5f..cec331d86 100644 --- a/Samples/3_CUDA_Features/newdelete/newdelete_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/newdelete/newdelete_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/newdelete.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/ptxjit/NsightEclipse.xml b/Samples/3_CUDA_Features/ptxjit/NsightEclipse.xml index a5dc2e54f..2beac119a 100644 --- a/Samples/3_CUDA_Features/ptxjit/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/ptxjit/NsightEclipse.xml @@ -3,18 +3,18 @@ ptxjit + cuLaunchKernel + cuModuleLoadData + cuLinkCreate cuModuleGetFunction cuLinkAddData - cuModuleLoadData - cuLaunchKernel cuModuleUnload - cuLinkComplete - cuLinkCreate cuLinkDestroy - cudaDriverGetVersion - cudaFree + cuLinkComplete cudaMalloc + cudaDriverGetVersion cudaMemcpy + cudaFree separate @@ -57,6 +57,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/ptxjit/README.md b/Samples/3_CUDA_Features/ptxjit/README.md index 021b4b400..314c62844 100644 --- a/Samples/3_CUDA_Features/ptxjit/README.md +++ b/Samples/3_CUDA_Features/ptxjit/README.md @@ -10,7 +10,7 @@ CUDA Driver API ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 
](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuModuleGetFunction, cuLinkAddData, cuModuleLoadData, cuLaunchKernel, cuModuleUnload, cuLinkComplete, cuLinkCreate, cuLinkDestroy +cuLaunchKernel, cuModuleLoadData, cuLinkCreate, cuModuleGetFunction, cuLinkAddData, cuModuleUnload, cuLinkDestroy, cuLinkComplete ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaDriverGetVersion, cudaFree, cudaMalloc, cudaMemcpy +cudaMalloc, cudaDriverGetVersion, cudaMemcpy, cudaFree ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2017.vcxproj b/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2017.vcxproj index 6c7fa953c..8544a38c4 100644 --- a/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -111,6 +111,6 @@ - + diff --git a/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2019.vcxproj b/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2019.vcxproj index 9d3b8c111..d0c152c80 100644 --- a/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -107,6 +107,6 @@ - + diff --git a/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2022.vcxproj b/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2022.vcxproj index f8eecb9f2..c4dbf9127 100644 --- a/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -107,6 +107,6 @@ - + diff --git a/Samples/3_CUDA_Features/simpleCudaGraphs/Makefile b/Samples/3_CUDA_Features/simpleCudaGraphs/Makefile index a64fbbf6d..d956e9b4e 100644 --- a/Samples/3_CUDA_Features/simpleCudaGraphs/Makefile +++ b/Samples/3_CUDA_Features/simpleCudaGraphs/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/simpleCudaGraphs/NsightEclipse.xml b/Samples/3_CUDA_Features/simpleCudaGraphs/NsightEclipse.xml index 0b60949bb..56db08fb3 100644 --- a/Samples/3_CUDA_Features/simpleCudaGraphs/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/simpleCudaGraphs/NsightEclipse.xml @@ -3,37 +3,37 @@ simpleCudaGraphs - cudaGraphAddMemsetNode - cudaGraphsUsingStreamCapture - cudaMemcpyAsync - cudaGraphGetNodes - cudaStreamDestroy - cudaMallocHost 
cudaGraphClone - cudaEventCreate - cudaPitchedPtr - cudaGraphCreate - cudaMalloc - cudaPos + cudaExtent + cudaGraphLaunch + cudaStreamCreate + cudaLaunchHostFunc cudaGraphAddMemcpyNode + cudaMallocHost + cudaPitchedPtr cudaStreamEndCapture - cudaGraphExecDestroy - cudaStreamBeginCapture + cudaGraphCreate + cudaFreeHost + cudaGraphGetNodes + cudaMemsetAsync + cudaMemcpyAsync cudaGraphAddHostNode - cudaGraphsManual - cudaStreamSynchronize - cudaGraphDestroy - cudaGraphLaunch + cudaGraphInstantiate + cudaStreamDestroy + cudaStreamBeginCapture cudaStreamWaitEvent + cudaEventCreate + cudaMalloc cudaFree - cudaEventRecord - cudaStreamCreate - cudaGraphInstantiate - cudaLaunchHostFunc - cudaExtent - cudaMemsetAsync - cudaFreeHost + cudaPos cudaGraphAddKernelNode + cudaGraphDestroy + cudaEventRecord + cudaGraphsManual + cudaStreamSynchronize + cudaGraphAddMemsetNode + cudaGraphsUsingStreamCapture + cudaGraphExecDestroy whole @@ -73,6 +73,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/simpleCudaGraphs/README.md b/Samples/3_CUDA_Features/simpleCudaGraphs/README.md index 9a2c9249a..b421b2fb4 100644 --- a/Samples/3_CUDA_Features/simpleCudaGraphs/README.md +++ b/Samples/3_CUDA_Features/simpleCudaGraphs/README.md @@ -10,7 +10,7 @@ CUDA Graphs, Stream Capture ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 
](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaGraphAddMemsetNode, cudaGraphsUsingStreamCapture, cudaMemcpyAsync, cudaGraphGetNodes, cudaStreamDestroy, cudaMallocHost, cudaGraphClone, cudaEventCreate, cudaPitchedPtr, cudaGraphCreate, cudaMalloc, cudaPos, cudaGraphAddMemcpyNode, cudaStreamEndCapture, cudaGraphExecDestroy, cudaStreamBeginCapture, cudaGraphAddHostNode, cudaGraphsManual, cudaStreamSynchronize, cudaGraphDestroy, cudaGraphLaunch, cudaStreamWaitEvent, cudaFree, cudaEventRecord, cudaStreamCreate, cudaGraphInstantiate, cudaLaunchHostFunc, cudaExtent, cudaMemsetAsync, cudaFreeHost, cudaGraphAddKernelNode +cudaGraphClone, cudaExtent, cudaGraphLaunch, cudaStreamCreate, cudaLaunchHostFunc, cudaGraphAddMemcpyNode, cudaMallocHost, cudaPitchedPtr, cudaStreamEndCapture, cudaGraphCreate, cudaFreeHost, cudaGraphGetNodes, cudaMemsetAsync, cudaMemcpyAsync, cudaGraphAddHostNode, cudaGraphInstantiate, cudaStreamDestroy, cudaStreamBeginCapture, cudaStreamWaitEvent, cudaEventCreate, cudaMalloc, cudaFree, cudaPos, cudaGraphAddKernelNode, cudaGraphDestroy, cudaEventRecord, cudaGraphsManual, cudaStreamSynchronize, cudaGraphAddMemsetNode, cudaGraphsUsingStreamCapture, 
cudaGraphExecDestroy ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2017.vcxproj b/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2017.vcxproj index d9e1f37e1..a9525b01f 100644 --- a/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleCudaGraphs.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2019.vcxproj b/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2019.vcxproj index 7d3dfb84a..168b88abc 100644 --- a/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleCudaGraphs.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 
./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2022.vcxproj b/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2022.vcxproj index b41246b05..58840c801 100644 --- a/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleCudaGraphs.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/tf32TensorCoreGemm/Makefile b/Samples/3_CUDA_Features/tf32TensorCoreGemm/Makefile index eee8e8437..a5c514249 100644 --- a/Samples/3_CUDA_Features/tf32TensorCoreGemm/Makefile +++ b/Samples/3_CUDA_Features/tf32TensorCoreGemm/Makefile @@ -322,9 +322,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 80 86 87 +SMS ?= 80 86 87 90 else -SMS ?= 80 86 +SMS ?= 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/tf32TensorCoreGemm/NsightEclipse.xml b/Samples/3_CUDA_Features/tf32TensorCoreGemm/NsightEclipse.xml index 8213c761f..f21f38958 100644 --- a/Samples/3_CUDA_Features/tf32TensorCoreGemm/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/tf32TensorCoreGemm/NsightEclipse.xml @@ -7,18 +7,18 @@ --maxrregcount=128 - cudaMemset + cudaMemcpy cudaFree - cudaEventRecord - cudaEventCreate - cudaFuncSetAttribute - cudaEventElapsedTime + cudaGetErrorString + cudaGetLastError cudaEventSynchronize + cudaFuncSetAttribute + cudaEventRecord + cudaMemset cudaMalloc - cudaGetLastError 
- cudaMemcpy - cudaGetErrorString + cudaEventElapsedTime cudaGetDeviceProperties + cudaEventCreate whole @@ -53,6 +53,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/tf32TensorCoreGemm/README.md b/Samples/3_CUDA_Features/tf32TensorCoreGemm/README.md index 343eb6de0..d7f41f68e 100644 --- a/Samples/3_CUDA_Features/tf32TensorCoreGemm/README.md +++ b/Samples/3_CUDA_Features/tf32TensorCoreGemm/README.md @@ -10,7 +10,7 @@ Matrix Multiply, WMMA, Tensor Cores ## Supported SM Architectures -[SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaEventRecord, cudaEventCreate, cudaFuncSetAttribute, cudaEventElapsedTime, cudaEventSynchronize, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties +cudaMemcpy, cudaFree, cudaGetErrorString, cudaGetLastError, cudaEventSynchronize, cudaFuncSetAttribute, cudaEventRecord, cudaMemset, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate ## Dependencies needed to build/run [CPP11](../../../README.md#cpp11) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2017.vcxproj b/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2017.vcxproj index 6b002d886..2948b39d0 100644 --- a/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/tf32TensorCoreGemm.exe - compute_80,sm_80;compute_86,sm_86; + compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2019.vcxproj b/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2019.vcxproj index 4166d39e9..7a06218bc 100644 --- a/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/tf32TensorCoreGemm.exe - compute_80,sm_80;compute_86,sm_86; + compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2022.vcxproj b/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2022.vcxproj index c4534b653..f7b1d1a04 100644 --- a/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/tf32TensorCoreGemm.exe - compute_80,sm_80;compute_86,sm_86; + compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/Makefile b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/Makefile index 
e743176ee..aa25f1512 100644 --- a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/Makefile +++ b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/Makefile @@ -304,9 +304,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/NsightEclipse.xml b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/NsightEclipse.xml index 7f5952b94..f0457b127 100644 --- a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/NsightEclipse.xml @@ -6,11 +6,11 @@ --std=c++11 - cudaMemset + cudaMemcpy cudaFree cudaDeviceGetAttribute + cudaMemset cudaMalloc - cudaMemcpy @@ -50,6 +50,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/README.md b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/README.md index 69c27bdd1..f40b05ef1 100644 --- a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/README.md +++ b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/README.md @@ -10,7 +10,7 @@ Cooperative Groups, Atomic Intrinsics ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 
](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaDeviceGetAttribute, cudaMalloc, cudaMemcpy +cudaMemcpy, cudaFree, cudaDeviceGetAttribute, cudaMemset, cudaMalloc ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2017.vcxproj b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2017.vcxproj index d7339145e..0e2d5973f 100644 --- a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/warpAggregatedAtomicsCG.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2019.vcxproj b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2019.vcxproj index 36bd9bcac..a15057bb7 100644 --- a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/warpAggregatedAtomicsCG.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2022.vcxproj 
b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2022.vcxproj index ff0902b4b..8567bb365 100644 --- a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/warpAggregatedAtomicsCG.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2017.vcxproj b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2017.vcxproj index 695d8b1ad..61a190adc 100644 --- a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -117,6 +117,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2019.vcxproj b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2019.vcxproj index 702ecd0ab..438dd31e2 100644 --- a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -113,6 +113,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2022.vcxproj b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2022.vcxproj index 0e5f29290..867808495 100644 --- 
a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -113,6 +113,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/NsightEclipse.xml b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/NsightEclipse.xml index cc161f14a..60bd7e9d2 100644 --- a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/NsightEclipse.xml @@ -3,12 +3,12 @@ FilterBorderControlNPP + cudaRuntimeGetVersion cudaDeviceReset + cudaSetDevice cudaGetDeviceCount - cudaDriverGetVersion cudaDeviceInit - cudaSetDevice - cudaRuntimeGetVersion + cudaDriverGetVersion cudaGetDeviceProperties @@ -68,6 +68,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/README.md b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/README.md index 23f2bfb89..54a652ade 100644 --- a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/README.md +++ b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/README.md @@ -10,7 +10,7 @@ Performance Strategies, Image Processing, NPP Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 
](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaDeviceReset, cudaGetDeviceCount, cudaDriverGetVersion, cudaDeviceInit, cudaSetDevice, cudaRuntimeGetVersion, cudaGetDeviceProperties +cudaRuntimeGetVersion, cudaDeviceReset, cudaSetDevice, cudaGetDeviceCount, cudaDeviceInit, cudaDriverGetVersion, cudaGetDeviceProperties ## Dependencies needed to build/run [FreeImage](../../../README.md#freeimage), [NPP](../../../README.md#npp) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2017.vcxproj b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2017.vcxproj index dd8192260..17822bcb5 100644 --- a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2019.vcxproj b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2019.vcxproj index d2ac6df21..753226082 100644 --- a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2022.vcxproj b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2022.vcxproj index 1de5e0199..a40090f1c 100644 --- a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/NsightEclipse.xml b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/NsightEclipse.xml index 40c95ec03..34ed799bc 100644 --- a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/NsightEclipse.xml @@ -3,13 +3,13 @@ MersenneTwisterGP11213 + cudaStreamCreateWithFlags cudaStreamDestroy cudaFree cudaMallocHost cudaFreeHost - cudaMalloc - cudaStreamCreateWithFlags cudaStreamSynchronize + cudaMalloc cudaMemcpyAsync @@ -58,6 +58,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git 
a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/README.md b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/README.md index 9af50ae28..6244164c2 100644 --- a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/README.md +++ b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/README.md @@ -10,7 +10,7 @@ CURAND Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaStreamDestroy, cudaFree, cudaMallocHost, cudaFreeHost, cudaMalloc, cudaStreamCreateWithFlags, cudaStreamSynchronize, cudaMemcpyAsync 
+cudaStreamCreateWithFlags, cudaStreamDestroy, cudaFree, cudaMallocHost, cudaFreeHost, cudaStreamSynchronize, cudaMalloc, cudaMemcpyAsync ## Dependencies needed to build/run [CURAND](../../../README.md#curand) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/NsightEclipse.xml b/Samples/4_CUDA_Libraries/batchCUBLAS/NsightEclipse.xml index e166cad3b..44f306e21 100644 --- a/Samples/4_CUDA_Libraries/batchCUBLAS/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/batchCUBLAS/NsightEclipse.xml @@ -5,15 +5,15 @@ cuRand cuEqual + cudaMemcpy + cudaGetErrorString cudaFree - cudaStreamCreate + cudaGetLastError cudaDeviceSynchronize + cudaGetDevice cudaMalloc - cudaGetLastError - cudaMemcpy - cudaGetErrorString + cudaStreamCreate cudaGetDeviceProperties - cudaGetDevice whole @@ -58,6 +58,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/README.md b/Samples/4_CUDA_Libraries/batchCUBLAS/README.md index ade22850e..635750224 100644 --- a/Samples/4_CUDA_Libraries/batchCUBLAS/README.md +++ b/Samples/4_CUDA_Libraries/batchCUBLAS/README.md @@ -10,7 +10,7 @@ Linear Algebra, CUBLAS Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 
](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -26,14 +26,14 @@ x86_64, ppc64le, armv7l cuRand, cuEqual ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaStreamCreate, cudaDeviceSynchronize, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties, cudaGetDevice +cudaMemcpy, cudaGetErrorString, cudaFree, cudaGetLastError, cudaDeviceSynchronize, cudaGetDevice, cudaMalloc, cudaStreamCreate, cudaGetDeviceProperties ## Dependencies needed to build/run [CUBLAS](../../../README.md#cublas) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2017.vcxproj b/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2017.vcxproj index 8df8d4956..2c41bea31 100644 --- a/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2019.vcxproj b/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2019.vcxproj index 3f42f964d..1bd17bac8 100644 --- a/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2022.vcxproj b/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2022.vcxproj index 1823a30b0..dba07dfdd 100644 --- a/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/NsightEclipse.xml b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/NsightEclipse.xml index 821f1eea8..9cd72dd80 100644 --- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/NsightEclipse.xml @@ -3,19 +3,19 @@ batchedLabelMarkersAndLabelCompressionNPP - cudaFree - cudaMallocHost + cudaRuntimeGetVersion cudaMallocPitch + cudaFree cudaDeviceGetAttribute - cudaFreeHost + cudaMallocHost cudaDriverGetVersion - cudaMalloc + cudaFreeHost + cudaGetDevice cudaStreamGetFlags - cudaRuntimeGetVersion cudaStreamSynchronize + cudaMalloc cudaMemcpyAsync cudaGetDeviceProperties - cudaGetDevice whole @@ -68,6 +68,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git 
a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/README.md b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/README.md index 97dbcd1dd..ddc106f35 100644 --- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/README.md +++ b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/README.md @@ -10,7 +10,7 @@ Performance Strategies, Image Processing, NPP Library, Using NPP Batch Functions ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime 
API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaMallocHost, cudaMallocPitch, cudaDeviceGetAttribute, cudaFreeHost, cudaDriverGetVersion, cudaMalloc, cudaStreamGetFlags, cudaRuntimeGetVersion, cudaStreamSynchronize, cudaMemcpyAsync, cudaGetDeviceProperties, cudaGetDevice +cudaRuntimeGetVersion, cudaMallocPitch, cudaFree, cudaDeviceGetAttribute, cudaMallocHost, cudaDriverGetVersion, cudaFreeHost, cudaGetDevice, cudaStreamGetFlags, cudaStreamSynchronize, cudaMalloc, cudaMemcpyAsync, cudaGetDeviceProperties ## Dependencies needed to build/run [NPP](../../../README.md#npp) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2017.vcxproj b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2017.vcxproj index 59e207a8c..da25b507b 100644 --- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2019.vcxproj b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2019.vcxproj index 148be7cb1..928dc419d 100644 --- 
a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2022.vcxproj b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2022.vcxproj index 1f5a0ff4b..5b9408aab 100644 --- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/NsightEclipse.xml b/Samples/4_CUDA_Libraries/boxFilterNPP/NsightEclipse.xml index ae3b9b465..da9c0d55c 100644 --- a/Samples/4_CUDA_Libraries/boxFilterNPP/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/boxFilterNPP/NsightEclipse.xml @@ -3,8 +3,8 @@ boxFilterNPP - cudaDriverGetVersion cudaRuntimeGetVersion + cudaDriverGetVersion whole @@ -64,6 +64,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/README.md b/Samples/4_CUDA_Libraries/boxFilterNPP/README.md index 3de69529b..f647397c2 100644 --- a/Samples/4_CUDA_Libraries/boxFilterNPP/README.md +++ b/Samples/4_CUDA_Libraries/boxFilterNPP/README.md @@ -10,7 +10,7 @@ Performance Strategies, Image Processing, NPP Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 
6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaDriverGetVersion, cudaRuntimeGetVersion +cudaRuntimeGetVersion, cudaDriverGetVersion ## Dependencies needed to build/run [FreeImage](../../../README.md#freeimage), [NPP](../../../README.md#npp) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2017.vcxproj b/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2017.vcxproj index 693cda831..bf2a30abe 100644 --- a/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -117,6 +117,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2019.vcxproj b/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2019.vcxproj index 0f7a3fa01..a7ab43bb7 100644 --- a/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -113,6 +113,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2022.vcxproj b/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2022.vcxproj index 1ef674c0d..5b3e3fb3c 100644 --- a/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -113,6 +113,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/NsightEclipse.xml index d4dca5bac..26b3f456c 100644 --- a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/NsightEclipse.xml @@ -3,13 +3,13 @@ cannyEdgeDetectorNPP + cudaRuntimeGetVersion cudaFree + cudaSetDevice cudaGetDeviceCount + cudaDeviceInit cudaDriverGetVersion cudaMalloc - cudaDeviceInit - cudaSetDevice - cudaRuntimeGetVersion cudaGetDeviceProperties @@ -66,6 +66,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/README.md b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/README.md index 41630e5da..01493d71b 100644 --- a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/README.md +++ 
b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/README.md @@ -10,7 +10,7 @@ Performance Strategies, Image Processing, NPP Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGetDeviceCount, cudaDriverGetVersion, cudaMalloc, cudaDeviceInit, cudaSetDevice, cudaRuntimeGetVersion, cudaGetDeviceProperties +cudaRuntimeGetVersion, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaDeviceInit, cudaDriverGetVersion, cudaMalloc, cudaGetDeviceProperties ## Dependencies needed to build/run 
[FreeImage](../../../README.md#freeimage), [NPP](../../../README.md#npp) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2017.vcxproj b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2017.vcxproj index a2fe1e4c8..338a498d4 100644 --- a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -117,6 +117,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2019.vcxproj b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2019.vcxproj index 0510328c9..91164159a 100644 --- a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -113,6 +113,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2022.vcxproj b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2022.vcxproj index 29e20532b..31270caa3 100644 --- a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -113,6 +113,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/conjugateGradient/NsightEclipse.xml b/Samples/4_CUDA_Libraries/conjugateGradient/NsightEclipse.xml index 7fb145798..99a85b356 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradient/NsightEclipse.xml +++ 
b/Samples/4_CUDA_Libraries/conjugateGradient/NsightEclipse.xml @@ -3,10 +3,10 @@ conjugateGradient + cudaMemcpy cudaFree cudaDeviceSynchronize cudaMalloc - cudaMemcpy cudaGetDeviceProperties @@ -59,6 +59,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/conjugateGradient/README.md b/Samples/4_CUDA_Libraries/conjugateGradient/README.md index c85219875..9f6647821 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradient/README.md +++ b/Samples/4_CUDA_Libraries/conjugateGradient/README.md @@ -10,7 +10,7 @@ Linear Algebra, CUBLAS Library, CUSPARSE Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ 
x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaGetDeviceProperties ## Dependencies needed to build/run [CUBLAS](../../../README.md#cublas), [CUSPARSE](../../../README.md#cusparse) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2017.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2017.vcxproj index 3f9f1fc84..42096e5ab 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2019.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2019.vcxproj index 9755b7c00..44cdb92c2 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2022.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2022.vcxproj index 2d9bb9181..afb56e1de 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ 
- + diff --git a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/Makefile b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/Makefile index af3ffc453..4c4c95d56 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/Makefile +++ b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/NsightEclipse.xml b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/NsightEclipse.xml index 3a4d5e980..0a766802f 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/NsightEclipse.xml @@ -3,23 +3,23 @@ conjugateGradientCudaGraphs + cudaGraphInstantiate cudaStreamDestroy + cudaStreamBeginCapture cudaFree - cudaGraphExecDestroy cudaMallocHost - cudaStreamCreate - cudaGraphInstantiate - cudaOccupancyMaxPotentialBlockSize - cudaStreamBeginCapture - cudaMemsetAsync cudaStreamEndCapture + cudaGraphDestroy cudaFreeHost - cudaMalloc + cudaGraphLaunch + cudaStreamCreate cudaStreamSynchronize + cudaOccupancyMaxPotentialBlockSize + cudaMalloc cudaMemcpyAsync - cudaGraphDestroy + cudaMemsetAsync cudaGetDeviceProperties - cudaGraphLaunch + cudaGraphExecDestroy whole @@ -71,6 +71,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/README.md b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/README.md index 8d634cdea..787c89a4f 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/README.md +++ b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/README.md @@ -10,7 +10,7 @@ Linear Algebra, CUBLAS Library, CUSPARSE Library ## Supported SM Architectures -[SM 3.5 
](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaStreamDestroy, cudaFree, cudaGraphExecDestroy, cudaMallocHost, cudaStreamCreate, cudaGraphInstantiate, cudaOccupancyMaxPotentialBlockSize, cudaStreamBeginCapture, cudaMemsetAsync, cudaStreamEndCapture, cudaFreeHost, cudaMalloc, cudaStreamSynchronize, cudaMemcpyAsync, cudaGraphDestroy, cudaGetDeviceProperties, cudaGraphLaunch +cudaGraphInstantiate, cudaStreamDestroy, cudaStreamBeginCapture, cudaFree, cudaMallocHost, cudaStreamEndCapture, cudaGraphDestroy, cudaFreeHost, 
cudaGraphLaunch, cudaStreamCreate, cudaStreamSynchronize, cudaOccupancyMaxPotentialBlockSize, cudaMalloc, cudaMemcpyAsync, cudaMemsetAsync, cudaGetDeviceProperties, cudaGraphExecDestroy ## Dependencies needed to build/run [CUBLAS](../../../README.md#cublas), [CUSPARSE](../../../README.md#cusparse) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2017.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2017.vcxproj index 8bd52098a..2a1e00ec7 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/conjugateGradientCudaGraphs.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2019.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2019.vcxproj index 1838fc792..c1e36760b 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2019.vcxproj +++ 
b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/conjugateGradientCudaGraphs.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2022.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2022.vcxproj index b07dfc7b8..f641b210c 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/conjugateGradientCudaGraphs.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/Makefile b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/Makefile index 2cd57b055..6e5d34352 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/Makefile +++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/Makefile @@ -322,9 +322,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter 
$(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 61 70 72 75 80 86 87 +SMS ?= 61 70 72 75 80 86 87 90 else -SMS ?= 60 61 70 75 80 86 +SMS ?= 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/NsightEclipse.xml b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/NsightEclipse.xml index 55c72749e..0deaa733a 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/NsightEclipse.xml @@ -7,15 +7,15 @@ cudaFree - cudaEventRecord cudaMallocManaged - cudaOccupancyMaxActiveBlocksPerMultiprocessor - cudaEventCreate - cudaEventElapsedTime cudaDeviceSynchronize - cudaEventDestroy + cudaEventRecord cudaLaunchCooperativeKernel + cudaEventDestroy + cudaEventElapsedTime + cudaOccupancyMaxActiveBlocksPerMultiprocessor cudaGetDeviceProperties + cudaEventCreate whole @@ -60,6 +60,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/README.md b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/README.md index 804955b69..e8c0643c1 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/README.md +++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/README.md @@ -10,7 +10,7 @@ Unified Memory, Linear Algebra, Cooperative Groups, MultiBlock Cooperative Group ## Supported SM Architectures -[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 
](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaEventRecord, cudaMallocManaged, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaEventCreate, cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventDestroy, cudaLaunchCooperativeKernel, cudaGetDeviceProperties +cudaFree, cudaMallocManaged, cudaDeviceSynchronize, cudaEventRecord, cudaLaunchCooperativeKernel, cudaEventDestroy, cudaEventElapsedTime, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaGetDeviceProperties, cudaEventCreate ## Dependencies needed to build/run [UVM](../../../README.md#uvm), [MBCG](../../../README.md#mbcg) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2017.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2017.vcxproj index 0fa1a17ec..d3fc2fdf8 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/conjugateGradientMultiBlockCG.exe - compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2019.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2019.vcxproj index 7301e0327..6c064a95a 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/conjugateGradientMultiBlockCG.exe - compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2022.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2022.vcxproj index 57816f0be..0e4d81bed 100644 --- 
a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/conjugateGradientMultiBlockCG.exe - compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/Makefile b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/Makefile index 06f2703ef..317131819 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/Makefile +++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/Makefile @@ -322,9 +322,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 61 70 72 75 80 86 87 +SMS ?= 61 70 72 75 80 86 87 90 else -SMS ?= 60 61 70 75 80 86 +SMS ?= 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/NsightEclipse.xml b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/NsightEclipse.xml index 7852487ae..ef83507cf 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/NsightEclipse.xml @@ -8,22 +8,22 @@ --std=c++11 - cudaDeviceEnablePeerAccess - cudaMemset + cudaHostAlloc + cudaMemPrefetchAsync cudaFree + cudaLaunchCooperativeKernel cudaMallocManaged - cudaMemPrefetchAsync - cudaHostAlloc - cudaOccupancyMaxActiveBlocksPerMultiprocessor - cudaStreamCreate + cudaSetDevice cudaGetDeviceCount + cudaGetDeviceProperties cudaFreeHost - cudaSetDevice - cudaDeviceCanAccessPeer - cudaLaunchCooperativeKernel + cudaMemset + cudaStreamCreate 
cudaStreamSynchronize + cudaDeviceEnablePeerAccess cudaMemAdvise - cudaGetDeviceProperties + cudaOccupancyMaxActiveBlocksPerMultiprocessor + cudaDeviceCanAccessPeer whole @@ -71,6 +71,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/README.md b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/README.md index e21a35071..9d77bf384 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/README.md +++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/README.md @@ -10,7 +10,7 @@ Unified Memory, Linear Algebra, Cooperative Groups, MultiDevice Cooperative Grou ## Supported SM Architectures -[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaDeviceEnablePeerAccess, cudaMemset, cudaFree, cudaMallocManaged, cudaMemPrefetchAsync, cudaHostAlloc, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaStreamCreate, cudaGetDeviceCount, cudaFreeHost, cudaSetDevice, cudaDeviceCanAccessPeer, cudaLaunchCooperativeKernel, cudaStreamSynchronize, 
cudaMemAdvise, cudaGetDeviceProperties +cudaHostAlloc, cudaMemPrefetchAsync, cudaFree, cudaLaunchCooperativeKernel, cudaMallocManaged, cudaSetDevice, cudaGetDeviceCount, cudaGetDeviceProperties, cudaFreeHost, cudaMemset, cudaStreamCreate, cudaStreamSynchronize, cudaDeviceEnablePeerAccess, cudaMemAdvise, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaDeviceCanAccessPeer ## Dependencies needed to build/run [UVM](../../../README.md#uvm), [MDCG](../../../README.md#mdcg), [CPP11](../../../README.md#cpp11) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2017.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2017.vcxproj index 85930d364..b58051c34 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/conjugateGradientMultiDeviceCG.exe - compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2019.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2019.vcxproj index 4a5df0b85..b422370dd 100644 --- 
a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/conjugateGradientMultiDeviceCG.exe - compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2022.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2022.vcxproj index 91ab39cc7..41caff0d8 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/conjugateGradientMultiDeviceCG.exe - compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/NsightEclipse.xml b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/NsightEclipse.xml index 62d843c01..760d5de3e 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/NsightEclipse.xml @@ -3,10 +3,10 @@ conjugateGradientPrecond - cudaMemset + cudaMemcpy cudaFree + cudaMemset cudaMalloc - cudaMemcpy cudaGetDeviceProperties @@ -57,6 +57,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff 
--git a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/README.md b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/README.md index 6e1116d59..bded98176 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/README.md +++ b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/README.md @@ -10,7 +10,7 @@ Linear Algebra, CUBLAS Library, CUSPARSE Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, 
cudaFree, cudaMemset, cudaMalloc, cudaGetDeviceProperties ## Dependencies needed to build/run [CUBLAS](../../../README.md#cublas), [CUSPARSE](../../../README.md#cusparse) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2017.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2017.vcxproj index bd750e905..3af1df6d3 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2019.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2019.vcxproj index 593817cda..0721d9eb5 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2022.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2022.vcxproj index d368d236b..e601f5fd1 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git 
a/Samples/4_CUDA_Libraries/conjugateGradientUM/Makefile b/Samples/4_CUDA_Libraries/conjugateGradientUM/Makefile index b16c5e68c..8f72576d4 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientUM/Makefile +++ b/Samples/4_CUDA_Libraries/conjugateGradientUM/Makefile @@ -285,9 +285,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/conjugateGradientUM/NsightEclipse.xml b/Samples/4_CUDA_Libraries/conjugateGradientUM/NsightEclipse.xml index 3e59ba5d2..ca7258c98 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientUM/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/conjugateGradientUM/NsightEclipse.xml @@ -59,6 +59,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/conjugateGradientUM/README.md b/Samples/4_CUDA_Libraries/conjugateGradientUM/README.md index bb3253f18..ac9fd2529 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientUM/README.md +++ b/Samples/4_CUDA_Libraries/conjugateGradientUM/README.md @@ -10,7 +10,7 @@ Unified Memory, Linear Algebra, CUBLAS Library, CUSPARSE Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 
](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -30,7 +30,7 @@ cudaFree, cudaMallocManaged, cudaDeviceSynchronize, cudaMalloc, cudaGetDevicePro ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2017.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2017.vcxproj index 62f9d13da..4a8f1f5fd 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/conjugateGradientUM.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2019.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2019.vcxproj index a6541e3d2..44fd5a526 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/conjugateGradientUM.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2022.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2022.vcxproj index 
045228e5c..05b5205cc 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/conjugateGradientUM.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cuDLAErrorReporting/Makefile b/Samples/4_CUDA_Libraries/cuDLAErrorReporting/Makefile index 19b23678c..9c8f53fed 100644 --- a/Samples/4_CUDA_Libraries/cuDLAErrorReporting/Makefile +++ b/Samples/4_CUDA_Libraries/cuDLAErrorReporting/Makefile @@ -330,9 +330,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/cuDLAErrorReporting/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cuDLAErrorReporting/NsightEclipse.xml index fdf187ae4..0d4cf217b 100644 --- a/Samples/4_CUDA_Libraries/cuDLAErrorReporting/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/cuDLAErrorReporting/NsightEclipse.xml @@ -6,14 +6,14 @@ --std=c++11 + cudaStreamCreateWithFlags cudaStreamDestroy cudaFree + cudaGetErrorName + cudaSetDevice cudaStreamSynchronize - cudaMemsetAsync cudaMalloc - cudaStreamCreateWithFlags - cudaSetDevice - cudaGetErrorName + cudaMemsetAsync cudaMemcpyAsync @@ -51,6 +51,7 @@ sm80 sm86 sm87 + sm90 aarch64 diff --git a/Samples/4_CUDA_Libraries/cuDLAErrorReporting/README.md 
b/Samples/4_CUDA_Libraries/cuDLAErrorReporting/README.md index e4b50ac9e..e18dc7d8a 100644 --- a/Samples/4_CUDA_Libraries/cuDLAErrorReporting/README.md +++ b/Samples/4_CUDA_Libraries/cuDLAErrorReporting/README.md @@ -10,7 +10,7 @@ cuDLA, Data Parallel Algorithms, Image Processing ## Supported SM Architectures -[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaStreamDestroy, cudaFree, cudaStreamSynchronize, cudaMemsetAsync, cudaMalloc, cudaStreamCreateWithFlags, cudaSetDevice, cudaGetErrorName, cudaMemcpyAsync +cudaStreamCreateWithFlags, cudaStreamDestroy, cudaFree, cudaGetErrorName, cudaSetDevice, cudaStreamSynchronize, cudaMalloc, cudaMemsetAsync, cudaMemcpyAsync ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/4_CUDA_Libraries/cuDLAHybridMode/Makefile b/Samples/4_CUDA_Libraries/cuDLAHybridMode/Makefile index 70ba53d68..5cf7413aa 100644 --- a/Samples/4_CUDA_Libraries/cuDLAHybridMode/Makefile +++ b/Samples/4_CUDA_Libraries/cuDLAHybridMode/Makefile @@ -330,9 +330,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/cuDLAHybridMode/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cuDLAHybridMode/NsightEclipse.xml index 07ca19c56..d59cdec6d 100644 --- a/Samples/4_CUDA_Libraries/cuDLAHybridMode/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/cuDLAHybridMode/NsightEclipse.xml @@ -6,14 +6,14 @@ --std=c++11 + cudaStreamCreateWithFlags cudaStreamDestroy cudaFree + cudaGetErrorName + cudaSetDevice cudaStreamSynchronize - cudaMemsetAsync cudaMalloc - cudaStreamCreateWithFlags - cudaSetDevice - cudaGetErrorName + cudaMemsetAsync cudaMemcpyAsync @@ -51,6 +51,7 @@ sm80 sm86 sm87 + sm90 aarch64 diff --git a/Samples/4_CUDA_Libraries/cuDLAHybridMode/README.md b/Samples/4_CUDA_Libraries/cuDLAHybridMode/README.md index 2f4559bb9..12799c18a 100644 --- a/Samples/4_CUDA_Libraries/cuDLAHybridMode/README.md +++ b/Samples/4_CUDA_Libraries/cuDLAHybridMode/README.md @@ -10,7 +10,7 @@ cuDLA, Data Parallel Algorithms, Image Processing ## Supported SM Architectures -[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 
](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaStreamDestroy, cudaFree, cudaStreamSynchronize, cudaMemsetAsync, cudaMalloc, cudaStreamCreateWithFlags, cudaSetDevice, cudaGetErrorName, cudaMemcpyAsync +cudaStreamCreateWithFlags, cudaStreamDestroy, cudaFree, cudaGetErrorName, cudaSetDevice, cudaStreamSynchronize, cudaMalloc, cudaMemsetAsync, cudaMemcpyAsync ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/Makefile b/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/Makefile index 4893fee94..76bfe834a 100644 --- a/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/Makefile +++ b/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/Makefile @@ -333,9 +333,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/NsightEclipse.xml index b9db9e0b2..7f3ef809f 100644 --- a/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/NsightEclipse.xml @@ -45,6 +45,7 @@ sm80 sm86 sm87 + sm90 aarch64 diff --git a/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/README.md b/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/README.md index 6ca250bab..21cdfb8ff 100644 --- a/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/README.md +++ b/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/README.md @@ -10,7 +10,7 @@ cuDLA, Data Parallel Algorithms, Image Processing ## Supported SM Architectures -[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 
](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -27,7 +27,7 @@ aarch64 ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/findnvsci.mk b/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/findnvsci.mk index 1934e1380..0d6d157c4 100644 --- a/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/findnvsci.mk +++ b/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/findnvsci.mk @@ -56,6 +56,7 @@ ifeq ("$(TARGET_OS)","linux") RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -101,6 +102,11 @@ ifeq ("$(TARGET_OS)","linux") GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif + ifeq ("$(KYLIN)","0") + GLPATH ?= /usr/lib64/nvidia + GLLINK ?= -L/usr/lib64/nvidia + DFLT_PATH ?= /usr/lib64 + endif NVSCIBUFLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libnvscibuf.so -print 2>/dev/null) NVSCISYNCLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libnvscisync.so -print 2>/dev/null) diff --git a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/NsightEclipse.xml index ba8ad2016..9405b3940 100644 --- a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/NsightEclipse.xml @@ -5,13 +5,13 @@ cuDoubleComplex cuComplex - cudaMemset - cudaFree + cudaMemcpy cudaStreamDestroy - cudaStreamCreate + cudaFree cudaDeviceSynchronize + cudaMemset cudaMalloc - cudaMemcpy + cudaStreamCreate whole @@ -68,6 +68,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/README.md b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/README.md index 92210b8fd..0b311943a 100644 --- a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/README.md +++ b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/README.md @@ -10,7 +10,7 @@ Linear Algebra, CUSOLVER Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 
](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -26,14 +26,14 @@ x86_64, ppc64le, aarch64 cuDoubleComplex, cuComplex ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaStreamDestroy, cudaStreamCreate, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy +cudaMemcpy, cudaStreamDestroy, cudaFree, cudaDeviceSynchronize, cudaMemset, cudaMalloc, cudaStreamCreate ## Dependencies needed to build/run [CUSOLVER](../../../README.md#cusolver), [CUBLAS](../../../README.md#cublas), [CUSPARSE](../../../README.md#cusparse) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2017.vcxproj b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2017.vcxproj index 90d7b869e..cd859b9b1 100644 --- a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -109,6 +109,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2019.vcxproj b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2019.vcxproj index 2b63d7d11..6155be1ab 100644 --- a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -105,6 +105,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2022.vcxproj b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2022.vcxproj index 1f123af80..98448924f 100644 --- a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -105,6 +105,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/Makefile b/Samples/4_CUDA_Libraries/cuSolverRf/Makefile index 27824bc2d..92fdc9849 100644 --- a/Samples/4_CUDA_Libraries/cuSolverRf/Makefile +++ b/Samples/4_CUDA_Libraries/cuSolverRf/Makefile @@ -283,9 +283,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/NsightEclipse.xml 
b/Samples/4_CUDA_Libraries/cuSolverRf/NsightEclipse.xml index 2cf3f041a..84c56c891 100644 --- a/Samples/4_CUDA_Libraries/cuSolverRf/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/cuSolverRf/NsightEclipse.xml @@ -3,15 +3,15 @@ cuSolverRf + cuGet cuDoubleComplex cuComplex - cuGet + cudaMemcpy cudaStreamDestroy cudaFree - cudaStreamCreate cudaDeviceSynchronize cudaMalloc - cudaMemcpy + cudaStreamCreate whole @@ -67,6 +67,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/README.md b/Samples/4_CUDA_Libraries/cuSolverRf/README.md index d0a99cf47..c268cf070 100644 --- a/Samples/4_CUDA_Libraries/cuSolverRf/README.md +++ b/Samples/4_CUDA_Libraries/cuSolverRf/README.md @@ -10,7 +10,7 @@ Linear Algebra, CUSOLVER Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 
](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,17 +23,17 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuDoubleComplex, cuComplex, cuGet +cuGet, cuDoubleComplex, cuComplex ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaStreamDestroy, cudaFree, cudaStreamCreate, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy +cudaMemcpy, cudaStreamDestroy, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaStreamCreate ## Dependencies needed to build/run [CUSOLVER](../../../README.md#cusolver), [CUBLAS](../../../README.md#cublas), [CUSPARSE](../../../README.md#cusparse) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2017.vcxproj b/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2017.vcxproj index fc5a4d2f7..bcc7a6a4b 100644 --- a/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/cuSolverRf.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2019.vcxproj b/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2019.vcxproj index 02802989e..16948fcd1 100644 --- a/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cuSolverRf.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2022.vcxproj b/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2022.vcxproj index 9cd7669f5..6de3db9bf 100644 --- a/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 
+63,7 @@ $(OutDir)/cuSolverRf.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/Makefile b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/Makefile index 59b0c3d09..04cdc6ea9 100644 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/Makefile +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/Makefile @@ -283,9 +283,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/NsightEclipse.xml index cc3a57adc..f6b92a544 100644 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/NsightEclipse.xml @@ -7,9 +7,9 @@ cuComplex cudaStreamDestroy cudaFree - cudaStreamCreate cudaDeviceSynchronize cudaMalloc + cudaStreamCreate cudaMemcpyAsync @@ -68,6 +68,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/README.md b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/README.md index 2ac87f314..25ce28654 100644 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/README.md +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/README.md @@ -10,7 +10,7 @@ Linear Algebra, CUSOLVER Library ## Supported SM Architectures -[SM 3.5 
](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -26,14 +26,14 @@ x86_64, ppc64le, armv7l cuDoubleComplex, cuComplex ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaStreamDestroy, cudaFree, cudaStreamCreate, cudaDeviceSynchronize, cudaMalloc, cudaMemcpyAsync +cudaStreamDestroy, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaStreamCreate, cudaMemcpyAsync ## Dependencies needed to build/run [CUSOLVER](../../../README.md#cusolver), [CUSPARSE](../../../README.md#cusparse) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2017.vcxproj b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2017.vcxproj index 78512bbdc..9978ec3e7 100644 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/cuSolverSp_LinearSolver.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2019.vcxproj b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2019.vcxproj index fe6195019..a7aef7dd5 100644 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cuSolverSp_LinearSolver.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2022.vcxproj b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2022.vcxproj index 0d22534df..d6a6ef404 100644 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cuSolverSp_LinearSolver.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/Makefile b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/Makefile index e80902e4c..649da6f8f 100644 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/Makefile +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/Makefile @@ -283,9 +283,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/NsightEclipse.xml index 32bfa4d34..65fa85566 100644 
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/NsightEclipse.xml @@ -5,11 +5,11 @@ cuDoubleComplex cuComplex + cudaMemcpy cudaStreamDestroy cudaFree - cudaStreamCreate cudaMalloc - cudaMemcpy + cudaStreamCreate whole @@ -59,6 +59,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/README.md b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/README.md index a992f9ef1..3cf4112f0 100644 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/README.md +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/README.md @@ -10,7 +10,7 @@ Linear Algebra, CUSOLVER Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) 
[SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -26,14 +26,14 @@ x86_64, ppc64le, armv7l cuDoubleComplex, cuComplex ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaStreamDestroy, cudaFree, cudaStreamCreate, cudaMalloc, cudaMemcpy +cudaMemcpy, cudaStreamDestroy, cudaFree, cudaMalloc, cudaStreamCreate ## Dependencies needed to build/run [CUSOLVER](../../../README.md#cusolver), [CUSPARSE](../../../README.md#cusparse) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2017.vcxproj b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2017.vcxproj index d04553b80..d2c8031e1 100644 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/cuSolverSp_LowlevelCholesky.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2019.vcxproj 
b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2019.vcxproj index 3b686a782..2703da3ad 100644 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cuSolverSp_LowlevelCholesky.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2022.vcxproj b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2022.vcxproj index 94e8d4035..b85749bf5 100644 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cuSolverSp_LowlevelCholesky.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/Makefile b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/Makefile index 5e3384bc8..2e7d1c9f8 100644 
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/Makefile +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/Makefile @@ -283,9 +283,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/NsightEclipse.xml index 3e3dc5c40..51bab24a0 100644 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/NsightEclipse.xml @@ -3,14 +3,14 @@ cuSolverSp_LowlevelQR + cuGet cuDoubleComplex cuComplex - cuGet + cudaMemcpy cudaStreamDestroy cudaFree - cudaStreamCreate cudaMalloc - cudaMemcpy + cudaStreamCreate whole @@ -60,6 +60,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/README.md b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/README.md index eae84e4dd..df5f2a84d 100644 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/README.md +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/README.md @@ -10,7 +10,7 @@ Linear Algebra, CUSOLVER Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 
3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,17 +23,17 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuDoubleComplex, cuComplex, cuGet +cuGet, cuDoubleComplex, cuComplex ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaStreamDestroy, cudaFree, cudaStreamCreate, cudaMalloc, cudaMemcpy +cudaMemcpy, cudaStreamDestroy, cudaFree, cudaMalloc, cudaStreamCreate ## Dependencies needed to build/run [CUSOLVER](../../../README.md#cusolver), [CUSPARSE](../../../README.md#cusparse) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2017.vcxproj b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2017.vcxproj index b62c633e8..b25aa37d9 100644 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/cuSolverSp_LowlevelQR.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2019.vcxproj b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2019.vcxproj index 5ab45d7df..af967c450 100644 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cuSolverSp_LowlevelQR.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2022.vcxproj 
b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2022.vcxproj index d8ee669f5..bb98790f2 100644 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cuSolverSp_LowlevelQR.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cudaNvSci/Makefile b/Samples/4_CUDA_Libraries/cudaNvSci/Makefile index dd8e7bce1..f6561b1b2 100644 --- a/Samples/4_CUDA_Libraries/cudaNvSci/Makefile +++ b/Samples/4_CUDA_Libraries/cudaNvSci/Makefile @@ -327,9 +327,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/cudaNvSci/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cudaNvSci/NsightEclipse.xml index a8659d179..0995196ed 100644 --- a/Samples/4_CUDA_Libraries/cudaNvSci/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/cudaNvSci/NsightEclipse.xml @@ -7,39 +7,39 @@ cuDeviceGetUuid + cudaExternalMemoryGetMappedBuffer + cudaImportExternalSemaphore + cudaDeviceGetAttribute + cudaNvSciSignal cudaGetMipmappedArrayLevel + cudaImportNvSciRawBuf + cudaSetDevice cudaImportNvSciImage - cudaImportExternalSemaphore cudaNvSciApp - cudaStreamCreateWithFlags - cudaExternalMemoryGetMappedMipmappedArray + cudaDeviceId + cudaMallocHost + 
cudaSignalExternalSemaphoresAsync + cudaCreateTextureObject + cudaFreeHost + cudaNvSci cudaNvSciWait - cudaDestroyExternalMemory + cudaGetDeviceCount cudaMemcpyAsync + cudaStreamCreateWithFlags + cudaExternalMemoryGetMappedMipmappedArray cudaStreamDestroy - cudaSignalExternalSemaphoresAsync cudaDeviceGetNvSciSyncAttributes - cudaFreeMipmappedArray - cudaMallocHost - cudaNvSci - cudaImportExternalMemory - cudaSetDevice - cudaImportNvSciRawBuf - cudaImportNvSciSemaphore - cudaGetDeviceCount cudaDestroyTextureObject - cudaDeviceGetAttribute + cudaDestroyExternalMemory + cudaImportExternalMemory cudaDestroyExternalSemaphore - cudaStreamSynchronize - cudaNvSciSignal + cudaFreeMipmappedArray cudaFree - cudaDeviceId - cudaExternalMemoryGetMappedBuffer - cudaCreateTextureObject - cudaFreeHost + cudaStreamSynchronize cudaWaitExternalSemaphoresAsync + cudaImportNvSciSemaphore - + whole ./ @@ -80,6 +80,7 @@ sm80 sm86 sm87 + sm90 aarch64 diff --git a/Samples/4_CUDA_Libraries/cudaNvSci/README.md b/Samples/4_CUDA_Libraries/cudaNvSci/README.md index baac35c69..2e12e227b 100644 --- a/Samples/4_CUDA_Libraries/cudaNvSci/README.md +++ b/Samples/4_CUDA_Libraries/cudaNvSci/README.md @@ -2,7 +2,7 @@ ## Description -This sample demonstrates CUDA-NvSciBuf/NvSciSync Interop. Two CPU threads import the NvSciBuf and NvSciSync into CUDA to perform two image processing algorithms on a ppm image - image rotation in 1st thread &amp;amp;amp;amp;amp;amp;amp;amp;amp; rgba to grayscale conversion of rotated image in 2nd thread. Currently only supported on Ubuntu 18.04 +This sample demonstrates CUDA-NvSciBuf/NvSciSync Interop. Two CPU threads import the NvSciBuf and NvSciSync into CUDA to perform two image processing algorithms on a ppm image - image rotation in 1st thread & rgba to grayscale conversion of rotated image in 2nd thread.
Currently only supported on Ubuntu 18.04 ## Key Concepts @@ -10,7 +10,7 @@ CUDA NvSci Interop, Data Parallel Algorithms, Image Processing ## Supported SM Architectures -[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -26,14 +26,14 @@ x86_64, aarch64 cuDeviceGetUuid ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaGetMipmappedArrayLevel, cudaImportNvSciImage, cudaImportExternalSemaphore, cudaNvSciApp, cudaStreamCreateWithFlags, cudaExternalMemoryGetMappedMipmappedArray, cudaNvSciWait, cudaDestroyExternalMemory, cudaMemcpyAsync, cudaStreamDestroy, cudaSignalExternalSemaphoresAsync, cudaDeviceGetNvSciSyncAttributes, cudaFreeMipmappedArray, cudaMallocHost, cudaNvSci, cudaImportExternalMemory, cudaSetDevice, cudaImportNvSciRawBuf, cudaImportNvSciSemaphore, cudaGetDeviceCount, cudaDestroyTextureObject, cudaDeviceGetAttribute, cudaDestroyExternalSemaphore, cudaStreamSynchronize, cudaNvSciSignal, cudaFree, cudaDeviceId, cudaExternalMemoryGetMappedBuffer, cudaCreateTextureObject, cudaFreeHost, cudaWaitExternalSemaphoresAsync +cudaExternalMemoryGetMappedBuffer, cudaImportExternalSemaphore, cudaDeviceGetAttribute, cudaNvSciSignal, 
cudaGetMipmappedArrayLevel, cudaImportNvSciRawBuf, cudaSetDevice, cudaImportNvSciImage, cudaNvSciApp, cudaDeviceId, cudaMallocHost, cudaSignalExternalSemaphoresAsync, cudaCreateTextureObject, cudaFreeHost, cudaNvSci, cudaNvSciWait, cudaGetDeviceCount, cudaMemcpyAsync, cudaStreamCreateWithFlags, cudaExternalMemoryGetMappedMipmappedArray, cudaStreamDestroy, cudaDeviceGetNvSciSyncAttributes, cudaDestroyTextureObject, cudaDestroyExternalMemory, cudaImportExternalMemory, cudaDestroyExternalSemaphore, cudaFreeMipmappedArray, cudaFree, cudaStreamSynchronize, cudaWaitExternalSemaphoresAsync, cudaImportNvSciSemaphore ## Dependencies needed to build/run [NVSCI](../../../README.md#nvsci) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/cudaNvSci/findnvsci.mk b/Samples/4_CUDA_Libraries/cudaNvSci/findnvsci.mk index 1934e1380..0d6d157c4 100644 --- a/Samples/4_CUDA_Libraries/cudaNvSci/findnvsci.mk +++ b/Samples/4_CUDA_Libraries/cudaNvSci/findnvsci.mk @@ -56,6 +56,7 @@ ifeq ("$(TARGET_OS)","linux") RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -101,6 +102,11 @@ ifeq ("$(TARGET_OS)","linux") GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif + ifeq ("$(KYLIN)","0") + GLPATH ?= /usr/lib64/nvidia + GLLINK ?= -L/usr/lib64/nvidia + DFLT_PATH ?= /usr/lib64 + endif NVSCIBUFLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libnvscibuf.so -print 2>/dev/null) NVSCISYNCLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libnvscisync.so -print 2>/dev/null) diff --git a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/Makefile b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/Makefile index 390fb6f24..f9a79cde8 100644 --- a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/Makefile +++ b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/Makefile @@ -338,9 +338,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/NsightEclipse.xml index 31ebd06cd..0b3b44cb3 100644 --- a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/NsightEclipse.xml @@ -7,32 +7,32 @@ cuDeviceGetUuid + cudaImportExternalSemaphore cudaGetMipmappedArrayLevel + cudaSetDevice + cudaDestroySurfaceObject + cudaCreateSurfaceObject cudaImportNvSciImage - cudaImportExternalSemaphore + cudaCreateChannelDesc + cudaMallocHost + cudaSignalExternalSemaphoresAsync + cudaFreeHost + cudaMemcpyAsync cudaStreamCreateWithFlags cudaExternalMemoryGetMappedMipmappedArray - cudaDestroyExternalMemory - cudaMemcpyAsync + cudaMallocArray + cudaFreeArray cudaStreamDestroy - cudaSignalExternalSemaphoresAsync cudaDeviceGetNvSciSyncAttributes - cudaFreeMipmappedArray - cudaMallocHost - 
cudaDestroySurfaceObject + cudaDestroyExternalMemory cudaImportExternalMemory - cudaMalloc - cudaSetDevice cudaDestroyExternalSemaphore - cudaCreateChannelDesc - cudaStreamSynchronize + cudaFreeMipmappedArray + cudaImportNvSciSync cudaFree - cudaFreeArray - cudaCreateSurfaceObject - cudaFreeHost - cudaMallocArray + cudaStreamSynchronize + cudaMalloc cudaWaitExternalSemaphoresAsync - cudaImportNvSciSync whole @@ -77,6 +77,7 @@ sm80 sm86 sm87 + sm90 aarch64 diff --git a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/README.md b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/README.md index 2ef3f7691..9d1cd1367 100644 --- a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/README.md +++ b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/README.md @@ -10,7 +10,7 @@ CUDA NvSci Interop, Data Parallel Algorithms, Image Processing ## Supported SM Architectures -[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -26,14 +26,14 @@ aarch64 cuDeviceGetUuid ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaGetMipmappedArrayLevel, cudaImportNvSciImage, cudaImportExternalSemaphore, cudaStreamCreateWithFlags, cudaExternalMemoryGetMappedMipmappedArray, 
cudaDestroyExternalMemory, cudaMemcpyAsync, cudaStreamDestroy, cudaSignalExternalSemaphoresAsync, cudaDeviceGetNvSciSyncAttributes, cudaFreeMipmappedArray, cudaMallocHost, cudaDestroySurfaceObject, cudaImportExternalMemory, cudaMalloc, cudaSetDevice, cudaDestroyExternalSemaphore, cudaCreateChannelDesc, cudaStreamSynchronize, cudaFree, cudaFreeArray, cudaCreateSurfaceObject, cudaFreeHost, cudaMallocArray, cudaWaitExternalSemaphoresAsync, cudaImportNvSciSync +cudaImportExternalSemaphore, cudaGetMipmappedArrayLevel, cudaSetDevice, cudaDestroySurfaceObject, cudaCreateSurfaceObject, cudaImportNvSciImage, cudaCreateChannelDesc, cudaMallocHost, cudaSignalExternalSemaphoresAsync, cudaFreeHost, cudaMemcpyAsync, cudaStreamCreateWithFlags, cudaExternalMemoryGetMappedMipmappedArray, cudaMallocArray, cudaFreeArray, cudaStreamDestroy, cudaDeviceGetNvSciSyncAttributes, cudaDestroyExternalMemory, cudaImportExternalMemory, cudaDestroyExternalSemaphore, cudaFreeMipmappedArray, cudaImportNvSciSync, cudaFree, cudaStreamSynchronize, cudaMalloc, cudaWaitExternalSemaphoresAsync ## Dependencies needed to build/run [NVSCI](../../../README.md#nvsci), [NvMedia](../../../README.md#nvmedia) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/findnvmedia.mk b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/findnvmedia.mk index 7b8cd1b64..23cfcd539 100644 --- a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/findnvmedia.mk +++ b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/findnvmedia.mk @@ -56,6 +56,7 @@ ifeq ("$(TARGET_OS)","linux") RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) 
CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -101,6 +102,11 @@ ifeq ("$(TARGET_OS)","linux") GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif + ifeq ("$(KYLIN)","0") + GLPATH ?= /usr/lib64/nvidia + GLLINK ?= -L/usr/lib64/nvidia + DFLT_PATH ?= /usr/lib64 + endif NVMEDIALIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libnvmedia.so -print 2>/dev/null) diff --git a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/findnvsci.mk b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/findnvsci.mk index 1934e1380..0d6d157c4 100644 --- a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/findnvsci.mk +++ b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/findnvsci.mk @@ -56,6 +56,7 @@ ifeq ("$(TARGET_OS)","linux") RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -101,6 +102,11 @@ ifeq ("$(TARGET_OS)","linux") GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif + ifeq ("$(KYLIN)","0") + GLPATH ?= /usr/lib64/nvidia + GLLINK ?= -L/usr/lib64/nvidia + DFLT_PATH ?= /usr/lib64 + endif NVSCIBUFLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libnvscibuf.so -print 2>/dev/null) NVSCISYNCLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libnvscisync.so -print 2>/dev/null) diff --git a/Samples/4_CUDA_Libraries/freeImageInteropNPP/NsightEclipse.xml b/Samples/4_CUDA_Libraries/freeImageInteropNPP/NsightEclipse.xml index 0296542c3..33f807557 100644 --- a/Samples/4_CUDA_Libraries/freeImageInteropNPP/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/freeImageInteropNPP/NsightEclipse.xml @@ -3,11 +3,11 @@ freeImageInteropNPP + cudaRuntimeGetVersion + cudaSetDevice cudaGetDeviceCount - cudaDriverGetVersion cudaDeviceInit - cudaSetDevice - cudaRuntimeGetVersion + cudaDriverGetVersion cudaGetDeviceProperties @@ -65,6 +65,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/freeImageInteropNPP/README.md b/Samples/4_CUDA_Libraries/freeImageInteropNPP/README.md index 20361fe8f..96e072b68 100644 --- a/Samples/4_CUDA_Libraries/freeImageInteropNPP/README.md +++ b/Samples/4_CUDA_Libraries/freeImageInteropNPP/README.md @@ -10,7 +10,7 @@ Performance Strategies, Image Processing, NPP Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) 
[SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaGetDeviceCount, cudaDriverGetVersion, cudaDeviceInit, cudaSetDevice, cudaRuntimeGetVersion, cudaGetDeviceProperties +cudaRuntimeGetVersion, cudaSetDevice, cudaGetDeviceCount, cudaDeviceInit, cudaDriverGetVersion, cudaGetDeviceProperties ## Dependencies needed to build/run [FreeImage](../../../README.md#freeimage), [NPP](../../../README.md#npp) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2017.vcxproj b/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2017.vcxproj index e791cbe15..b81f5f26a 100644 --- a/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -117,6 +117,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2019.vcxproj b/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2019.vcxproj index d6b27ff1b..032895951 100644 --- a/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -113,6 +113,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2022.vcxproj b/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2022.vcxproj index 6b803978f..624cfaa8d 100644 --- a/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -113,6 +113,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/histEqualizationNPP/NsightEclipse.xml b/Samples/4_CUDA_Libraries/histEqualizationNPP/NsightEclipse.xml index eadb54385..9b1554bb9 100644 --- a/Samples/4_CUDA_Libraries/histEqualizationNPP/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/histEqualizationNPP/NsightEclipse.xml @@ -3,14 +3,14 @@ histEqualizationNPP + cudaRuntimeGetVersion + cudaMemcpy cudaFree + cudaSetDevice cudaGetDeviceCount + cudaDeviceInit cudaDriverGetVersion cudaMalloc - cudaDeviceInit - cudaSetDevice - cudaRuntimeGetVersion - cudaMemcpy cudaGetDeviceProperties @@ -71,6 +71,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/histEqualizationNPP/README.md 
b/Samples/4_CUDA_Libraries/histEqualizationNPP/README.md index dff01a26f..ecf77bcb9 100644 --- a/Samples/4_CUDA_Libraries/histEqualizationNPP/README.md +++ b/Samples/4_CUDA_Libraries/histEqualizationNPP/README.md @@ -10,7 +10,7 @@ Image Processing, Performance Strategies, NPP Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGetDeviceCount, cudaDriverGetVersion, cudaMalloc, cudaDeviceInit, cudaSetDevice, cudaRuntimeGetVersion, cudaMemcpy, cudaGetDeviceProperties 
+cudaRuntimeGetVersion, cudaMemcpy, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaDeviceInit, cudaDriverGetVersion, cudaMalloc, cudaGetDeviceProperties ## Dependencies needed to build/run [FreeImage](../../../README.md#freeimage), [NPP](../../../README.md#npp) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2017.vcxproj b/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2017.vcxproj index a97516a6c..6eb23e169 100644 --- a/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -117,6 +117,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2019.vcxproj b/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2019.vcxproj index 2a220a887..6dd57051b 100644 --- a/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -113,6 +113,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2022.vcxproj b/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2022.vcxproj index b1780d1d5..3f16252a7 100644 --- a/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -113,6 +113,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/lineOfSight/Makefile 
b/Samples/4_CUDA_Libraries/lineOfSight/Makefile index 55adba98e..21b842fee 100644 --- a/Samples/4_CUDA_Libraries/lineOfSight/Makefile +++ b/Samples/4_CUDA_Libraries/lineOfSight/Makefile @@ -304,9 +304,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/lineOfSight/NsightEclipse.xml b/Samples/4_CUDA_Libraries/lineOfSight/NsightEclipse.xml index c97e235ab..e279a4fca 100644 --- a/Samples/4_CUDA_Libraries/lineOfSight/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/lineOfSight/NsightEclipse.xml @@ -6,11 +6,11 @@ --std=c++14 + cudaCreateChannelDesc + cudaMallocArray cudaFreeArray cudaDeviceSynchronize cudaCreateTextureObject - cudaMallocArray - cudaCreateChannelDesc whole @@ -57,6 +57,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/lineOfSight/README.md b/Samples/4_CUDA_Libraries/lineOfSight/README.md index cbd95d409..e704d3e2c 100644 --- a/Samples/4_CUDA_Libraries/lineOfSight/README.md +++ b/Samples/4_CUDA_Libraries/lineOfSight/README.md @@ -10,7 +10,7 @@ Thrust Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 
3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFreeArray, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMallocArray, cudaCreateChannelDesc +cudaCreateChannelDesc, cudaMallocArray, cudaFreeArray, cudaDeviceSynchronize, cudaCreateTextureObject ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2017.vcxproj b/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2017.vcxproj index 0e4b882b6..14b93fad3 100644 --- a/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/lineOfSight.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2019.vcxproj b/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2019.vcxproj index 937d7a1b6..e2dc1bf96 100644 --- a/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/lineOfSight.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2022.vcxproj b/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2022.vcxproj index 41f708799..2b15511d3 100644 --- a/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2022.vcxproj @@ 
-34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/lineOfSight.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/Makefile b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/Makefile index c5e76fb9f..dc4bc8dd1 100644 --- a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/Makefile +++ b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/NsightEclipse.xml b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/NsightEclipse.xml index 6f7125d96..8c19d35ab 100644 --- a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/NsightEclipse.xml @@ -3,14 +3,14 @@ matrixMulCUBLAS + cudaMemcpy cudaFree - cudaEventRecord - cudaEventCreate - cudaEventElapsedTime cudaEventSynchronize + cudaEventRecord cudaMalloc - cudaMemcpy + cudaEventElapsedTime cudaGetDeviceProperties + cudaEventCreate whole @@ -57,6 +57,7 @@ sm80 sm86 sm87 + sm90 new/matrixMulCUBLAS.cpp diff --git a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/README.md b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/README.md index 1390e9ebd..a5d683530 100644 --- a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/README.md +++ b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/README.md @@ -10,7 +10,7 @@ CUDA Runtime API, Performance Strategies, 
Linear Algebra, CUBLAS ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaFree, cudaEventSynchronize, cudaEventRecord, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate ## Dependencies needed to build/run [CUBLAS](../../../README.md#cublas) ## Prerequisites -Download and install the [CUDA Toolkit 
11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2017.vcxproj b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2017.vcxproj index ccc83a662..7cf90b098 100644 --- a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/matrixMulCUBLAS.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2019.vcxproj b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2019.vcxproj index 6f820ba19..1665d0fa2 100644 --- a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/matrixMulCUBLAS.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 
./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2022.vcxproj b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2022.vcxproj index 4b4d87227..e9257bfbe 100644 --- a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/matrixMulCUBLAS.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/nvJPEG/NsightEclipse.xml b/Samples/4_CUDA_Libraries/nvJPEG/NsightEclipse.xml index 901147812..baba3f94a 100644 --- a/Samples/4_CUDA_Libraries/nvJPEG/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/nvJPEG/NsightEclipse.xml @@ -3,18 +3,18 @@ nvJPEG + cudaHostAlloc + cudaStreamCreateWithFlags cudaStreamDestroy cudaFree - cudaEventRecord - cudaHostAlloc - cudaEventCreate - cudaEventElapsedTime cudaEventSynchronize + cudaEventRecord cudaFreeHost - cudaMalloc - cudaStreamCreateWithFlags cudaStreamSynchronize + cudaMalloc + cudaEventElapsedTime cudaGetDeviceProperties + cudaEventCreate whole @@ -58,6 +58,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/nvJPEG/README.md b/Samples/4_CUDA_Libraries/nvJPEG/README.md index e1d3f7fe8..a54a46ca8 100644 --- a/Samples/4_CUDA_Libraries/nvJPEG/README.md +++ b/Samples/4_CUDA_Libraries/nvJPEG/README.md @@ -10,7 +10,7 @@ Image Decoding, NVJPEG Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 
](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaStreamDestroy, cudaFree, cudaEventRecord, cudaHostAlloc, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaStreamCreateWithFlags, cudaStreamSynchronize, cudaGetDeviceProperties +cudaHostAlloc, cudaStreamCreateWithFlags, cudaStreamDestroy, cudaFree, cudaEventSynchronize, cudaEventRecord, cudaFreeHost, cudaStreamSynchronize, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate ## Dependencies needed to build/run [NVJPEG](../../../README.md#nvjpeg) ## Prerequisites 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2017.vcxproj b/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2017.vcxproj index 5abe980b0..c5931b667 100644 --- a/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2019.vcxproj b/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2019.vcxproj index 1bae3470e..05906affc 100644 --- a/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2022.vcxproj b/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2022.vcxproj index 9b68e545f..f861ff460 100644 --- a/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/NsightEclipse.xml b/Samples/4_CUDA_Libraries/nvJPEG_encoder/NsightEclipse.xml index 157b43cdc..fa59430ad 100644 --- a/Samples/4_CUDA_Libraries/nvJPEG_encoder/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/nvJPEG_encoder/NsightEclipse.xml @@ -4,14 +4,14 @@ nvJPEG_encoder cudaFree - cudaEventRecord - cudaEventCreate - cudaEventElapsedTime - cudaDeviceSynchronize + cudaGetErrorString cudaEventSynchronize + cudaDeviceSynchronize + cudaEventRecord cudaMalloc - cudaGetErrorString + cudaEventElapsedTime cudaGetDeviceProperties + cudaEventCreate whole @@ -55,6 +55,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git 
a/Samples/4_CUDA_Libraries/nvJPEG_encoder/README.md b/Samples/4_CUDA_Libraries/nvJPEG_encoder/README.md index d3170c4b2..09e2227b7 100644 --- a/Samples/4_CUDA_Libraries/nvJPEG_encoder/README.md +++ b/Samples/4_CUDA_Libraries/nvJPEG_encoder/README.md @@ -10,7 +10,7 @@ Image Encoding, NVJPEG Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventSynchronize, cudaMalloc, cudaGetErrorString, 
cudaGetDeviceProperties +cudaFree, cudaGetErrorString, cudaEventSynchronize, cudaDeviceSynchronize, cudaEventRecord, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate ## Dependencies needed to build/run [NVJPEG](../../../README.md#nvjpeg) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2017.vcxproj b/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2017.vcxproj index bd7bdf907..22a093777 100644 --- a/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2019.vcxproj b/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2019.vcxproj index bf27a1ed6..4e23250e7 100644 --- a/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2022.vcxproj b/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2022.vcxproj index fcef7dc3d..735c98926 100644 --- a/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/oceanFFT/Makefile b/Samples/4_CUDA_Libraries/oceanFFT/Makefile index 333096a42..eaa571723 100644 --- a/Samples/4_CUDA_Libraries/oceanFFT/Makefile +++ 
b/Samples/4_CUDA_Libraries/oceanFFT/Makefile @@ -299,9 +299,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/oceanFFT/NsightEclipse.xml b/Samples/4_CUDA_Libraries/oceanFFT/NsightEclipse.xml index 3c846efc6..8ac28a1ba 100644 --- a/Samples/4_CUDA_Libraries/oceanFFT/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/oceanFFT/NsightEclipse.xml @@ -3,17 +3,17 @@ oceanFFT + cudaGraphicsUnmapResources + cudaMemcpy + cudaMalloc cudaFree + cudaGraphicsResourceGetMappedPointer + cudaCalculateSlopeKernel cudaGraphicsMapResources cudaUpdateHeightmapKernel - cudaGraphicsGLRegisterBuffer - cudaCalculateSlopeKernel - cudaGraphicsResourceGetMappedPointer - cudaMalloc - cudaGenerateSpectrumKernel cudaGraphicsUnregisterResource - cudaGraphicsUnmapResources - cudaMemcpy + cudaGenerateSpectrumKernel + cudaGraphicsGLRegisterBuffer cudaGetDeviceProperties @@ -85,6 +85,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/oceanFFT/README.md b/Samples/4_CUDA_Libraries/oceanFFT/README.md index e6f1b2a22..00f7aa57e 100644 --- a/Samples/4_CUDA_Libraries/oceanFFT/README.md +++ b/Samples/4_CUDA_Libraries/oceanFFT/README.md @@ -10,7 +10,7 @@ Graphics Interop, Image Processing, CUFFT Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 
](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGraphicsMapResources, cudaUpdateHeightmapKernel, cudaGraphicsGLRegisterBuffer, cudaCalculateSlopeKernel, cudaGraphicsResourceGetMappedPointer, cudaMalloc, cudaGenerateSpectrumKernel, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetDeviceProperties +cudaGraphicsUnmapResources, cudaMemcpy, cudaMalloc, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaCalculateSlopeKernel, cudaGraphicsMapResources, cudaUpdateHeightmapKernel, cudaGraphicsUnregisterResource, cudaGenerateSpectrumKernel, cudaGraphicsGLRegisterBuffer, cudaGetDeviceProperties ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl), [CUFFT](../../../README.md#cufft) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/oceanFFT/findgllib.mk b/Samples/4_CUDA_Libraries/oceanFFT/findgllib.mk index f0a5c5512..998fcf0f1 100644 --- a/Samples/4_CUDA_Libraries/oceanFFT/findgllib.mk +++ b/Samples/4_CUDA_Libraries/oceanFFT/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2017.vcxproj b/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2017.vcxproj index ff470ed3b..09d8130bc 100644 --- a/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/oceanFFT.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -118,6 +118,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2019.vcxproj b/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2019.vcxproj index 2283b05fa..84a21720f 100644 --- a/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ 
$(OutDir)/oceanFFT.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2022.vcxproj b/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2022.vcxproj index 5e71f639f..9a86a3380 100644 --- a/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/oceanFFT.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/randomFog/NsightEclipse.xml b/Samples/4_CUDA_Libraries/randomFog/NsightEclipse.xml index 71960c724..467d2ef07 100644 --- a/Samples/4_CUDA_Libraries/randomFog/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/randomFog/NsightEclipse.xml @@ -7,9 +7,9 @@ cudaMalloc - cudaFree - cudaMemcpy cudaGetErrorString + cudaMemcpy + cudaFree whole @@ -76,6 +76,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/randomFog/README.md b/Samples/4_CUDA_Libraries/randomFog/README.md index 9a4779940..e101b5b6a 100644 --- a/Samples/4_CUDA_Libraries/randomFog/README.md +++ b/Samples/4_CUDA_Libraries/randomFog/README.md @@ -10,7 +10,7 @@ This sample illustrates pseudo- and quasi- random 
numbers produced by CURAND. ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMalloc, cudaFree, cudaMemcpy, cudaGetErrorString +cudaMalloc, cudaGetErrorString, cudaMemcpy, cudaFree ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl), [CURAND](../../../README.md#curand) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/randomFog/findgllib.mk b/Samples/4_CUDA_Libraries/randomFog/findgllib.mk index f0a5c5512..998fcf0f1 100644 --- a/Samples/4_CUDA_Libraries/randomFog/findgllib.mk +++ b/Samples/4_CUDA_Libraries/randomFog/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2017.vcxproj b/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2017.vcxproj index 6c354dee5..4e8773d61 100644 --- a/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -118,6 +118,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2019.vcxproj b/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2019.vcxproj index ec4d468c5..52ae3ec8c 100644 --- a/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -114,6 +114,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2022.vcxproj b/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2022.vcxproj index 680161ec1..126ba72fa 100644 --- a/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -114,6 +114,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS/NsightEclipse.xml 
b/Samples/4_CUDA_Libraries/simpleCUBLAS/NsightEclipse.xml index 14436606e..47e065759 100644 --- a/Samples/4_CUDA_Libraries/simpleCUBLAS/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/simpleCUBLAS/NsightEclipse.xml @@ -50,6 +50,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS/README.md b/Samples/4_CUDA_Libraries/simpleCUBLAS/README.md index d398137e1..dbb814b9e 100644 --- a/Samples/4_CUDA_Libraries/simpleCUBLAS/README.md +++ b/Samples/4_CUDA_Libraries/simpleCUBLAS/README.md @@ -10,7 +10,7 @@ Image Processing, CUBLAS Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -30,7 +30,7 @@ cudaMalloc, 
cudaFree ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2017.vcxproj b/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2017.vcxproj index 569946e5b..181e913b9 100644 --- a/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2019.vcxproj b/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2019.vcxproj index 1870c7cca..f0994fdf4 100644 --- a/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2022.vcxproj b/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2022.vcxproj index 21e6dcf2f..9640014c4 100644 --- a/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUBLASXT/NsightEclipse.xml b/Samples/4_CUDA_Libraries/simpleCUBLASXT/NsightEclipse.xml index 9aa988232..47bd44a5a 100644 --- a/Samples/4_CUDA_Libraries/simpleCUBLASXT/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/simpleCUBLASXT/NsightEclipse.xml @@ -3,9 +3,9 @@ simpleCUBLASXT - cudaFree - cudaGetDeviceCount cudaGetDeviceProperties + cudaGetDeviceCount + cudaFree whole @@ -50,6 +50,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git 
a/Samples/4_CUDA_Libraries/simpleCUBLASXT/README.md b/Samples/4_CUDA_Libraries/simpleCUBLASXT/README.md index 37d6d4e9d..0d8c9695a 100644 --- a/Samples/4_CUDA_Libraries/simpleCUBLASXT/README.md +++ b/Samples/4_CUDA_Libraries/simpleCUBLASXT/README.md @@ -10,7 +10,7 @@ CUBLAS-XT Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGetDeviceCount, cudaGetDeviceProperties +cudaGetDeviceProperties, cudaGetDeviceCount, cudaFree ## Dependencies needed to build/run 
[CUBLAS](../../../README.md#cublas) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2017.vcxproj b/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2017.vcxproj index eec501a78..297026580 100644 --- a/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2019.vcxproj b/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2019.vcxproj index 064460547..32a4ace12 100644 --- a/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2022.vcxproj b/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2022.vcxproj index 114ef777d..c900da38c 100644 --- a/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/Makefile b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/Makefile index 2a75e025e..86638c5ee 100644 --- a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/Makefile +++ b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/Makefile @@ -291,9 +291,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 
75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/NsightEclipse.xml b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/NsightEclipse.xml index 2b64842d7..3dcea4ef2 100644 --- a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/NsightEclipse.xml @@ -4,9 +4,9 @@ simpleCUBLAS_LU cudaGetErrorEnum - cudaFree cudaMalloc cudaMemcpy + cudaFree whole @@ -52,6 +52,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/README.md b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/README.md index 94bdb4e85..2b1b93dcd 100644 --- a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/README.md +++ b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/README.md @@ -10,7 +10,7 @@ CUBLAS Library, LU decomposition ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 
](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaGetErrorEnum, cudaFree, cudaMalloc, cudaMemcpy +cudaGetErrorEnum, cudaMalloc, cudaMemcpy, cudaFree ## Dependencies needed to build/run [CUBLAS](../../../README.md#cublas) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2017.vcxproj b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2017.vcxproj index 9f13e4e8b..bffe80d5f 100644 --- a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleCUBLAS_LU.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2019.vcxproj 
b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2019.vcxproj index 9c1278ca1..18c1b1172 100644 --- a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleCUBLAS_LU.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2022.vcxproj b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2022.vcxproj index 4e775803f..2ff518799 100644 --- a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleCUBLAS_LU.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT/Makefile b/Samples/4_CUDA_Libraries/simpleCUFFT/Makefile index 3c37107dd..080c25d28 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT/Makefile +++ b/Samples/4_CUDA_Libraries/simpleCUFFT/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l 
aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT/NsightEclipse.xml b/Samples/4_CUDA_Libraries/simpleCUFFT/NsightEclipse.xml index cd63fbb55..6ba60d809 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/simpleCUFFT/NsightEclipse.xml @@ -4,8 +4,8 @@ simpleCUFFT cudaMalloc - cudaFree cudaMemcpy + cudaFree whole @@ -49,6 +49,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT/README.md b/Samples/4_CUDA_Libraries/simpleCUFFT/README.md index 1d8cb4044..e91252be4 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT/README.md +++ b/Samples/4_CUDA_Libraries/simpleCUFFT/README.md @@ -10,7 +10,7 @@ Image Processing, CUFFT Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 
](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMalloc, cudaFree, cudaMemcpy +cudaMalloc, cudaMemcpy, cudaFree ## Dependencies needed to build/run [CUFFT](../../../README.md#cufft) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2017.vcxproj b/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2017.vcxproj index dadcc1f8c..a6e80d875 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleCUFFT.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2019.vcxproj b/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2019.vcxproj index 4920b00dc..5eb648920 100644 --- 
a/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleCUFFT.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2022.vcxproj b/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2022.vcxproj index 0b3dd99a7..2c5983796 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleCUFFT.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/Makefile b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/Makefile index 610b3cc9e..c21a0c600 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/Makefile +++ b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 
50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/NsightEclipse.xml b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/NsightEclipse.xml index 55dddd19a..a22e53a06 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/NsightEclipse.xml @@ -6,13 +6,13 @@ _USE_MATH_DEFINES - cudaFree cudaXtFree + cudaMemcpy + cudaFree + cudaSetDevice cudaGetDeviceCount cudaDeviceSynchronize cudaMalloc - cudaSetDevice - cudaMemcpy cudaGetDeviceProperties @@ -57,6 +57,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/README.md b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/README.md index 0c941fbb4..9cd1ad571 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/README.md +++ b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/README.md @@ -10,7 +10,7 @@ Image Processing, CUFFT Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 
](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaXtFree, cudaGetDeviceCount, cudaDeviceSynchronize, cudaMalloc, cudaSetDevice, cudaMemcpy, cudaGetDeviceProperties +cudaXtFree, cudaMemcpy, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaDeviceSynchronize, cudaMalloc, cudaGetDeviceProperties ## Dependencies needed to build/run [CUFFT](../../../README.md#cufft) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2017.vcxproj b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2017.vcxproj index 219ca46b9..836ea0639 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleCUFFT_2d_MGPU.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2019.vcxproj b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2019.vcxproj index d8d5f3603..86780b708 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleCUFFT_2d_MGPU.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2022.vcxproj b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2022.vcxproj index 
44c8dc9aa..6e6b95b91 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleCUFFT_2d_MGPU.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/Makefile b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/Makefile index 91eef96c4..94cb18d76 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/Makefile +++ b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/NsightEclipse.xml b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/NsightEclipse.xml index 5af2b802e..f7274f8ab 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/NsightEclipse.xml @@ -3,10 +3,10 @@ simpleCUFFT_MGPU + cudaXtFree + cudaSetDevice cudaGetDeviceCount cudaDeviceSynchronize - cudaSetDevice - cudaXtFree cudaGetDeviceProperties @@ -51,6 +51,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/README.md b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/README.md index 9d894764b..bfb6e031d 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/README.md +++ 
b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/README.md @@ -10,7 +10,7 @@ Image Processing, CUFFT Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaGetDeviceCount, cudaDeviceSynchronize, cudaSetDevice, cudaXtFree, cudaGetDeviceProperties +cudaXtFree, cudaSetDevice, cudaGetDeviceCount, cudaDeviceSynchronize, cudaGetDeviceProperties ## Dependencies needed to build/run [CUFFT](../../../README.md#cufft) ## Prerequisites -Download and install the [CUDA Toolkit 
11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2017.vcxproj b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2017.vcxproj index 8b222ff97..c9da79110 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleCUFFT_MGPU.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2019.vcxproj b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2019.vcxproj index 06f274044..694fc4fd5 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleCUFFT_MGPU.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" 
--threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2022.vcxproj b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2022.vcxproj index 8a2ca6a0b..a2f9ad9cd 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleCUFFT_MGPU.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_callback/Makefile b/Samples/4_CUDA_Libraries/simpleCUFFT_callback/Makefile index b9577d0d1..c5159bed5 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_callback/Makefile +++ b/Samples/4_CUDA_Libraries/simpleCUFFT_callback/Makefile @@ -316,9 +316,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 70 72 75 80 86 87 +SMS ?= 53 70 72 75 80 86 87 90 else -SMS ?= 35 50 60 70 75 80 86 +SMS ?= 35 50 60 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_callback/NsightEclipse.xml b/Samples/4_CUDA_Libraries/simpleCUFFT_callback/NsightEclipse.xml index ca2c23e69..eaa551ee6 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_callback/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/simpleCUFFT_callback/NsightEclipse.xml @@ -7,12 +7,12 @@ -std=c++11 - cudaFree - cudaMalloc cudaMemcpy + cudaFree cudaMemcpyFromSymbol - cudaGetDeviceProperties cudaGetDevice + cudaMalloc + cudaGetDeviceProperties separate @@ -60,6 +60,7 @@ 
sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_callback/README.md b/Samples/4_CUDA_Libraries/simpleCUFFT_callback/README.md index 299380028..71cd8ad11 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_callback/README.md +++ b/Samples/4_CUDA_Libraries/simpleCUFFT_callback/README.md @@ -10,7 +10,7 @@ Image Processing, CUFFT Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaMalloc, cudaMemcpy, cudaMemcpyFromSymbol, cudaGetDeviceProperties, cudaGetDevice +cudaMemcpy, cudaFree, cudaMemcpyFromSymbol, cudaGetDevice, cudaMalloc, cudaGetDeviceProperties ## Dependencies needed to build/run [callback](../../../README.md#callback), 
[CUFFT](../../../README.md#cufft) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/NsightEclipse.xml b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/NsightEclipse.xml index 3d351a11c..b16c392f5 100644 --- a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/NsightEclipse.xml @@ -3,14 +3,14 @@ watershedSegmentationNPP + cudaRuntimeGetVersion cudaFree cudaDeviceGetAttribute cudaDriverGetVersion + cudaGetDevice cudaStreamGetFlags cudaStreamSynchronize - cudaRuntimeGetVersion cudaGetDeviceProperties - cudaGetDevice whole @@ -62,6 +62,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/README.md b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/README.md index c06333e2d..540e74432 100644 --- a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/README.md +++ b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/README.md @@ -10,7 +10,7 @@ Performance Strategies, Image Processing, NPP Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 
](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaDeviceGetAttribute, cudaDriverGetVersion, cudaStreamGetFlags, cudaStreamSynchronize, cudaRuntimeGetVersion, cudaGetDeviceProperties, cudaGetDevice +cudaRuntimeGetVersion, cudaFree, cudaDeviceGetAttribute, cudaDriverGetVersion, cudaGetDevice, cudaStreamGetFlags, cudaStreamSynchronize, cudaGetDeviceProperties ## Dependencies needed to build/run [NPP](../../../README.md#npp) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2017.vcxproj b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2017.vcxproj index 6df4766bc..c752f1355 100644 --- a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2019.vcxproj b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2019.vcxproj index 494b75bcd..10f8fef82 100644 --- a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2022.vcxproj b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2022.vcxproj index e4e32e491..d960f01c0 100644 --- a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2017.vcxproj b/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2017.vcxproj index 625c2f57c..c63b2d70e 100644 --- a/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/BlackScholes.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2019.vcxproj b/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2019.vcxproj index d872e9986..adf0db2df 100644 --- a/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/BlackScholes.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2022.vcxproj b/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2022.vcxproj index 698318227..aa30adea6 100644 --- a/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/BlackScholes.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git 
a/Samples/5_Domain_Specific/BlackScholes/Makefile b/Samples/5_Domain_Specific/BlackScholes/Makefile index b7d3c5292..7935e5400 100644 --- a/Samples/5_Domain_Specific/BlackScholes/Makefile +++ b/Samples/5_Domain_Specific/BlackScholes/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/BlackScholes/NsightEclipse.xml b/Samples/5_Domain_Specific/BlackScholes/NsightEclipse.xml index ab1f601db..8af9aa6dc 100644 --- a/Samples/5_Domain_Specific/BlackScholes/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/BlackScholes/NsightEclipse.xml @@ -7,9 +7,9 @@ cudaMalloc - cudaFree cudaDeviceSynchronize cudaMemcpy + cudaFree whole @@ -50,6 +50,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/BlackScholes/README.md b/Samples/5_Domain_Specific/BlackScholes/README.md index 858b17e6e..2d5354900 100644 --- a/Samples/5_Domain_Specific/BlackScholes/README.md +++ b/Samples/5_Domain_Specific/BlackScholes/README.md @@ -10,7 +10,7 @@ Computational Finance ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 
](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMalloc, cudaFree, cudaDeviceSynchronize, cudaMemcpy +cudaMalloc, cudaDeviceSynchronize, cudaMemcpy, cudaFree ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2017.vcxproj b/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2017.vcxproj index 01556903c..0432f89e9 100644 --- a/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -108,6 +108,6 @@ - + diff --git a/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2019.vcxproj b/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2019.vcxproj index 309579cc2..c97e0a3cf 100644 --- a/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -104,6 +104,6 @@ - + diff --git a/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2022.vcxproj b/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2022.vcxproj index d65aea1d8..3796da00f 100644 --- a/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -104,6 +104,6 @@ - + diff --git a/Samples/5_Domain_Specific/BlackScholes_nvrtc/README.md b/Samples/5_Domain_Specific/BlackScholes_nvrtc/README.md index b0f955812..a0e4aa679 100644 --- a/Samples/5_Domain_Specific/BlackScholes_nvrtc/README.md +++ b/Samples/5_Domain_Specific/BlackScholes_nvrtc/README.md @@ -10,7 +10,7 @@ Computational Finance, Runtime Compilation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 
](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,7 +23,7 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuModuleGetFunction, cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuMemcpyHtoD +cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) cudaBlockSize, cudaGridSize @@ -33,7 +33,7 @@ cudaBlockSize, cudaGridSize ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2017.vcxproj b/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2017.vcxproj index 1227bc24e..09368fc92 100644 --- a/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/FDTD3d.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -112,6 +112,6 @@ - + diff --git a/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2019.vcxproj b/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2019.vcxproj index 10f2dc237..09c340056 100644 --- a/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/FDTD3d.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2022.vcxproj b/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2022.vcxproj index 8f01460ab..6af53b8b3 100644 --- a/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/FDTD3d.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/5_Domain_Specific/FDTD3d/Makefile b/Samples/5_Domain_Specific/FDTD3d/Makefile index 9f41e1d1a..bbbee3e3a 100644 --- a/Samples/5_Domain_Specific/FDTD3d/Makefile +++ b/Samples/5_Domain_Specific/FDTD3d/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/FDTD3d/NsightEclipse.xml b/Samples/5_Domain_Specific/FDTD3d/NsightEclipse.xml index 68eaef75d..7528550ba 100644 --- a/Samples/5_Domain_Specific/FDTD3d/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/FDTD3d/NsightEclipse.xml @@ -3,19 +3,19 @@ FDTD3d - cudaMemcpyToSymbol + cudaMemcpy + cudaMalloc cudaFree - cudaEventRecord - cudaEventCreate cudaFuncGetAttributes - cudaGetDeviceCount - cudaEventElapsedTime - cudaDeviceSynchronize - cudaMalloc - cudaEventDestroy cudaSetDevice - cudaMemcpy cudaGetDeviceProperties + cudaDeviceSynchronize + cudaEventRecord + cudaMemcpyToSymbol + cudaEventDestroy + cudaEventElapsedTime + cudaGetDeviceCount + cudaEventCreate whole @@ -60,6 +60,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/FDTD3d/README.md b/Samples/5_Domain_Specific/FDTD3d/README.md index c884443d4..80774398f 100644 --- a/Samples/5_Domain_Specific/FDTD3d/README.md +++ b/Samples/5_Domain_Specific/FDTD3d/README.md @@ -10,7 +10,7 @@ Performance Strategies ## 
Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemcpyToSymbol, cudaFree, cudaEventRecord, cudaEventCreate, cudaFuncGetAttributes, cudaGetDeviceCount, cudaEventElapsedTime, cudaDeviceSynchronize, cudaMalloc, cudaEventDestroy, cudaSetDevice, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaMalloc, cudaFree, cudaFuncGetAttributes, cudaSetDevice, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaEventRecord, cudaMemcpyToSymbol, cudaEventDestroy, cudaEventElapsedTime, cudaGetDeviceCount, 
cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/FlowCPU.flo b/Samples/5_Domain_Specific/HSOpticalFlow/FlowCPU.flo new file mode 100644 index 000000000..d92d45490 Binary files /dev/null and b/Samples/5_Domain_Specific/HSOpticalFlow/FlowCPU.flo differ diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/FlowGPU.flo b/Samples/5_Domain_Specific/HSOpticalFlow/FlowGPU.flo new file mode 100644 index 000000000..45e8df6f9 Binary files /dev/null and b/Samples/5_Domain_Specific/HSOpticalFlow/FlowGPU.flo differ diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2017.vcxproj b/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2017.vcxproj index 26fe77dc5..28f83eadf 100644 --- a/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/HSOpticalFlow.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -117,6 +117,6 @@ - + diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2019.vcxproj b/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2019.vcxproj index 563a57bef..f9a6290c1 100644 --- a/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2019.vcxproj @@ 
-34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/HSOpticalFlow.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -113,6 +113,6 @@ - + diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2022.vcxproj b/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2022.vcxproj index 241a3653f..fe372f4e5 100644 --- a/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/HSOpticalFlow.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -113,6 +113,6 @@ - + diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/Makefile b/Samples/5_Domain_Specific/HSOpticalFlow/Makefile index f6f1a0878..98ce7ac61 100644 --- a/Samples/5_Domain_Specific/HSOpticalFlow/Makefile +++ b/Samples/5_Domain_Specific/HSOpticalFlow/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/NsightEclipse.xml 
b/Samples/5_Domain_Specific/HSOpticalFlow/NsightEclipse.xml index 3de542e30..548a90c34 100644 --- a/Samples/5_Domain_Specific/HSOpticalFlow/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/HSOpticalFlow/NsightEclipse.xml @@ -4,9 +4,9 @@ HSOpticalFlow cudaMalloc + cudaMemcpy cudaMemset cudaFree - cudaMemcpy whole @@ -52,6 +52,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/README.md b/Samples/5_Domain_Specific/HSOpticalFlow/README.md index 363d7f17c..195c8954c 100644 --- a/Samples/5_Domain_Specific/HSOpticalFlow/README.md +++ b/Samples/5_Domain_Specific/HSOpticalFlow/README.md @@ -10,7 +10,7 @@ Image Processing, Data Parallel Algorithms ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 
](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMalloc, cudaMemset, cudaFree, cudaMemcpy +cudaMalloc, cudaMemcpy, cudaMemset, cudaFree ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/5_Domain_Specific/Mandelbrot/Makefile b/Samples/5_Domain_Specific/Mandelbrot/Makefile index 3daf6eeaf..777aa73cd 100644 --- a/Samples/5_Domain_Specific/Mandelbrot/Makefile +++ b/Samples/5_Domain_Specific/Mandelbrot/Makefile @@ -299,9 +299,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2017.vcxproj b/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2017.vcxproj index ef345fe8e..4d8405080 100644 --- a/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/Mandelbrot.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -121,6 +121,6 @@ - + diff --git 
a/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2019.vcxproj b/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2019.vcxproj index 645983939..f57745885 100644 --- a/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/Mandelbrot.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -117,6 +117,6 @@ - + diff --git a/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2022.vcxproj b/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2022.vcxproj index 786f81783..ce2b9586f 100644 --- a/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/Mandelbrot.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -117,6 +117,6 @@ - + diff --git a/Samples/5_Domain_Specific/Mandelbrot/NsightEclipse.xml b/Samples/5_Domain_Specific/Mandelbrot/NsightEclipse.xml index 00d22c181..d74f3c4f8 100644 --- a/Samples/5_Domain_Specific/Mandelbrot/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/Mandelbrot/NsightEclipse.xml @@ -3,19 +3,19 @@ Mandelbrot + cudaGLUnmapBufferObject + 
cudaGraphicsUnmapResources + cudaMemcpy cudaFree + cudaGLMapBufferObject + cudaGraphicsResourceGetMappedPointer cudaGraphicsMapResources - cudaGraphicsUnregisterResource - cudaGraphicsGLRegisterBuffer cudaGLUnregisterBufferObject - cudaGraphicsResourceGetMappedPointer - cudaGLUnmapBufferObject cudaDeviceSynchronize - cudaGLMapBufferObject - cudaMalloc cudaGLRegisterBufferObject - cudaGraphicsUnmapResources - cudaMemcpy + cudaGraphicsUnregisterResource + cudaMalloc + cudaGraphicsGLRegisterBuffer cudaGetDeviceProperties @@ -79,6 +79,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/Mandelbrot/README.md b/Samples/5_Domain_Specific/Mandelbrot/README.md index 57131463e..a09cfeed1 100644 --- a/Samples/5_Domain_Specific/Mandelbrot/README.md +++ b/Samples/5_Domain_Specific/Mandelbrot/README.md @@ -10,7 +10,7 @@ Graphics Interop, Data Parallel Algorithms ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 
](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaGraphicsGLRegisterBuffer, cudaGLUnregisterBufferObject, cudaGraphicsResourceGetMappedPointer, cudaGLUnmapBufferObject, cudaDeviceSynchronize, cudaGLMapBufferObject, cudaMalloc, cudaGLRegisterBufferObject, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetDeviceProperties +cudaGLUnmapBufferObject, cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGLMapBufferObject, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaGLUnregisterBufferObject, cudaDeviceSynchronize, cudaGLRegisterBufferObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer, cudaGetDeviceProperties ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/Mandelbrot/findgllib.mk b/Samples/5_Domain_Specific/Mandelbrot/findgllib.mk index f0a5c5512..998fcf0f1 100644 --- a/Samples/5_Domain_Specific/Mandelbrot/findgllib.mk +++ b/Samples/5_Domain_Specific/Mandelbrot/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/Makefile b/Samples/5_Domain_Specific/MonteCarloMultiGPU/Makefile index 503c96781..c38f7e446 100644 --- a/Samples/5_Domain_Specific/MonteCarloMultiGPU/Makefile +++ b/Samples/5_Domain_Specific/MonteCarloMultiGPU/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2017.vcxproj b/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2017.vcxproj index f37ca9a98..3330def6a 100644 --- a/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/MonteCarloMultiGPU.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -113,6 +113,6 @@ - + diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2019.vcxproj b/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2019.vcxproj index 9c88c3f35..cf2d4ad26 100644 --- a/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/MonteCarloMultiGPU.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2022.vcxproj b/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2022.vcxproj index a047f1c0d..852394d84 100644 --- a/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/MonteCarloMultiGPU.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/NsightEclipse.xml b/Samples/5_Domain_Specific/MonteCarloMultiGPU/NsightEclipse.xml index 34d2d8e04..bbe55936c 100644 --- a/Samples/5_Domain_Specific/MonteCarloMultiGPU/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/MonteCarloMultiGPU/NsightEclipse.xml @@ -3,23 +3,23 @@ MonteCarloMultiGPU - cudaMemset - cudaFree cudaStreamDestroy - cudaEventRecord + cudaMalloc + cudaFree cudaMallocHost - cudaStreamCreate - cudaEventCreate - cudaGetDeviceCount - cudaDeviceSynchronize + cudaSetDevice cudaEventSynchronize + cudaGetDeviceProperties + cudaDeviceSynchronize + cudaEventRecord cudaFreeHost - cudaMalloc + cudaMemset + cudaStreamSynchronize cudaEventDestroy - cudaSetDevice cudaMemcpyAsync - cudaStreamSynchronize - cudaGetDeviceProperties + cudaStreamCreate + cudaGetDeviceCount + cudaEventCreate whole @@ -69,6 +69,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/README.md b/Samples/5_Domain_Specific/MonteCarloMultiGPU/README.md index 7a0f77cb6..5eff98b76 100644 --- a/Samples/5_Domain_Specific/MonteCarloMultiGPU/README.md +++ b/Samples/5_Domain_Specific/MonteCarloMultiGPU/README.md @@ -10,7 +10,7 @@ Random Number Generator, Computational Finance, CURAND Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 
](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaStreamDestroy, cudaEventRecord, cudaMallocHost, cudaStreamCreate, cudaEventCreate, cudaGetDeviceCount, cudaDeviceSynchronize, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaEventDestroy, cudaSetDevice, cudaMemcpyAsync, cudaStreamSynchronize, cudaGetDeviceProperties +cudaStreamDestroy, cudaMalloc, cudaFree, cudaMallocHost, cudaSetDevice, cudaEventSynchronize, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaEventRecord, cudaFreeHost, cudaMemset, cudaStreamSynchronize, cudaEventDestroy, cudaMemcpyAsync, cudaStreamCreate, cudaGetDeviceCount, cudaEventCreate ## Dependencies needed to build/run [CURAND](../../../README.md#curand) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/Makefile b/Samples/5_Domain_Specific/NV12toBGRandResize/Makefile index cb4fefd63..f0ea1f7ce 100644 --- a/Samples/5_Domain_Specific/NV12toBGRandResize/Makefile +++ b/Samples/5_Domain_Specific/NV12toBGRandResize/Makefile @@ -285,9 +285,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2017.vcxproj b/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2017.vcxproj index fe9e7d3b9..845bded6b 100644 --- a/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/NV12toBGRandResize.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -112,6 +112,6 @@ - + diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2019.vcxproj b/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2019.vcxproj index b4c0cf63c..88c92e39c 100644 --- a/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2019.vcxproj +++ 
b/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/NV12toBGRandResize.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2022.vcxproj b/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2022.vcxproj index 290d571c6..09b89f949 100644 --- a/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/NV12toBGRandResize.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/NsightEclipse.xml b/Samples/5_Domain_Specific/NV12toBGRandResize/NsightEclipse.xml index 6cd413145..cec28308d 100644 --- a/Samples/5_Domain_Specific/NV12toBGRandResize/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/NV12toBGRandResize/NsightEclipse.xml @@ -3,21 +3,21 @@ NV12toBGRandResize + cudaMemcpy cudaStreamDestroy + cudaMalloc cudaFree - cudaEventRecord cudaMallocManaged - cudaStreamCreate - cudaEventCreate - cudaEventElapsedTime - 
cudaDeviceSynchronize + cudaStreamAttachMemAsync cudaDestroyTextureObject cudaEventSynchronize - cudaStreamAttachMemAsync + cudaDeviceSynchronize cudaCreateTextureObject - cudaMalloc + cudaEventRecord cudaEventDestroy - cudaMemcpy + cudaEventElapsedTime + cudaStreamCreate + cudaEventCreate whole @@ -58,6 +58,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/README.md b/Samples/5_Domain_Specific/NV12toBGRandResize/README.md index 738dd5b35..f7899913c 100644 --- a/Samples/5_Domain_Specific/NV12toBGRandResize/README.md +++ b/Samples/5_Domain_Specific/NV12toBGRandResize/README.md @@ -10,7 +10,7 @@ Graphics Interop, Image Processing, Video Processing ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 
](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaStreamDestroy, cudaFree, cudaEventRecord, cudaMallocManaged, cudaStreamCreate, cudaEventCreate, cudaEventElapsedTime, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaEventSynchronize, cudaStreamAttachMemAsync, cudaCreateTextureObject, cudaMalloc, cudaEventDestroy, cudaMemcpy +cudaMemcpy, cudaStreamDestroy, cudaMalloc, cudaFree, cudaMallocManaged, cudaStreamAttachMemAsync, cudaDestroyTextureObject, cudaEventSynchronize, cudaDeviceSynchronize, cudaCreateTextureObject, cudaEventRecord, cudaEventDestroy, cudaEventElapsedTime, cudaStreamCreate, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/5_Domain_Specific/SLID3D10Texture/README.md b/Samples/5_Domain_Specific/SLID3D10Texture/README.md index 0c62e6009..ddd18f042 100644 --- a/Samples/5_Domain_Specific/SLID3D10Texture/README.md +++ b/Samples/5_Domain_Specific/SLID3D10Texture/README.md @@ -10,7 +10,7 @@ Performance Strategies, Graphics Interop, Image Processing, 2D Textures ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -26,14 +26,14 @@ x86_64 cuCtxPushCurrent, cuCtxPopCurrent ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, 
cudaGraphicsResourceSetMapFlags, cudaGetDeviceCount, cudaMallocPitch, cudaMalloc, cudaGraphicsSubResourceGetMappedArray, cudaGetLastError, cudaGraphicsUnmapResources, cudaGetErrorString, cudaGetDeviceProperties +cudaGraphicsUnmapResources, cudaMalloc, cudaMallocPitch, cudaGetErrorString, cudaFree, cudaGraphicsResourceSetMapFlags, cudaGetLastError, cudaGraphicsMapResources, cudaGetDeviceCount, cudaMemset, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray, cudaGetDeviceProperties ## Dependencies needed to build/run [DirectX](../../../README.md#directx) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2017.vcxproj b/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2017.vcxproj index 4e8e897eb..3b3d82ca8 100644 --- a/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/SLID3D10Texture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2019.vcxproj b/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2019.vcxproj index 
a7944ca58..96c3165a7 100644 --- a/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/SLID3D10Texture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2022.vcxproj b/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2022.vcxproj index b0a7dccf0..e26cc9a44 100644 --- a/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/SLID3D10Texture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/5_Domain_Specific/SobelFilter/Makefile b/Samples/5_Domain_Specific/SobelFilter/Makefile index bc7f79a22..16893f8c0 100644 --- a/Samples/5_Domain_Specific/SobelFilter/Makefile +++ b/Samples/5_Domain_Specific/SobelFilter/Makefile @@ -299,9 +299,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 
else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/SobelFilter/NsightEclipse.xml b/Samples/5_Domain_Specific/SobelFilter/NsightEclipse.xml index 48e1cb0de..1409139c1 100644 --- a/Samples/5_Domain_Specific/SobelFilter/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/SobelFilter/NsightEclipse.xml @@ -3,20 +3,20 @@ SobelFilter - cudaFree - cudaGraphicsMapResources + cudaGraphicsUnmapResources + cudaMemcpy + cudaMallocArray cudaFreeArray - cudaGraphicsGLRegisterBuffer + cudaFree + cudaGetErrorString cudaGraphicsResourceGetMappedPointer - cudaDeviceSynchronize + cudaGraphicsMapResources cudaDestroyTextureObject + cudaDeviceSynchronize cudaCreateTextureObject - cudaMalloc - cudaMallocArray cudaGraphicsUnregisterResource - cudaGraphicsUnmapResources - cudaMemcpy - cudaGetErrorString + cudaMalloc + cudaGraphicsGLRegisterBuffer whole @@ -89,6 +89,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/SobelFilter/README.md b/Samples/5_Domain_Specific/SobelFilter/README.md index 2c8b25648..f33e8df73 100644 --- a/Samples/5_Domain_Specific/SobelFilter/README.md +++ b/Samples/5_Domain_Specific/SobelFilter/README.md @@ -10,7 +10,7 @@ Graphics Interop, Image Processing ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 
](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGraphicsMapResources, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetErrorString +cudaGraphicsUnmapResources, cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaGetErrorString, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2017.vcxproj b/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2017.vcxproj index 4914918d6..bb26dfe6d 100644 --- a/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/SobelFilter.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -118,6 +118,6 @@ - + diff --git a/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2019.vcxproj b/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2019.vcxproj index 6bef30108..8dcd7a832 100644 --- a/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/SobelFilter.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2022.vcxproj b/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2022.vcxproj index cffc57574..d9f50ec32 100644 --- a/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2022.vcxproj +++ 
b/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/SobelFilter.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/5_Domain_Specific/SobelFilter/findgllib.mk b/Samples/5_Domain_Specific/SobelFilter/findgllib.mk index f0a5c5512..998fcf0f1 100644 --- a/Samples/5_Domain_Specific/SobelFilter/findgllib.mk +++ b/Samples/5_Domain_Specific/SobelFilter/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/SobolQRNG/Makefile b/Samples/5_Domain_Specific/SobolQRNG/Makefile index 080a5c7c1..7d80c57eb 100644 --- a/Samples/5_Domain_Specific/SobolQRNG/Makefile +++ b/Samples/5_Domain_Specific/SobolQRNG/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/SobolQRNG/NsightEclipse.xml b/Samples/5_Domain_Specific/SobolQRNG/NsightEclipse.xml index d12d97cce..cddf025fb 100644 --- a/Samples/5_Domain_Specific/SobolQRNG/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/SobolQRNG/NsightEclipse.xml @@ -3,13 +3,13 @@ SobolQRNG + cudaMemcpy + cudaGetErrorString cudaFree cudaDeviceSynchronize + cudaGetDevice cudaMalloc - cudaMemcpy - cudaGetErrorString cudaGetDeviceProperties - cudaGetDevice whole @@ -50,6 +50,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/SobolQRNG/README.md 
b/Samples/5_Domain_Specific/SobolQRNG/README.md index 13789f59a..72bb3f844 100644 --- a/Samples/5_Domain_Specific/SobolQRNG/README.md +++ b/Samples/5_Domain_Specific/SobolQRNG/README.md @@ -10,7 +10,7 @@ Computational Finance ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties, cudaGetDevice +cudaMemcpy, cudaGetErrorString, cudaFree, cudaDeviceSynchronize, cudaGetDevice, cudaMalloc, 
cudaGetDeviceProperties ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2017.vcxproj b/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2017.vcxproj index ba88ef6a0..2dc8daa7c 100644 --- a/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/SobolQRNG.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -113,6 +113,6 @@ - + diff --git a/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2019.vcxproj b/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2019.vcxproj index acb7b91ab..1f7410880 100644 --- a/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/SobolQRNG.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git 
a/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2022.vcxproj b/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2022.vcxproj index 26344f69c..d54706be4 100644 --- a/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/SobolQRNG.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/5_Domain_Specific/VFlockingD3D10/README.md b/Samples/5_Domain_Specific/VFlockingD3D10/README.md index 0db3f2f80..613b56639 100644 --- a/Samples/5_Domain_Specific/VFlockingD3D10/README.md +++ b/Samples/5_Domain_Specific/VFlockingD3D10/README.md @@ -10,7 +10,7 @@ Graphics Interop, Data Parallel Algorithms, Physically-Based Simulation, Perform ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) 
[SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaGraphicsMapResources, cudaEventRecord, cudaGraphicsUnregisterResource, cudaEventCreate, cudaGraphicsResourceGetMappedPointer, cudaGetDeviceCount, cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventSynchronize, cudaGraphicsResourceSetMapFlags, cudaMalloc, cudaGetLastError, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties +cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGetErrorString, cudaGraphicsResourceSetMapFlags, cudaGraphicsResourceGetMappedPointer, cudaGetLastError, cudaGraphicsMapResources, cudaEventSynchronize, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaEventRecord, cudaMemset, cudaGraphicsUnregisterResource, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceCount, cudaEventCreate ## Dependencies needed to build/run [DirectX](../../../README.md#directx) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2017.vcxproj b/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2017.vcxproj index 37fe522b8..7744ff267 100644 --- a/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/VFlockingD3D10.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -110,6 +110,6 @@ - + diff --git a/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2019.vcxproj b/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2019.vcxproj index de8440f74..3c776cc94 100644 --- a/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/VFlockingD3D10.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -106,6 +106,6 @@ - + diff --git a/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2022.vcxproj b/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2022.vcxproj index 0213cd64d..65635eceb 100644 --- 
a/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/VFlockingD3D10.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -106,6 +106,6 @@ - + diff --git a/Samples/5_Domain_Specific/bicubicTexture/Makefile b/Samples/5_Domain_Specific/bicubicTexture/Makefile index 2e3a5627b..cc6c26808 100644 --- a/Samples/5_Domain_Specific/bicubicTexture/Makefile +++ b/Samples/5_Domain_Specific/bicubicTexture/Makefile @@ -299,9 +299,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/bicubicTexture/NsightEclipse.xml b/Samples/5_Domain_Specific/bicubicTexture/NsightEclipse.xml index b9f435b02..bb42ff843 100644 --- a/Samples/5_Domain_Specific/bicubicTexture/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/bicubicTexture/NsightEclipse.xml @@ -3,20 +3,20 @@ bicubicTexture - cudaFree - cudaGraphicsMapResources + cudaGraphicsUnmapResources + cudaCreateChannelDesc + cudaMallocArray cudaFreeArray - cudaGraphicsGLRegisterBuffer + cudaFree + cudaMemcpy cudaGraphicsResourceGetMappedPointer - cudaDeviceSynchronize + cudaGraphicsMapResources cudaDestroyTextureObject + cudaDeviceSynchronize cudaCreateTextureObject - cudaMalloc - cudaMallocArray - cudaCreateChannelDesc cudaGraphicsUnregisterResource - cudaGraphicsUnmapResources - 
cudaMemcpy + cudaMalloc + cudaGraphicsGLRegisterBuffer cudaGetDeviceProperties @@ -86,6 +86,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/bicubicTexture/README.md b/Samples/5_Domain_Specific/bicubicTexture/README.md index db1fc40f2..c972ebdab 100644 --- a/Samples/5_Domain_Specific/bicubicTexture/README.md +++ b/Samples/5_Domain_Specific/bicubicTexture/README.md @@ -10,7 +10,7 @@ Graphics Interop, Image Processing ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, 
cudaGraphicsMapResources, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaCreateChannelDesc, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetDeviceProperties +cudaGraphicsUnmapResources, cudaCreateChannelDesc, cudaMallocArray, cudaFreeArray, cudaFree, cudaMemcpy, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer, cudaGetDeviceProperties ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2017.vcxproj b/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2017.vcxproj index f7c07a0ed..11e527bb3 100644 --- a/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/bicubicTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -118,6 +118,6 @@ - + diff --git a/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2019.vcxproj b/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2019.vcxproj index 9a4ebd55b..dbb64abb2 100644 --- a/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/bicubicTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2022.vcxproj b/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2022.vcxproj index 9bb627c09..a54bf6b97 100644 --- 
a/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/bicubicTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/5_Domain_Specific/bicubicTexture/data/teapot512.pgm b/Samples/5_Domain_Specific/bicubicTexture/data/teapot512.pgm index 23db9b9a3..4e5ca4595 100644 Binary files a/Samples/5_Domain_Specific/bicubicTexture/data/teapot512.pgm and b/Samples/5_Domain_Specific/bicubicTexture/data/teapot512.pgm differ diff --git a/Samples/5_Domain_Specific/bicubicTexture/findgllib.mk b/Samples/5_Domain_Specific/bicubicTexture/findgllib.mk index f0a5c5512..998fcf0f1 100644 --- a/Samples/5_Domain_Specific/bicubicTexture/findgllib.mk +++ b/Samples/5_Domain_Specific/bicubicTexture/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) 
+ RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/bilateralFilter/Makefile b/Samples/5_Domain_Specific/bilateralFilter/Makefile index 800dfc137..2a4ee06bc 100644 --- a/Samples/5_Domain_Specific/bilateralFilter/Makefile +++ b/Samples/5_Domain_Specific/bilateralFilter/Makefile @@ -299,9 +299,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/bilateralFilter/NsightEclipse.xml b/Samples/5_Domain_Specific/bilateralFilter/NsightEclipse.xml index 8ca67bd8f..b9e13ffa6 100644 --- a/Samples/5_Domain_Specific/bilateralFilter/NsightEclipse.xml +++ 
b/Samples/5_Domain_Specific/bilateralFilter/NsightEclipse.xml @@ -3,18 +3,18 @@ bilateralFilter - cudaMemcpyToSymbol + cudaRuntimeGetVersion + cudaGraphicsUnmapResources + cudaMallocPitch cudaFree - cudaGraphicsMapResources - cudaGraphicsGLRegisterBuffer cudaGraphicsResourceGetMappedPointer - cudaDeviceSynchronize + cudaGraphicsMapResources cudaDestroyTextureObject - cudaMallocPitch + cudaDeviceSynchronize cudaCreateTextureObject + cudaMemcpyToSymbol cudaGraphicsUnregisterResource - cudaGraphicsUnmapResources - cudaRuntimeGetVersion + cudaGraphicsGLRegisterBuffer cudaGetDeviceProperties @@ -86,6 +86,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/bilateralFilter/README.md b/Samples/5_Domain_Specific/bilateralFilter/README.md index 8741c5892..b31f086f5 100644 --- a/Samples/5_Domain_Specific/bilateralFilter/README.md +++ b/Samples/5_Domain_Specific/bilateralFilter/README.md @@ -10,7 +10,7 @@ Graphics Interop, Image Processing ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 
7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemcpyToSymbol, cudaFree, cudaGraphicsMapResources, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaMallocPitch, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaRuntimeGetVersion, cudaGetDeviceProperties +cudaRuntimeGetVersion, cudaGraphicsUnmapResources, cudaMallocPitch, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMemcpyToSymbol, cudaGraphicsUnregisterResource, cudaGraphicsGLRegisterBuffer, cudaGetDeviceProperties ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2017.vcxproj b/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2017.vcxproj index 5a64d5c92..66d5cb169 100644 --- a/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/bilateralFilter.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -120,6 +120,6 @@ - + diff --git a/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2019.vcxproj b/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2019.vcxproj index 231749363..908162125 100644 --- a/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/bilateralFilter.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -116,6 +116,6 @@ - + diff --git a/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2022.vcxproj b/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2022.vcxproj index 3a4c27a8f..8f7f94ad9 100644 --- 
a/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/bilateralFilter.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -116,6 +116,6 @@ - + diff --git a/Samples/5_Domain_Specific/bilateralFilter/findgllib.mk b/Samples/5_Domain_Specific/bilateralFilter/findgllib.mk index f0a5c5512..998fcf0f1 100644 --- a/Samples/5_Domain_Specific/bilateralFilter/findgllib.mk +++ b/Samples/5_Domain_Specific/bilateralFilter/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/binomialOptions/Makefile b/Samples/5_Domain_Specific/binomialOptions/Makefile index 2c85e8cec..7d498db9c 100644 --- a/Samples/5_Domain_Specific/binomialOptions/Makefile +++ b/Samples/5_Domain_Specific/binomialOptions/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/binomialOptions/NsightEclipse.xml b/Samples/5_Domain_Specific/binomialOptions/NsightEclipse.xml index 0730f13d0..f5273e497 100644 --- a/Samples/5_Domain_Specific/binomialOptions/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/binomialOptions/NsightEclipse.xml @@ -3,8 +3,8 @@ binomialOptions - cudaMemcpyToSymbol cudaDeviceSynchronize + cudaMemcpyToSymbol cudaMemcpyFromSymbol @@ -46,6 +46,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/binomialOptions/README.md b/Samples/5_Domain_Specific/binomialOptions/README.md 
index 869a40cf3..574d7e1fb 100644 --- a/Samples/5_Domain_Specific/binomialOptions/README.md +++ b/Samples/5_Domain_Specific/binomialOptions/README.md @@ -10,7 +10,7 @@ Computational Finance ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemcpyToSymbol, cudaDeviceSynchronize, cudaMemcpyFromSymbol +cudaDeviceSynchronize, cudaMemcpyToSymbol, cudaMemcpyFromSymbol ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding 
platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2017.vcxproj b/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2017.vcxproj index a5a832abc..8416e0338 100644 --- a/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/binomialOptions.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -110,6 +110,6 @@ - + diff --git a/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2019.vcxproj b/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2019.vcxproj index 95af14387..806fed73f 100644 --- a/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/binomialOptions.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -106,6 +106,6 @@ - + diff --git a/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2022.vcxproj 
b/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2022.vcxproj index 7ff63d934..616a1dc67 100644 --- a/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/binomialOptions.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -106,6 +106,6 @@ - + diff --git a/Samples/5_Domain_Specific/binomialOptions_nvrtc/README.md b/Samples/5_Domain_Specific/binomialOptions_nvrtc/README.md index 67b0cf18f..0d96c7ef8 100644 --- a/Samples/5_Domain_Specific/binomialOptions_nvrtc/README.md +++ b/Samples/5_Domain_Specific/binomialOptions_nvrtc/README.md @@ -10,7 +10,7 @@ Computational Finance, Runtime Compilation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 
](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,7 +23,7 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuLaunchKernel, cuModuleGetGlobal, cuCtxSynchronize, cuMemcpyDtoH, cuModuleGetFunction, cuMemcpyHtoD +cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuModuleGetGlobal, cuCtxSynchronize, cuModuleGetFunction ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) cudaBlockSize, cudaGridSize @@ -33,7 +33,7 @@ cudaBlockSize, cudaGridSize ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2017.vcxproj b/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2017.vcxproj index b5cd46deb..4e020948c 100644 --- a/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -112,6 +112,6 @@ - + diff --git a/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2019.vcxproj b/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2019.vcxproj index e79add30f..153e2b1c8 100644 --- a/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -108,6 +108,6 @@ - + diff --git a/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2022.vcxproj b/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2022.vcxproj index 7dbfcf7a3..d2720c08d 100644 --- a/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -108,6 +108,6 @@ - + diff --git a/Samples/5_Domain_Specific/convolutionFFT2D/Makefile b/Samples/5_Domain_Specific/convolutionFFT2D/Makefile index 35185f284..0528320af 100644 --- a/Samples/5_Domain_Specific/convolutionFFT2D/Makefile +++ b/Samples/5_Domain_Specific/convolutionFFT2D/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/convolutionFFT2D/NsightEclipse.xml 
b/Samples/5_Domain_Specific/convolutionFFT2D/NsightEclipse.xml index f197fa28d..f81340552 100644 --- a/Samples/5_Domain_Specific/convolutionFFT2D/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/convolutionFFT2D/NsightEclipse.xml @@ -3,13 +3,13 @@ convolutionFFT2D - cudaMemset + cudaMemcpy cudaFree - cudaDeviceSynchronize cudaDestroyTextureObject + cudaDeviceSynchronize cudaCreateTextureObject + cudaMemset cudaMalloc - cudaMemcpy whole @@ -58,6 +58,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/convolutionFFT2D/README.md b/Samples/5_Domain_Specific/convolutionFFT2D/README.md index 089b847d8..0f8d51935 100644 --- a/Samples/5_Domain_Specific/convolutionFFT2D/README.md +++ b/Samples/5_Domain_Specific/convolutionFFT2D/README.md @@ -10,7 +10,7 @@ Image Processing, CUFFT Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 
8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaCreateTextureObject, cudaMalloc, cudaMemcpy +cudaMemcpy, cudaFree, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMemset, cudaMalloc ## Dependencies needed to build/run [CUFFT](../../../README.md#cufft) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2017.vcxproj b/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2017.vcxproj index 997c79125..4fe20d854 100644 --- a/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/convolutionFFT2D.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -110,6 +110,6 @@ - + diff --git a/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2019.vcxproj b/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2019.vcxproj index 3156c87f0..71bad451e 100644 --- a/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/convolutionFFT2D.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -106,6 +106,6 @@ - + diff --git a/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2022.vcxproj b/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2022.vcxproj index 641a761c0..a73a0cedb 100644 --- 
a/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/convolutionFFT2D.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -106,6 +106,6 @@ - + diff --git a/Samples/5_Domain_Specific/dwtHaar1D/Makefile b/Samples/5_Domain_Specific/dwtHaar1D/Makefile index d24cac3f6..abd407f4f 100644 --- a/Samples/5_Domain_Specific/dwtHaar1D/Makefile +++ b/Samples/5_Domain_Specific/dwtHaar1D/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/dwtHaar1D/NsightEclipse.xml b/Samples/5_Domain_Specific/dwtHaar1D/NsightEclipse.xml index 386a5469b..daa961211 100644 --- a/Samples/5_Domain_Specific/dwtHaar1D/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/dwtHaar1D/NsightEclipse.xml @@ -4,8 +4,8 @@ dwtHaar1D cudaMalloc - cudaFree cudaMemcpy + cudaFree whole @@ -46,6 +46,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/dwtHaar1D/README.md b/Samples/5_Domain_Specific/dwtHaar1D/README.md index 6d4eb9989..da368a4e9 100644 --- a/Samples/5_Domain_Specific/dwtHaar1D/README.md +++ b/Samples/5_Domain_Specific/dwtHaar1D/README.md @@ -10,7 +10,7 @@ Image Processing, Video Compression ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 
](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMalloc, cudaFree, cudaMemcpy +cudaMalloc, cudaMemcpy, cudaFree ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2017.vcxproj b/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2017.vcxproj index 6592422c6..6b6935741 100644 --- a/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/dwtHaar1D.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2019.vcxproj b/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2019.vcxproj index 9cd792c5d..c191c4ff6 100644 --- a/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/dwtHaar1D.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2022.vcxproj b/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2022.vcxproj index 739fd5e63..bba596d8e 100644 --- a/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ 
$(OutDir)/dwtHaar1D.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/5_Domain_Specific/dxtc/Makefile b/Samples/5_Domain_Specific/dxtc/Makefile index 2de94434f..b379d6c88 100644 --- a/Samples/5_Domain_Specific/dxtc/Makefile +++ b/Samples/5_Domain_Specific/dxtc/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/dxtc/NsightEclipse.xml b/Samples/5_Domain_Specific/dxtc/NsightEclipse.xml index f194bea9c..a31d16cbc 100644 --- a/Samples/5_Domain_Specific/dxtc/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/dxtc/NsightEclipse.xml @@ -3,12 +3,12 @@ dxtc + cudaMemcpy cudaFree cudaDeviceSynchronize + cudaGetDevice cudaMalloc - cudaMemcpy cudaGetDeviceProperties - cudaGetDevice whole @@ -61,6 +61,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/dxtc/README.md b/Samples/5_Domain_Specific/dxtc/README.md index f45d97eac..3a805668a 100644 --- a/Samples/5_Domain_Specific/dxtc/README.md +++ b/Samples/5_Domain_Specific/dxtc/README.md @@ -10,7 +10,7 @@ Cooperative Groups, Image Processing, Image Compression ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 
](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties, cudaGetDevice +cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaGetDevice, cudaMalloc, cudaGetDeviceProperties ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/5_Domain_Specific/dxtc/dxtc_vs2017.vcxproj b/Samples/5_Domain_Specific/dxtc/dxtc_vs2017.vcxproj index c655b3046..e1d68433b 100644 --- a/Samples/5_Domain_Specific/dxtc/dxtc_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/dxtc/dxtc_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/dxtc.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/5_Domain_Specific/dxtc/dxtc_vs2019.vcxproj b/Samples/5_Domain_Specific/dxtc/dxtc_vs2019.vcxproj index 53567caa1..c1b7f6dbc 100644 --- a/Samples/5_Domain_Specific/dxtc/dxtc_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/dxtc/dxtc_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/dxtc.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/5_Domain_Specific/dxtc/dxtc_vs2022.vcxproj b/Samples/5_Domain_Specific/dxtc/dxtc_vs2022.vcxproj index bf3c5c259..8609b640a 100644 --- a/Samples/5_Domain_Specific/dxtc/dxtc_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/dxtc/dxtc_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/dxtc.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/5_Domain_Specific/fastWalshTransform/Makefile b/Samples/5_Domain_Specific/fastWalshTransform/Makefile index 1ba7282a1..3cf3f547c 100644 --- a/Samples/5_Domain_Specific/fastWalshTransform/Makefile +++ b/Samples/5_Domain_Specific/fastWalshTransform/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/fastWalshTransform/NsightEclipse.xml b/Samples/5_Domain_Specific/fastWalshTransform/NsightEclipse.xml index a88475e9f..9e627352e 100644 --- a/Samples/5_Domain_Specific/fastWalshTransform/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/fastWalshTransform/NsightEclipse.xml @@ -3,11 +3,11 @@ fastWalshTransform - cudaMemset + cudaMemcpy cudaFree cudaDeviceSynchronize + cudaMemset cudaMalloc - cudaMemcpy whole @@ -54,6 +54,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/fastWalshTransform/README.md b/Samples/5_Domain_Specific/fastWalshTransform/README.md index 00dd99d20..473f4ce96 100644 --- a/Samples/5_Domain_Specific/fastWalshTransform/README.md +++ b/Samples/5_Domain_Specific/fastWalshTransform/README.md @@ -10,7 +10,7 @@ Linear Algebra, Data-Parallel Algorithms, Video Compression ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) 
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy +cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaMemset, cudaMalloc ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2017.vcxproj b/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2017.vcxproj index 6b2fb2f24..6ee445df1 100644 --- a/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/fastWalshTransform.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2019.vcxproj b/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2019.vcxproj index d6fdbf349..71cafa089 100644 --- a/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/fastWalshTransform.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2022.vcxproj b/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2022.vcxproj index 
129775785..8e30886b2 100644 --- a/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/fastWalshTransform.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/5_Domain_Specific/fluidsD3D9/README.md b/Samples/5_Domain_Specific/fluidsD3D9/README.md index 716a5aab9..912936d8d 100644 --- a/Samples/5_Domain_Specific/fluidsD3D9/README.md +++ b/Samples/5_Domain_Specific/fluidsD3D9/README.md @@ -10,7 +10,7 @@ Graphics Interop, CUFFT Library, Physically-Based Simulation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 
](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaFreeArray, cudaGraphicsResourceGetMappedPointer, cudaDestroyTextureObject, cudaMallocPitch, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaGetLastError, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetDeviceProperties, cudaGetDevice +cudaGraphicsUnmapResources, cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaMallocPitch, cudaGraphicsResourceGetMappedPointer, cudaGetLastError, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaGetDevice, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGetDeviceProperties ## Dependencies needed to build/run [DirectX](../../../README.md#directx) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2017.vcxproj b/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2017.vcxproj index fe88c5063..442216834 100644 --- a/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/fluidsD3D9.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -110,6 +110,6 @@ - + diff --git a/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2019.vcxproj b/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2019.vcxproj index c7cd7d376..3feb3a219 100644 --- a/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/fluidsD3D9.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -106,6 +106,6 @@ - + diff --git a/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2022.vcxproj b/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2022.vcxproj index 2ff8ad414..3dedaf001 100644 --- a/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2022.vcxproj @@ -34,7 +34,7 @@ - + 
@@ -63,7 +63,7 @@ $(OutDir)/fluidsD3D9.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -106,6 +106,6 @@ - + diff --git a/Samples/5_Domain_Specific/fluidsGL/Makefile b/Samples/5_Domain_Specific/fluidsGL/Makefile index 4596e5edd..1c9872284 100644 --- a/Samples/5_Domain_Specific/fluidsGL/Makefile +++ b/Samples/5_Domain_Specific/fluidsGL/Makefile @@ -299,9 +299,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/fluidsGL/NsightEclipse.xml b/Samples/5_Domain_Specific/fluidsGL/NsightEclipse.xml index e2ebcaaa0..96bb4ea0d 100644 --- a/Samples/5_Domain_Specific/fluidsGL/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/fluidsGL/NsightEclipse.xml @@ -3,19 +3,19 @@ fluidsGL - cudaFree - cudaGraphicsMapResources + cudaGraphicsUnmapResources + cudaMemcpy + cudaMallocArray cudaFreeArray - cudaGraphicsGLRegisterBuffer + cudaFree + cudaMallocPitch cudaGraphicsResourceGetMappedPointer + cudaGraphicsMapResources cudaDestroyTextureObject - cudaMallocPitch cudaCreateTextureObject - cudaMalloc - cudaMallocArray cudaGraphicsUnregisterResource - cudaGraphicsUnmapResources - cudaMemcpy + cudaMalloc + cudaGraphicsGLRegisterBuffer cudaGetDeviceProperties @@ -83,6 +83,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/fluidsGL/README.md b/Samples/5_Domain_Specific/fluidsGL/README.md index ea572321d..0d492ebc8 100644 --- 
a/Samples/5_Domain_Specific/fluidsGL/README.md +++ b/Samples/5_Domain_Specific/fluidsGL/README.md @@ -10,7 +10,7 @@ Graphics Interop, CUFFT Library, Physically-Based Simulation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGraphicsMapResources, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDestroyTextureObject, cudaMallocPitch, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, 
cudaMemcpy, cudaGetDeviceProperties +cudaGraphicsUnmapResources, cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaMallocPitch, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer, cudaGetDeviceProperties ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl), [CUFFT](../../../README.md#cufft) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/fluidsGL/findgllib.mk b/Samples/5_Domain_Specific/fluidsGL/findgllib.mk index f0a5c5512..998fcf0f1 100644 --- a/Samples/5_Domain_Specific/fluidsGL/findgllib.mk +++ b/Samples/5_Domain_Specific/fluidsGL/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) 
SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2017.vcxproj b/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2017.vcxproj index c90c550ec..8d2822ef0 100644 --- a/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/fluidsGL.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -120,6 +120,6 @@ - + diff --git a/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2019.vcxproj b/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2019.vcxproj index 1e2ee21ea..aa2839e78 100644 --- 
a/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/fluidsGL.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -116,6 +116,6 @@ - + diff --git a/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2022.vcxproj b/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2022.vcxproj index 8e7553c73..eeae6a675 100644 --- a/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/fluidsGL.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -116,6 +116,6 @@ - + diff --git a/Samples/5_Domain_Specific/fluidsGLES/Makefile b/Samples/5_Domain_Specific/fluidsGLES/Makefile index 269debb05..00b514cf3 100644 --- a/Samples/5_Domain_Specific/fluidsGLES/Makefile +++ b/Samples/5_Domain_Specific/fluidsGLES/Makefile @@ -313,9 +313,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git 
a/Samples/5_Domain_Specific/fluidsGLES/NsightEclipse.xml b/Samples/5_Domain_Specific/fluidsGLES/NsightEclipse.xml index e5feea24b..afaeff7e7 100644 --- a/Samples/5_Domain_Specific/fluidsGLES/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/fluidsGLES/NsightEclipse.xml @@ -3,19 +3,19 @@ fluidsGLES - cudaFree - cudaGraphicsMapResources + cudaGraphicsUnmapResources + cudaMemcpy + cudaMallocArray cudaFreeArray - cudaGraphicsGLRegisterBuffer + cudaFree + cudaMallocPitch cudaGraphicsResourceGetMappedPointer + cudaGraphicsMapResources cudaDestroyTextureObject - cudaMallocPitch cudaCreateTextureObject - cudaMalloc - cudaMallocArray cudaGraphicsUnregisterResource - cudaGraphicsUnmapResources - cudaMemcpy + cudaMalloc + cudaGraphicsGLRegisterBuffer cudaGetDeviceProperties @@ -74,6 +74,7 @@ sm80 sm86 sm87 + sm90 arm diff --git a/Samples/5_Domain_Specific/fluidsGLES/README.md b/Samples/5_Domain_Specific/fluidsGLES/README.md index 406eb1bcb..b2432dd03 100644 --- a/Samples/5_Domain_Specific/fluidsGLES/README.md +++ b/Samples/5_Domain_Specific/fluidsGLES/README.md @@ -10,7 +10,7 @@ Graphics Interop, CUFFT Library, Physically-Based Simulation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 
](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGraphicsMapResources, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDestroyTextureObject, cudaMallocPitch, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetDeviceProperties +cudaGraphicsUnmapResources, cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaMallocPitch, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer, cudaGetDeviceProperties ## Dependencies needed to build/run [X11](../../../README.md#x11), [GLES](../../../README.md#gles), [CUFFT](../../../README.md#cufft) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/fluidsGLES/findgleslib.mk b/Samples/5_Domain_Specific/fluidsGLES/findgleslib.mk index bcb335c1f..6da2f0781 100644 --- a/Samples/5_Domain_Specific/fluidsGLES/findgleslib.mk +++ b/Samples/5_Domain_Specific/fluidsGLES/findgleslib.mk @@ -60,6 +60,7 @@ ifeq ("$(TARGET_OS)","linux") RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -86,27 +87,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif - ifeq ("$(SUSE)","0") + + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - + # find libGL, libGLU, libXi, EGLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libEGL.so -print 2>/dev/null) GLESLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLESv2.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/marchingCubes/Makefile b/Samples/5_Domain_Specific/marchingCubes/Makefile index 91180207e..baf4f0f0f 100644 --- a/Samples/5_Domain_Specific/marchingCubes/Makefile +++ b/Samples/5_Domain_Specific/marchingCubes/Makefile @@ -324,9 +324,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 
50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/marchingCubes/NsightEclipse.xml b/Samples/5_Domain_Specific/marchingCubes/NsightEclipse.xml index ea03311eb..0c23d0201 100644 --- a/Samples/5_Domain_Specific/marchingCubes/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/marchingCubes/NsightEclipse.xml @@ -6,21 +6,21 @@ --std=c++14 + cudaGLUnmapBufferObject + cudaGraphicsUnmapResources + cudaCreateChannelDesc + cudaMemcpy cudaFree - cudaGraphicsMapResources - cudaGraphicsUnregisterResource - cudaGraphicsGLRegisterBuffer - cudaGLUnregisterBufferObject + cudaGLMapBufferObject cudaGraphicsResourceGetMappedPointer - cudaGLUnmapBufferObject + cudaGraphicsMapResources cudaDestroyTextureObject - cudaGLMapBufferObject + cudaGLUnregisterBufferObject cudaCreateTextureObject - cudaMalloc - cudaCreateChannelDesc cudaGLRegisterBufferObject - cudaGraphicsUnmapResources - cudaMemcpy + cudaGraphicsUnregisterResource + cudaMalloc + cudaGraphicsGLRegisterBuffer whole @@ -92,6 +92,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/marchingCubes/README.md b/Samples/5_Domain_Specific/marchingCubes/README.md index 683f4f6b6..0c2ed9e1a 100644 --- a/Samples/5_Domain_Specific/marchingCubes/README.md +++ b/Samples/5_Domain_Specific/marchingCubes/README.md @@ -10,7 +10,7 @@ OpenGL Graphics Interop, Vertex Buffers, 3D Graphics, Physically Based Simulatio ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 
](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaGraphicsGLRegisterBuffer, cudaGLUnregisterBufferObject, cudaGraphicsResourceGetMappedPointer, cudaGLUnmapBufferObject, cudaDestroyTextureObject, cudaGLMapBufferObject, cudaCreateTextureObject, cudaMalloc, cudaCreateChannelDesc, cudaGLRegisterBufferObject, cudaGraphicsUnmapResources, cudaMemcpy +cudaGLUnmapBufferObject, cudaGraphicsUnmapResources, cudaCreateChannelDesc, cudaMemcpy, cudaFree, cudaGLMapBufferObject, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaGLUnregisterBufferObject, cudaCreateTextureObject, cudaGLRegisterBufferObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/marchingCubes/findgllib.mk b/Samples/5_Domain_Specific/marchingCubes/findgllib.mk index f0a5c5512..998fcf0f1 100644 --- a/Samples/5_Domain_Specific/marchingCubes/findgllib.mk +++ b/Samples/5_Domain_Specific/marchingCubes/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2017.vcxproj b/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2017.vcxproj index 9eb7601b4..9d5e9d284 100644 --- a/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/marchingCubes.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -119,6 +119,6 @@ - + diff --git a/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2019.vcxproj b/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2019.vcxproj index e2edca520..9e370d0bd 100644 --- a/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2019.vcxproj +++ 
b/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/marchingCubes.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -115,6 +115,6 @@ - + diff --git a/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2022.vcxproj b/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2022.vcxproj index 284e00cbe..ef1da8804 100644 --- a/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/marchingCubes.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -115,6 +115,6 @@ - + diff --git a/Samples/5_Domain_Specific/nbody/Makefile b/Samples/5_Domain_Specific/nbody/Makefile index d1d2e6141..f4e1df427 100644 --- a/Samples/5_Domain_Specific/nbody/Makefile +++ b/Samples/5_Domain_Specific/nbody/Makefile @@ -299,9 +299,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git 
a/Samples/5_Domain_Specific/nbody/NsightEclipse.xml b/Samples/5_Domain_Specific/nbody/NsightEclipse.xml index 293adc581..213de2217 100644 --- a/Samples/5_Domain_Specific/nbody/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/nbody/NsightEclipse.xml @@ -6,24 +6,24 @@ -ftz=true - cudaMemcpyToSymbol - cudaGraphicsMapResources - cudaEventRecord - cudaStreamQuery - cudaEventCreate + cudaGraphicsUnmapResources + cudaSetDeviceFlags + cudaGraphicsResourceSetMapFlags cudaGraphicsResourceGetMappedPointer + cudaGraphicsMapResources + cudaSetDevice + cudaEventSynchronize cudaGetDeviceCount - cudaEventElapsedTime + cudaGetDeviceProperties cudaDeviceSynchronize - cudaEventSynchronize - cudaGraphicsResourceSetMapFlags - cudaSetDeviceFlags + cudaEventRecord + cudaGetDevice + cudaMemcpyToSymbol + cudaStreamQuery cudaEventDestroy + cudaEventElapsedTime cudaDeviceCanAccessPeer - cudaSetDevice - cudaGraphicsUnmapResources - cudaGetDeviceProperties - cudaGetDevice + cudaEventCreate " to the command line will allow users to set # of bodies for simulation. Adding “-numdevices=” to the command line option will cause the sample to use N devices (if available) for simulation. In this mode, the position and velocity data for all bodies are read from system memory using “zero copy” rather than from device memory. 
For a small number of devices (4 or fewer) and a large enough number of bodies, bandwidth is not a bottleneck so we can achieve strong scaling across these devices.]]> whole @@ -91,6 +91,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/nbody/README.md b/Samples/5_Domain_Specific/nbody/README.md index 40d122a7b..837296c76 100644 --- a/Samples/5_Domain_Specific/nbody/README.md +++ b/Samples/5_Domain_Specific/nbody/README.md @@ -10,7 +10,7 @@ Graphics Interop, Data Parallel Algorithms, Physically-Based Simulation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved 
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemcpyToSymbol, cudaGraphicsMapResources, cudaEventRecord, cudaStreamQuery, cudaEventCreate, cudaGraphicsResourceGetMappedPointer, cudaGetDeviceCount, cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventSynchronize, cudaGraphicsResourceSetMapFlags, cudaSetDeviceFlags, cudaEventDestroy, cudaDeviceCanAccessPeer, cudaSetDevice, cudaGraphicsUnmapResources, cudaGetDeviceProperties, cudaGetDevice +cudaGraphicsUnmapResources, cudaSetDeviceFlags, cudaGraphicsResourceSetMapFlags, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaSetDevice, cudaEventSynchronize, cudaGetDeviceCount, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaEventRecord, cudaGetDevice, cudaMemcpyToSymbol, cudaStreamQuery, cudaEventDestroy, cudaEventElapsedTime, cudaDeviceCanAccessPeer, cudaEventCreate ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/nbody/findgllib.mk b/Samples/5_Domain_Specific/nbody/findgllib.mk index f0a5c5512..998fcf0f1 100644 --- a/Samples/5_Domain_Specific/nbody/findgllib.mk +++ b/Samples/5_Domain_Specific/nbody/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) 
- RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/nbody/nbody_vs2017.vcxproj b/Samples/5_Domain_Specific/nbody/nbody_vs2017.vcxproj index 1406b31bf..99e5a6bfb 100644 --- a/Samples/5_Domain_Specific/nbody/nbody_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/nbody/nbody_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/nbody.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -125,6 +125,6 @@ - + diff --git a/Samples/5_Domain_Specific/nbody/nbody_vs2019.vcxproj b/Samples/5_Domain_Specific/nbody/nbody_vs2019.vcxproj index ae04b090f..7662e5009 100644 --- a/Samples/5_Domain_Specific/nbody/nbody_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/nbody/nbody_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/nbody.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -121,6 +121,6 @@ - + diff --git a/Samples/5_Domain_Specific/nbody/nbody_vs2022.vcxproj b/Samples/5_Domain_Specific/nbody/nbody_vs2022.vcxproj index c1495525e..1c2c9ecfb 100644 --- a/Samples/5_Domain_Specific/nbody/nbody_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/nbody/nbody_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/nbody.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -121,6 +121,6 @@ - + diff --git a/Samples/5_Domain_Specific/nbody_opengles/Makefile b/Samples/5_Domain_Specific/nbody_opengles/Makefile index 5b56f1054..ef0b753bf 100644 --- 
a/Samples/5_Domain_Specific/nbody_opengles/Makefile +++ b/Samples/5_Domain_Specific/nbody_opengles/Makefile @@ -313,9 +313,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/nbody_opengles/NsightEclipse.xml b/Samples/5_Domain_Specific/nbody_opengles/NsightEclipse.xml index e7e56c9a8..4ffba110a 100644 --- a/Samples/5_Domain_Specific/nbody_opengles/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/nbody_opengles/NsightEclipse.xml @@ -6,23 +6,23 @@ -ftz=true - cudaMemcpyToSymbol - cudaGraphicsMapResources - cudaEventRecord - cudaStreamQuery - cudaEventCreate - cudaGraphicsResourceGetMappedPointer - cudaGetDeviceCount - cudaEventElapsedTime - cudaDeviceSynchronize - cudaEventSynchronize - cudaGraphicsResourceSetMapFlags + cudaGraphicsUnmapResources cudaSetDeviceFlags - cudaEventDestroy + cudaGraphicsResourceSetMapFlags + cudaGraphicsResourceGetMappedPointer + cudaGraphicsMapResources cudaSetDevice - cudaGraphicsUnmapResources + cudaEventSynchronize cudaGetDeviceProperties + cudaDeviceSynchronize + cudaEventRecord cudaGetDevice + cudaMemcpyToSymbol + cudaStreamQuery + cudaEventDestroy + cudaEventElapsedTime + cudaGetDeviceCount + cudaEventCreate whole @@ -81,6 +81,7 @@ sm80 sm86 sm87 + sm90 arm diff --git a/Samples/5_Domain_Specific/nbody_opengles/README.md b/Samples/5_Domain_Specific/nbody_opengles/README.md index 3c4772d26..a7911e09e 100644 --- a/Samples/5_Domain_Specific/nbody_opengles/README.md +++ b/Samples/5_Domain_Specific/nbody_opengles/README.md @@ -10,7 +10,7 @@ Graphics Interop, Data Parallel Algorithms, Physically-Based Simulation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 
](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemcpyToSymbol, cudaGraphicsMapResources, cudaEventRecord, cudaStreamQuery, cudaEventCreate, cudaGraphicsResourceGetMappedPointer, cudaGetDeviceCount, cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventSynchronize, cudaGraphicsResourceSetMapFlags, cudaSetDeviceFlags, cudaEventDestroy, cudaSetDevice, cudaGraphicsUnmapResources, cudaGetDeviceProperties, cudaGetDevice +cudaGraphicsUnmapResources, cudaSetDeviceFlags, cudaGraphicsResourceSetMapFlags, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaSetDevice, cudaEventSynchronize, cudaGetDeviceProperties, cudaDeviceSynchronize, 
cudaEventRecord, cudaGetDevice, cudaMemcpyToSymbol, cudaStreamQuery, cudaEventDestroy, cudaEventElapsedTime, cudaGetDeviceCount, cudaEventCreate ## Dependencies needed to build/run [X11](../../../README.md#x11), [GLES](../../../README.md#gles) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/nbody_opengles/findgleslib.mk b/Samples/5_Domain_Specific/nbody_opengles/findgleslib.mk index bcb335c1f..6da2f0781 100644 --- a/Samples/5_Domain_Specific/nbody_opengles/findgleslib.mk +++ b/Samples/5_Domain_Specific/nbody_opengles/findgleslib.mk @@ -60,6 +60,7 @@ ifeq ("$(TARGET_OS)","linux") RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -86,27 +87,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif - ifeq ("$(SUSE)","0") + + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - + # find libGL, libGLU, libXi, EGLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libEGL.so -print 2>/dev/null) GLESLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLESv2.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/nbody_screen/Makefile b/Samples/5_Domain_Specific/nbody_screen/Makefile index 60558f494..b54e17953 100644 --- a/Samples/5_Domain_Specific/nbody_screen/Makefile +++ b/Samples/5_Domain_Specific/nbody_screen/Makefile @@ -320,9 +320,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/nbody_screen/NsightEclipse.xml b/Samples/5_Domain_Specific/nbody_screen/NsightEclipse.xml index 1c3275ce4..074a2e5b4 100644 --- a/Samples/5_Domain_Specific/nbody_screen/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/nbody_screen/NsightEclipse.xml @@ -6,23 +6,23 @@ -ftz=true - cudaMemcpyToSymbol - cudaGraphicsMapResources - cudaEventRecord - cudaStreamQuery - cudaEventCreate - cudaGraphicsResourceGetMappedPointer - cudaGetDeviceCount - cudaEventElapsedTime - cudaDeviceSynchronize - cudaEventSynchronize - 
cudaGraphicsResourceSetMapFlags + cudaGraphicsUnmapResources cudaSetDeviceFlags - cudaEventDestroy + cudaGraphicsResourceSetMapFlags + cudaGraphicsResourceGetMappedPointer + cudaGraphicsMapResources cudaSetDevice - cudaGraphicsUnmapResources + cudaEventSynchronize cudaGetDeviceProperties + cudaDeviceSynchronize + cudaEventRecord cudaGetDevice + cudaMemcpyToSymbol + cudaStreamQuery + cudaEventDestroy + cudaEventElapsedTime + cudaGetDeviceCount + cudaEventCreate whole @@ -79,6 +79,7 @@ sm80 sm86 sm87 + sm90 qnx diff --git a/Samples/5_Domain_Specific/nbody_screen/README.md b/Samples/5_Domain_Specific/nbody_screen/README.md index 73787bdd8..54b9df1c0 100644 --- a/Samples/5_Domain_Specific/nbody_screen/README.md +++ b/Samples/5_Domain_Specific/nbody_screen/README.md @@ -10,7 +10,7 @@ Graphics Interop, Data Parallel Algorithms, Physically-Based Simulation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 
7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemcpyToSymbol, cudaGraphicsMapResources, cudaEventRecord, cudaStreamQuery, cudaEventCreate, cudaGraphicsResourceGetMappedPointer, cudaGetDeviceCount, cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventSynchronize, cudaGraphicsResourceSetMapFlags, cudaSetDeviceFlags, cudaEventDestroy, cudaSetDevice, cudaGraphicsUnmapResources, cudaGetDeviceProperties, cudaGetDevice +cudaGraphicsUnmapResources, cudaSetDeviceFlags, cudaGraphicsResourceSetMapFlags, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaSetDevice, cudaEventSynchronize, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaEventRecord, cudaGetDevice, cudaMemcpyToSymbol, cudaStreamQuery, cudaEventDestroy, cudaEventElapsedTime, cudaGetDeviceCount, cudaEventCreate ## Dependencies needed to build/run [screen](../../../README.md#screen), [GLES](../../../README.md#gles) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/nbody_screen/findgleslib.mk b/Samples/5_Domain_Specific/nbody_screen/findgleslib.mk index bcb335c1f..6da2f0781 100644 --- a/Samples/5_Domain_Specific/nbody_screen/findgleslib.mk +++ b/Samples/5_Domain_Specific/nbody_screen/findgleslib.mk @@ -60,6 +60,7 @@ ifeq ("$(TARGET_OS)","linux") RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -86,27 +87,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif - ifeq ("$(SUSE)","0") + + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - + # find libGL, libGLU, libXi, EGLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libEGL.so -print 2>/dev/null) GLESLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLESv2.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/Makefile b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/Makefile index 6933444e7..37afba153 100644 --- a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/Makefile +++ b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 
+SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/NsightEclipse.xml b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/NsightEclipse.xml index 1fc38a6c9..57679e4cf 100644 --- a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/NsightEclipse.xml @@ -3,30 +3,30 @@ p2pBandwidthLatencyTest - cudaDeviceEnablePeerAccess - cudaOccupancyMaxPotentialBlockSize - cudaStreamCreateWithFlags - cudaDeviceCanAccessPeer - cudaStreamDestroy - cudaHostAlloc - cudaEventCreate - cudaMalloc - cudaEventDestroy cudaSetDevice - cudaMemcpyPeerAsync - cudaGetDeviceProperties + cudaEventDestroy + cudaOccupancyMaxPotentialBlockSize cudaCheckError + cudaFreeHost cudaGetDeviceCount - cudaEventElapsedTime + cudaDeviceCanAccessPeer + cudaStreamCreateWithFlags + cudaStreamDestroy cudaGetLastError - cudaDeviceDisablePeerAccess - cudaStreamSynchronize - cudaGetErrorString - cudaStreamWaitEvent cudaMemset + cudaStreamWaitEvent + cudaEventElapsedTime + cudaEventCreate + cudaHostAlloc cudaFree + cudaGetErrorString + cudaMemcpyPeerAsync + cudaDeviceDisablePeerAccess cudaEventRecord - cudaFreeHost + cudaStreamSynchronize + cudaDeviceEnablePeerAccess + cudaMalloc + cudaGetDeviceProperties whole @@ -71,6 +71,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/README.md b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/README.md index 72e34fb1b..1df07a630 100644 --- a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/README.md +++ b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/README.md @@ -10,7 +10,7 @@ Performance Strategies, Asynchronous Data Transfers, Unified Virtual Address Spa ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 
](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaDeviceEnablePeerAccess, cudaOccupancyMaxPotentialBlockSize, cudaStreamCreateWithFlags, cudaDeviceCanAccessPeer, cudaStreamDestroy, cudaHostAlloc, cudaEventCreate, cudaMalloc, cudaEventDestroy, cudaSetDevice, cudaMemcpyPeerAsync, cudaGetDeviceProperties, cudaCheckError, cudaGetDeviceCount, cudaEventElapsedTime, cudaGetLastError, cudaDeviceDisablePeerAccess, cudaStreamSynchronize, cudaGetErrorString, cudaStreamWaitEvent, cudaMemset, cudaFree, cudaEventRecord, cudaFreeHost +cudaSetDevice, cudaEventDestroy, cudaOccupancyMaxPotentialBlockSize, cudaCheckError, cudaFreeHost, 
cudaGetDeviceCount, cudaDeviceCanAccessPeer, cudaStreamCreateWithFlags, cudaStreamDestroy, cudaGetLastError, cudaMemset, cudaStreamWaitEvent, cudaEventElapsedTime, cudaEventCreate, cudaHostAlloc, cudaFree, cudaGetErrorString, cudaMemcpyPeerAsync, cudaDeviceDisablePeerAccess, cudaEventRecord, cudaStreamSynchronize, cudaDeviceEnablePeerAccess, cudaMalloc, cudaGetDeviceProperties ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2017.vcxproj b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2017.vcxproj index 545fa82c6..cbed6fc18 100644 --- a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/p2pBandwidthLatencyTest.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2019.vcxproj b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2019.vcxproj index f8e092744..43fbfc443 100644 --- a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2019.vcxproj +++ 
b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/p2pBandwidthLatencyTest.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2022.vcxproj b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2022.vcxproj index 6e35634da..284865521 100644 --- a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/p2pBandwidthLatencyTest.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/5_Domain_Specific/postProcessGL/Makefile b/Samples/5_Domain_Specific/postProcessGL/Makefile index 03790fc8d..c6f18f8df 100644 --- a/Samples/5_Domain_Specific/postProcessGL/Makefile +++ b/Samples/5_Domain_Specific/postProcessGL/Makefile @@ -299,9 +299,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else 
-SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/postProcessGL/NsightEclipse.xml b/Samples/5_Domain_Specific/postProcessGL/NsightEclipse.xml index c477aeef3..756864f0a 100644 --- a/Samples/5_Domain_Specific/postProcessGL/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/postProcessGL/NsightEclipse.xml @@ -3,22 +3,22 @@ postProcessGL + cudaHostAlloc + cudaGraphicsUnmapResources + cudaMalloc cudaFree + cudaGetChannelDesc + cudaGraphicsResourceGetMappedPointer cudaGraphicsMapResources - cudaGraphicsUnregisterResource + cudaDestroyTextureObject cudaMemcpyToArray - cudaGraphicsGLRegisterBuffer - cudaHostAlloc - cudaGraphicsResourceGetMappedPointer - cudaProcess cudaDeviceSynchronize - cudaDestroyTextureObject cudaCreateTextureObject + cudaProcess + cudaGraphicsUnregisterResource cudaGraphicsSubResourceGetMappedArray - cudaGetChannelDesc - cudaMalloc + cudaGraphicsGLRegisterBuffer cudaGraphicsGLRegisterImage - cudaGraphicsUnmapResources whole @@ -85,6 +85,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/postProcessGL/README.md b/Samples/5_Domain_Specific/postProcessGL/README.md index 53aa76e78..821e00ff5 100644 --- a/Samples/5_Domain_Specific/postProcessGL/README.md +++ b/Samples/5_Domain_Specific/postProcessGL/README.md @@ -10,7 +10,7 @@ Graphics Interop, Image Processing ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 
](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaMemcpyToArray, cudaGraphicsGLRegisterBuffer, cudaHostAlloc, cudaGraphicsResourceGetMappedPointer, cudaProcess, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaCreateTextureObject, cudaGraphicsSubResourceGetMappedArray, cudaGetChannelDesc, cudaMalloc, cudaGraphicsGLRegisterImage, cudaGraphicsUnmapResources +cudaHostAlloc, cudaGraphicsUnmapResources, cudaMalloc, cudaFree, cudaGetChannelDesc, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaMemcpyToArray, cudaDeviceSynchronize, cudaCreateTextureObject, cudaProcess, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsGLRegisterImage ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/postProcessGL/findgllib.mk b/Samples/5_Domain_Specific/postProcessGL/findgllib.mk index f0a5c5512..998fcf0f1 100644 --- a/Samples/5_Domain_Specific/postProcessGL/findgllib.mk +++ b/Samples/5_Domain_Specific/postProcessGL/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2017.vcxproj b/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2017.vcxproj index 015eb83de..23cf04064 100644 --- a/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/postProcessGL.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -118,6 +118,6 @@ - + diff --git a/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2019.vcxproj b/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2019.vcxproj index 7ac06e892..08c28e4dd 100644 --- a/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2019.vcxproj +++ 
b/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/postProcessGL.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2022.vcxproj b/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2022.vcxproj index b549c0923..b70819bab 100644 --- a/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/postProcessGL.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/5_Domain_Specific/quasirandomGenerator/Makefile b/Samples/5_Domain_Specific/quasirandomGenerator/Makefile index 3bc715cbe..04628cd3c 100644 --- a/Samples/5_Domain_Specific/quasirandomGenerator/Makefile +++ b/Samples/5_Domain_Specific/quasirandomGenerator/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif 
ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/quasirandomGenerator/NsightEclipse.xml b/Samples/5_Domain_Specific/quasirandomGenerator/NsightEclipse.xml index b66fab731..c35eaa112 100644 --- a/Samples/5_Domain_Specific/quasirandomGenerator/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/quasirandomGenerator/NsightEclipse.xml @@ -3,12 +3,12 @@ quasirandomGenerator - cudaMemcpyToSymbol - cudaMemset + cudaMemcpy cudaFree cudaDeviceSynchronize + cudaMemset + cudaMemcpyToSymbol cudaMalloc - cudaMemcpy whole @@ -49,6 +49,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/quasirandomGenerator/README.md b/Samples/5_Domain_Specific/quasirandomGenerator/README.md index 8abc9977a..54afdbbaa 100644 --- a/Samples/5_Domain_Specific/quasirandomGenerator/README.md +++ b/Samples/5_Domain_Specific/quasirandomGenerator/README.md @@ -10,7 +10,7 @@ Computational Finance ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 
](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemcpyToSymbol, cudaMemset, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy +cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaMemset, cudaMemcpyToSymbol, cudaMalloc ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2017.vcxproj b/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2017.vcxproj index c23b11728..be6fcdaeb 100644 --- a/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/quasirandomGenerator.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2019.vcxproj 
b/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2019.vcxproj index 4d1c0c507..a7083425b 100644 --- a/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/quasirandomGenerator.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2022.vcxproj b/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2022.vcxproj index dbec91a6c..0861b23ae 100644 --- a/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/quasirandomGenerator.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/README.md b/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/README.md index 3e20ffa03..c91d1a249 100644 --- a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/README.md +++ 
b/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/README.md @@ -10,7 +10,7 @@ Computational Finance, Runtime Compilation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuMemFree, cuMemcpyDtoH, cuMemAlloc +cuMemcpyDtoH, cuMemAlloc, cuMemFree ## Dependencies needed to build/run [NVRTC](../../../README.md#nvrtc) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2017.vcxproj b/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2017.vcxproj index b07a5e07e..3faf85502 100644 --- a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -110,6 +110,6 @@ - + diff --git a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2019.vcxproj b/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2019.vcxproj index e11918b8b..cb8893b84 100644 --- a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -106,6 +106,6 @@ - + diff --git a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2022.vcxproj b/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2022.vcxproj index 0bdb2adbb..9dc93ac6f 100644 --- a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -106,6 +106,6 @@ - + diff --git a/Samples/5_Domain_Specific/recursiveGaussian/Makefile b/Samples/5_Domain_Specific/recursiveGaussian/Makefile index 011149f3c..f228dea7d 100644 --- a/Samples/5_Domain_Specific/recursiveGaussian/Makefile +++ b/Samples/5_Domain_Specific/recursiveGaussian/Makefile @@ -299,9 +299,9 @@ endif # Gencode 
arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/recursiveGaussian/NsightEclipse.xml b/Samples/5_Domain_Specific/recursiveGaussian/NsightEclipse.xml index 00c1b1b7f..624f69c5a 100644 --- a/Samples/5_Domain_Specific/recursiveGaussian/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/recursiveGaussian/NsightEclipse.xml @@ -3,17 +3,17 @@ recursiveGaussian + cudaGraphicsUnmapResources + cudaMemcpy cudaFree - cudaGraphicsMapResources - cudaGraphicsGLRegisterBuffer cudaGraphicsResourceGetMappedPointer + cudaGraphicsMapResources cudaDeviceSynchronize - cudaMalloc + cudaGetDevice cudaGraphicsUnregisterResource - cudaGraphicsUnmapResources - cudaMemcpy + cudaMalloc + cudaGraphicsGLRegisterBuffer cudaGetDeviceProperties - cudaGetDevice whole @@ -84,6 +84,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/recursiveGaussian/README.md b/Samples/5_Domain_Specific/recursiveGaussian/README.md index 50d331525..9e1475c4b 100644 --- a/Samples/5_Domain_Specific/recursiveGaussian/README.md +++ b/Samples/5_Domain_Specific/recursiveGaussian/README.md @@ -10,7 +10,7 @@ Graphics Interop, Image Processing ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 
](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGraphicsMapResources, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaMalloc, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetDeviceProperties, cudaGetDevice +cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaGetDevice, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer, cudaGetDeviceProperties ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/recursiveGaussian/findgllib.mk b/Samples/5_Domain_Specific/recursiveGaussian/findgllib.mk index f0a5c5512..998fcf0f1 100644 --- a/Samples/5_Domain_Specific/recursiveGaussian/findgllib.mk +++ b/Samples/5_Domain_Specific/recursiveGaussian/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2017.vcxproj b/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2017.vcxproj index e6c68d03c..f706030a4 100644 --- a/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/recursiveGaussian.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -118,6 +118,6 @@ - + diff --git a/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2019.vcxproj b/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2019.vcxproj index a0de34452..72663de2a 100644 --- 
a/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/recursiveGaussian.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2022.vcxproj b/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2022.vcxproj index 44739af72..947a471ce 100644 --- a/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/recursiveGaussian.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D10/README.md b/Samples/5_Domain_Specific/simpleD3D10/README.md index cf446dc41..a9d7cde79 100644 --- a/Samples/5_Domain_Specific/simpleD3D10/README.md +++ b/Samples/5_Domain_Specific/simpleD3D10/README.md @@ -10,7 +10,7 @@ Graphics Interop, 3D Graphics ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) 
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaGraphicsResourceGetMappedPointer, cudaGetDeviceCount, cudaGetLastError, cudaGraphicsUnmapResources, cudaGetErrorString, cudaGetDeviceProperties +cudaGraphicsUnmapResources, cudaGetErrorString, cudaGraphicsResourceGetMappedPointer, cudaGetLastError, cudaGraphicsMapResources, cudaGetDeviceCount, cudaGraphicsUnregisterResource, cudaGetDeviceProperties ## Dependencies needed to build/run [DirectX](../../../README.md#directx) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for 
your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2017.vcxproj index b5bbaf2d1..4dadd1937 100644 --- a/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleD3D10.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2019.vcxproj index 3b3f6b28f..1dca8e143 100644 --- a/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleD3D10.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2022.vcxproj 
b/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2022.vcxproj index 738f55bc8..630c0ea1c 100644 --- a/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleD3D10.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D10RenderTarget/README.md b/Samples/5_Domain_Specific/simpleD3D10RenderTarget/README.md index a8b689086..49077dc7b 100644 --- a/Samples/5_Domain_Specific/simpleD3D10RenderTarget/README.md +++ b/Samples/5_Domain_Specific/simpleD3D10RenderTarget/README.md @@ -10,7 +10,7 @@ Graphics Interop, Texture ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 
](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaGraphicsResourceGetMappedPointer, cudaGetDeviceCount, cudaMalloc, cudaGraphicsSubResourceGetMappedArray, cudaBindTextureToArray, cudaUnbindTexture, cudaGetLastError, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties +cudaGraphicsUnmapResources, cudaMemcpy, cudaMalloc, cudaUnbindTexture, cudaGetErrorString, cudaGraphicsResourceGetMappedPointer, cudaGetLastError, cudaGraphicsMapResources, cudaGetDeviceCount, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray, cudaBindTextureToArray, cudaGetDeviceProperties ## Dependencies needed to build/run [DirectX](../../../README.md#directx) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2017.vcxproj index 19a3c9f2e..08ad0dfc2 100644 --- a/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleD3D10RenderTarget.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2019.vcxproj index 335175e08..5478db879 100644 --- a/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleD3D10RenderTarget.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git 
a/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2022.vcxproj index 75759d68e..532e9d4dd 100644 --- a/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleD3D10RenderTarget.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D10Texture/README.md b/Samples/5_Domain_Specific/simpleD3D10Texture/README.md index f96f34c7f..c8f1a2ed2 100644 --- a/Samples/5_Domain_Specific/simpleD3D10Texture/README.md +++ b/Samples/5_Domain_Specific/simpleD3D10Texture/README.md @@ -10,7 +10,7 @@ Graphics Interop, Texture ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 
](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaGetDeviceCount, cudaMallocPitch, cudaMalloc, cudaGraphicsSubResourceGetMappedArray, cudaGetLastError, cudaGraphicsUnmapResources, cudaGetErrorString, cudaGetDeviceProperties +cudaGraphicsUnmapResources, cudaMalloc, cudaMallocPitch, cudaGetErrorString, cudaFree, cudaGetLastError, cudaGraphicsMapResources, cudaGetDeviceCount, cudaMemset, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray, cudaGetDeviceProperties ## Dependencies needed to build/run [DirectX](../../../README.md#directx) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2017.vcxproj index 2ad151f89..f5f7322f1 100644 --- a/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleD3D10Texture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -111,6 +111,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2019.vcxproj index 793e7da35..d0c81315a 100644 --- a/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleD3D10Texture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2022.vcxproj index 
aa188b082..ebc268195 100644 --- a/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleD3D10Texture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D11/README.md b/Samples/5_Domain_Specific/simpleD3D11/README.md index cfdb0c790..fadf5bf0d 100644 --- a/Samples/5_Domain_Specific/simpleD3D11/README.md +++ b/Samples/5_Domain_Specific/simpleD3D11/README.md @@ -10,7 +10,7 @@ Graphics Interop, Image Processing ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 
](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaSignalExternalSemaphoresAsync, cudaImportVertexBuffer, cudaAcquireSync, cudaGetDeviceCount, cudaImportExternalSemaphore, cudaReleaseSync, cudaImportExternalMemory, cudaExternalMemoryGetMappedBuffer, cudaImportKeyedMutex, cudaStreamCreateWithFlags, cudaDestroyExternalSemaphore, cudaSetDevice, cudaWaitExternalSemaphoresAsync, cudaDestroyExternalMemory +cudaImportKeyedMutex, cudaExternalMemoryGetMappedBuffer, cudaStreamCreateWithFlags, cudaWaitExternalSemaphoresAsync, cudaImportExternalSemaphore, cudaFree, cudaImportVertexBuffer, cudaReleaseSync, cudaSetDevice, cudaSignalExternalSemaphoresAsync, cudaAcquireSync, cudaDestroyExternalMemory, cudaImportExternalMemory, cudaGetDeviceCount, cudaDestroyExternalSemaphore ## Dependencies needed to build/run [DirectX](../../../README.md#directx) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2017.vcxproj index 9f0f0e9d0..2a11df06e 100644 --- a/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleD3D11.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -111,6 +111,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2019.vcxproj index af23ccc6d..f3dbb2e0c 100644 --- a/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleD3D11.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2022.vcxproj index b29aa4bb1..7338f410c 100644 --- a/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2022.vcxproj +++ 
b/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleD3D11.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D11Texture/README.md b/Samples/5_Domain_Specific/simpleD3D11Texture/README.md index da8a38751..9c4cf9544 100644 --- a/Samples/5_Domain_Specific/simpleD3D11Texture/README.md +++ b/Samples/5_Domain_Specific/simpleD3D11Texture/README.md @@ -10,7 +10,7 @@ Graphics Interop, Image Processing ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 
7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaGetDeviceCount, cudaMallocPitch, cudaMalloc, cudaGraphicsSubResourceGetMappedArray, cudaGetLastError, cudaGraphicsUnmapResources, cudaGetErrorString, cudaGetDeviceProperties +cudaGraphicsUnmapResources, cudaMalloc, cudaMallocPitch, cudaGetErrorString, cudaFree, cudaGetLastError, cudaGraphicsMapResources, cudaGetDeviceCount, cudaMemset, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray, cudaGetDeviceProperties ## Dependencies needed to build/run [DirectX](../../../README.md#directx) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2017.vcxproj index 81f6d1566..025c68ff9 100644 --- a/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleD3D11Texture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -112,6 +112,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2019.vcxproj index e2b4e089e..67799b210 100644 --- a/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleD3D11Texture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2022.vcxproj index 
9b06a4fd2..6345c10cc 100644 --- a/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleD3D11Texture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D12/NsightEclipse.xml b/Samples/5_Domain_Specific/simpleD3D12/NsightEclipse.xml index f1843ba85..0e142a5a6 100644 --- a/Samples/5_Domain_Specific/simpleD3D12/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/simpleD3D12/NsightEclipse.xml @@ -3,19 +3,19 @@ simpleD3D12 + cudaWaitExternalSemaphoresAsync + cudaExternalMemoryGetMappedBuffer + cudaImportExternalSemaphore cudaFree + cudaSetDevice cudaSignalExternalSemaphoresAsync - cudaStreamCreate - cudaGetDeviceCount - cudaImportExternalSemaphore cudaGetDeviceProperties - cudaImportExternalMemory - cudaExternalMemoryGetMappedBuffer - cudaDestroyExternalSemaphore - cudaSetDevice - cudaWaitExternalSemaphoresAsync cudaStreamSynchronize cudaDestroyExternalMemory + cudaStreamCreate + cudaImportExternalMemory + cudaGetDeviceCount + cudaDestroyExternalSemaphore whole @@ -59,6 +59,7 @@ sm80 sm86 sm87 + sm90 windows10 diff --git a/Samples/5_Domain_Specific/simpleD3D12/README.md b/Samples/5_Domain_Specific/simpleD3D12/README.md index 5dc8a4467..2e472bf01 100644 --- a/Samples/5_Domain_Specific/simpleD3D12/README.md +++ b/Samples/5_Domain_Specific/simpleD3D12/README.md @@ -10,7 +10,7 @@ Graphics Interop, CUDA DX12 Interop, Image Processing ## Supported SM Architectures -[SM 3.5 
](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaSignalExternalSemaphoresAsync, cudaStreamCreate, cudaGetDeviceCount, cudaImportExternalSemaphore, cudaGetDeviceProperties, cudaImportExternalMemory, cudaExternalMemoryGetMappedBuffer, cudaDestroyExternalSemaphore, cudaSetDevice, cudaWaitExternalSemaphoresAsync, cudaStreamSynchronize, cudaDestroyExternalMemory +cudaWaitExternalSemaphoresAsync, cudaExternalMemoryGetMappedBuffer, cudaImportExternalSemaphore, cudaFree, cudaSetDevice, cudaSignalExternalSemaphoresAsync, cudaGetDeviceProperties, 
cudaStreamSynchronize, cudaDestroyExternalMemory, cudaStreamCreate, cudaImportExternalMemory, cudaGetDeviceCount, cudaDestroyExternalSemaphore ## Dependencies needed to build/run [DirectX12](../../../README.md#directx12) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2017.vcxproj index b75c6dae9..35882386e 100644 --- a/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleD3D12.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -119,6 +119,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2019.vcxproj index 44e46df31..32c9763c8 100644 --- a/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2019.vcxproj @@ -39,7 +39,7 @@ - + @@ -68,7 +68,7 @@ $(OutDir)/simpleD3D12.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -120,6 +120,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2022.vcxproj index 5a793f3c6..a74620920 100644 --- a/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2022.vcxproj @@ -39,7 +39,7 @@ - + @@ -68,7 +68,7 @@ $(OutDir)/simpleD3D12.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -120,6 +120,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D9/README.md b/Samples/5_Domain_Specific/simpleD3D9/README.md index 4a9ea67b1..708a6cf32 100644 --- a/Samples/5_Domain_Specific/simpleD3D9/README.md +++ b/Samples/5_Domain_Specific/simpleD3D9/README.md @@ -10,7 +10,7 @@ Graphics Interop ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 
](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaGraphicsResourceGetMappedPointer, cudaGetLastError, cudaGraphicsUnmapResources +cudaGraphicsUnmapResources, cudaGraphicsResourceGetMappedPointer, cudaGetLastError, cudaGraphicsMapResources, cudaGraphicsUnregisterResource ## Dependencies needed to build/run [DirectX](../../../README.md#directx) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2017.vcxproj index bea083382..513a7b289 100644 --- a/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleD3D9.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2019.vcxproj index 9089cbf72..6c61823a8 100644 --- a/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleD3D9.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2022.vcxproj index ffda5e782..3389eb905 100644 --- a/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2022.vcxproj @@ -34,7 +34,7 @@ - + 
@@ -63,7 +63,7 @@ $(OutDir)/simpleD3D9.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D9Texture/README.md b/Samples/5_Domain_Specific/simpleD3D9Texture/README.md index 96dffa342..51bf5c0b8 100644 --- a/Samples/5_Domain_Specific/simpleD3D9Texture/README.md +++ b/Samples/5_Domain_Specific/simpleD3D9Texture/README.md @@ -10,7 +10,7 @@ Graphics Interop, Texture ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 
](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaMallocPitch, cudaMalloc, cudaGraphicsSubResourceGetMappedArray, cudaGetLastError, cudaGraphicsUnmapResources +cudaGraphicsUnmapResources, cudaMalloc, cudaMallocPitch, cudaFree, cudaGetLastError, cudaGraphicsMapResources, cudaMemset, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray ## Dependencies needed to build/run [DirectX](../../../README.md#directx) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2017.vcxproj index 5e316cdcb..ab2a79d1f 100644 --- a/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleD3D9Texture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -111,6 +111,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2019.vcxproj index 3280f4648..53853db53 100644 --- a/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleD3D9Texture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2022.vcxproj index 0654afd27..de004fdc7 100644 --- 
a/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleD3D9Texture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleGL/Makefile b/Samples/5_Domain_Specific/simpleGL/Makefile index ad666c8af..47d5ed800 100644 --- a/Samples/5_Domain_Specific/simpleGL/Makefile +++ b/Samples/5_Domain_Specific/simpleGL/Makefile @@ -299,9 +299,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/simpleGL/NsightEclipse.xml b/Samples/5_Domain_Specific/simpleGL/NsightEclipse.xml index 228af026d..0b73f33ba 100644 --- a/Samples/5_Domain_Specific/simpleGL/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/simpleGL/NsightEclipse.xml @@ -3,15 +3,15 @@ simpleGL + cudaGraphicsUnmapResources + cudaMemcpy cudaFree - cudaGraphicsMapResources - cudaGraphicsGLRegisterBuffer cudaGraphicsResourceGetMappedPointer + cudaGraphicsMapResources cudaDeviceSynchronize - cudaMalloc cudaGraphicsUnregisterResource - cudaGraphicsUnmapResources - cudaMemcpy + cudaMalloc + cudaGraphicsGLRegisterBuffer whole @@ -70,6 +70,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/simpleGL/README.md b/Samples/5_Domain_Specific/simpleGL/README.md index 
e1fc492b7..5fc035275 100644 --- a/Samples/5_Domain_Specific/simpleGL/README.md +++ b/Samples/5_Domain_Specific/simpleGL/README.md @@ -10,7 +10,7 @@ Graphics Interop, Vertex Buffers, 3D Graphics ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGraphicsMapResources, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaMalloc, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy +cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, 
cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/simpleGL/findgllib.mk b/Samples/5_Domain_Specific/simpleGL/findgllib.mk index f0a5c5512..998fcf0f1 100644 --- a/Samples/5_Domain_Specific/simpleGL/findgllib.mk +++ b/Samples/5_Domain_Specific/simpleGL/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2017.vcxproj index cf0f4bdc7..89aab5d94 100644 --- a/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleGL.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -117,6 +117,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2019.vcxproj index 67bdb2314..934fb116e 100644 --- a/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 
@@ $(OutDir)/simpleGL.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -113,6 +113,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2022.vcxproj index f6a41b0ba..e571db13d 100644 --- a/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleGL.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -113,6 +113,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleGLES/Makefile b/Samples/5_Domain_Specific/simpleGLES/Makefile index 51e327739..6e0e516e4 100644 --- a/Samples/5_Domain_Specific/simpleGLES/Makefile +++ b/Samples/5_Domain_Specific/simpleGLES/Makefile @@ -313,9 +313,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/simpleGLES/NsightEclipse.xml b/Samples/5_Domain_Specific/simpleGLES/NsightEclipse.xml index 93fd9c154..fc5a25be5 100644 --- 
a/Samples/5_Domain_Specific/simpleGLES/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/simpleGLES/NsightEclipse.xml @@ -8,15 +8,15 @@ -DUSE_GLES + cudaGraphicsUnmapResources + cudaMemcpy cudaFree - cudaGraphicsMapResources - cudaGraphicsGLRegisterBuffer cudaGraphicsResourceGetMappedPointer + cudaGraphicsMapResources cudaDeviceSynchronize - cudaMalloc cudaGraphicsUnregisterResource - cudaGraphicsUnmapResources - cudaMemcpy + cudaMalloc + cudaGraphicsGLRegisterBuffer whole @@ -66,6 +66,7 @@ sm80 sm86 sm87 + sm90 graphics_interface.c diff --git a/Samples/5_Domain_Specific/simpleGLES/README.md b/Samples/5_Domain_Specific/simpleGLES/README.md index 454218cfa..50644d746 100644 --- a/Samples/5_Domain_Specific/simpleGLES/README.md +++ b/Samples/5_Domain_Specific/simpleGLES/README.md @@ -10,7 +10,7 @@ Graphics Interop, Vertex Buffers, 3D Graphics ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 
](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGraphicsMapResources, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaMalloc, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy +cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer ## Dependencies needed to build/run [X11](../../../README.md#x11), [GLES](../../../README.md#gles) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/simpleGLES/findgleslib.mk b/Samples/5_Domain_Specific/simpleGLES/findgleslib.mk index bcb335c1f..6da2f0781 100644 --- a/Samples/5_Domain_Specific/simpleGLES/findgleslib.mk +++ b/Samples/5_Domain_Specific/simpleGLES/findgleslib.mk @@ -60,6 +60,7 @@ ifeq ("$(TARGET_OS)","linux") RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -86,27 +87,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif - ifeq ("$(SUSE)","0") + + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - + # find libGL, libGLU, libXi, EGLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libEGL.so -print 2>/dev/null) GLESLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLESv2.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/simpleGLES_EGLOutput/Makefile b/Samples/5_Domain_Specific/simpleGLES_EGLOutput/Makefile index d3e12b7a6..7debcfaa3 100644 --- a/Samples/5_Domain_Specific/simpleGLES_EGLOutput/Makefile +++ b/Samples/5_Domain_Specific/simpleGLES_EGLOutput/Makefile @@ -313,9 +313,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/simpleGLES_EGLOutput/NsightEclipse.xml b/Samples/5_Domain_Specific/simpleGLES_EGLOutput/NsightEclipse.xml index 27cd49821..a12a32e06 100644 --- a/Samples/5_Domain_Specific/simpleGLES_EGLOutput/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/simpleGLES_EGLOutput/NsightEclipse.xml @@ -10,15 +10,15 @@ -I/usr/include/drm + cudaGraphicsUnmapResources + cudaMemcpy cudaFree - cudaGraphicsMapResources - cudaGraphicsGLRegisterBuffer cudaGraphicsResourceGetMappedPointer + 
cudaGraphicsMapResources cudaDeviceSynchronize - cudaMalloc cudaGraphicsUnregisterResource - cudaGraphicsUnmapResources - cudaMemcpy + cudaMalloc + cudaGraphicsGLRegisterBuffer sm80 sm86 sm87 + sm90 graphics_interface_egloutput_via_egl.c diff --git a/Samples/5_Domain_Specific/simpleGLES_EGLOutput/README.md b/Samples/5_Domain_Specific/simpleGLES_EGLOutput/README.md index c468f0bb0..f11b2411d 100644 --- a/Samples/5_Domain_Specific/simpleGLES_EGLOutput/README.md +++ b/Samples/5_Domain_Specific/simpleGLES_EGLOutput/README.md @@ -15,7 +15,7 @@ Graphics Interop, Vertex Buffers, 3D Graphics ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ 
-28,14 +28,14 @@ armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGraphicsMapResources, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaMalloc, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy +cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer ## Dependencies needed to build/run [EGLOutput](../../../README.md#egloutput), [GLES](../../../README.md#gles) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/simpleGLES_EGLOutput/findgleslib.mk b/Samples/5_Domain_Specific/simpleGLES_EGLOutput/findgleslib.mk index bcb335c1f..6da2f0781 100644 --- a/Samples/5_Domain_Specific/simpleGLES_EGLOutput/findgleslib.mk +++ b/Samples/5_Domain_Specific/simpleGLES_EGLOutput/findgleslib.mk @@ -60,6 +60,7 @@ ifeq ("$(TARGET_OS)","linux") RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -86,27 +87,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif - ifeq ("$(SUSE)","0") + + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - + # find libGL, libGLU, libXi, EGLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libEGL.so -print 2>/dev/null) GLESLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLESv2.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/simpleGLES_screen/Makefile b/Samples/5_Domain_Specific/simpleGLES_screen/Makefile index 971ce716c..2629cc492 100644 --- a/Samples/5_Domain_Specific/simpleGLES_screen/Makefile +++ b/Samples/5_Domain_Specific/simpleGLES_screen/Makefile @@ -320,9 +320,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/simpleGLES_screen/NsightEclipse.xml b/Samples/5_Domain_Specific/simpleGLES_screen/NsightEclipse.xml index 342e0ce5d..0b911915e 100644 --- a/Samples/5_Domain_Specific/simpleGLES_screen/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/simpleGLES_screen/NsightEclipse.xml @@ -9,15 +9,15 @@ -DWIN_INTERFACE_CUSTOM + cudaGraphicsUnmapResources + cudaMemcpy cudaFree - cudaGraphicsMapResources - cudaGraphicsGLRegisterBuffer cudaGraphicsResourceGetMappedPointer + cudaGraphicsMapResources 
cudaDeviceSynchronize - cudaMalloc cudaGraphicsUnregisterResource - cudaGraphicsUnmapResources - cudaMemcpy + cudaMalloc + cudaGraphicsGLRegisterBuffer whole @@ -67,6 +67,7 @@ sm80 sm86 sm87 + sm90 graphics_interface.c diff --git a/Samples/5_Domain_Specific/simpleGLES_screen/README.md b/Samples/5_Domain_Specific/simpleGLES_screen/README.md index cf4804fd8..78f96be49 100644 --- a/Samples/5_Domain_Specific/simpleGLES_screen/README.md +++ b/Samples/5_Domain_Specific/simpleGLES_screen/README.md @@ -10,7 +10,7 @@ Graphics Interop, Vertex Buffers, 3D Graphics ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ aarch64 ## CUDA 
APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGraphicsMapResources, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaMalloc, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy +cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer ## Dependencies needed to build/run [screen](../../../README.md#screen), [GLES](../../../README.md#gles) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/simpleGLES_screen/findgleslib.mk b/Samples/5_Domain_Specific/simpleGLES_screen/findgleslib.mk index bcb335c1f..6da2f0781 100644 --- a/Samples/5_Domain_Specific/simpleGLES_screen/findgleslib.mk +++ b/Samples/5_Domain_Specific/simpleGLES_screen/findgleslib.mk @@ -60,6 +60,7 @@ ifeq ("$(TARGET_OS)","linux") RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -86,27 +87,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif - ifeq ("$(SUSE)","0") + + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - + # find libGL, libGLU, libXi, EGLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libEGL.so -print 2>/dev/null) GLESLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLESv2.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/simpleVulkan/Makefile b/Samples/5_Domain_Specific/simpleVulkan/Makefile index 83f57ad69..8b5cfd465 100644 --- a/Samples/5_Domain_Specific/simpleVulkan/Makefile +++ b/Samples/5_Domain_Specific/simpleVulkan/Makefile @@ -338,9 +338,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/simpleVulkan/NsightEclipse.xml b/Samples/5_Domain_Specific/simpleVulkan/NsightEclipse.xml index a0ec1c564..1d9449e73 100644 --- a/Samples/5_Domain_Specific/simpleVulkan/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/simpleVulkan/NsightEclipse.xml @@ -6,24 +6,24 @@ --std=c++11 - cudaTimelineSemaphore - cudaSignalExternalSemaphoresAsync - cudaOccupancyMaxActiveBlocksPerMultiprocessor - cudaGetDeviceCount - cudaImportExternalSemaphore - cudaVertMem - cudaImportExternalMemory - cudaDestroyExternalMemory + 
cudaStreamCreateWithFlags cudaExternalMemoryGetMappedBuffer cudaSignalSemaphore - cudaStreamCreateWithFlags + cudaWaitExternalSemaphoresAsync + cudaVertMem + cudaImportExternalSemaphore cudaWaitSemaphore cudaHeightMap - cudaDestroyExternalSemaphore cudaSetDevice - cudaWaitExternalSemaphoresAsync + cudaGetDeviceCount + cudaSignalExternalSemaphoresAsync + cudaTimelineSemaphore cudaStreamSynchronize + cudaDestroyExternalMemory + cudaOccupancyMaxActiveBlocksPerMultiprocessor + cudaImportExternalMemory cudaGetDeviceProperties + cudaDestroyExternalSemaphore whole @@ -75,6 +75,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/simpleVulkan/README.md b/Samples/5_Domain_Specific/simpleVulkan/README.md index 27e2dd04f..fd287027a 100644 --- a/Samples/5_Domain_Specific/simpleVulkan/README.md +++ b/Samples/5_Domain_Specific/simpleVulkan/README.md @@ -10,7 +10,7 @@ Graphics Interop, CUDA Vulkan Interop, Data Parallel Algorithms ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 
](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaTimelineSemaphore, cudaSignalExternalSemaphoresAsync, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaGetDeviceCount, cudaImportExternalSemaphore, cudaVertMem, cudaImportExternalMemory, cudaDestroyExternalMemory, cudaExternalMemoryGetMappedBuffer, cudaSignalSemaphore, cudaStreamCreateWithFlags, cudaWaitSemaphore, cudaHeightMap, cudaDestroyExternalSemaphore, cudaSetDevice, cudaWaitExternalSemaphoresAsync, cudaStreamSynchronize, cudaGetDeviceProperties +cudaStreamCreateWithFlags, cudaExternalMemoryGetMappedBuffer, cudaSignalSemaphore, cudaWaitExternalSemaphoresAsync, cudaVertMem, cudaImportExternalSemaphore, cudaWaitSemaphore, cudaHeightMap, cudaSetDevice, cudaGetDeviceCount, cudaSignalExternalSemaphoresAsync, cudaTimelineSemaphore, cudaStreamSynchronize, cudaDestroyExternalMemory, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaImportExternalMemory, cudaGetDeviceProperties, cudaDestroyExternalSemaphore ## Dependencies needed to build/run [X11](../../../README.md#x11), [VULKAN](../../../README.md#vulkan) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/simpleVulkan/findvulkan.mk b/Samples/5_Domain_Specific/simpleVulkan/findvulkan.mk index 4d4e8aed7..004ab22b5 100644 --- a/Samples/5_Domain_Specific/simpleVulkan/findvulkan.mk +++ b/Samples/5_Domain_Specific/simpleVulkan/findvulkan.mk @@ -57,6 +57,7 @@ ifeq ("$(TARGET_OS)","linux") RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -103,6 +104,11 @@ ifeq ("$(TARGET_OS)","linux") GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif + ifeq ("$(KYLIN)","0") + GLPATH ?= /usr/lib64/nvidia + GLLINK ?= -L/usr/lib64/nvidia + DFLT_PATH ?= /usr/lib64 + endif VULKAN_SDK_PATH ?= ${VULKAN_SDK} diff --git a/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2017.vcxproj index 5e7353a09..1ec49d052 100644 --- a/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleVulkan.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -121,6 +121,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2019.vcxproj 
b/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2019.vcxproj index 55b1a4b50..4a4e5a635 100644 --- a/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleVulkan.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -117,6 +117,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2022.vcxproj index 9f655efa4..81252a966 100644 --- a/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleVulkan.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -117,6 +117,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/Makefile b/Samples/5_Domain_Specific/simpleVulkanMMAP/Makefile index 5a162a2b8..42e9802f9 100644 --- a/Samples/5_Domain_Specific/simpleVulkanMMAP/Makefile +++ b/Samples/5_Domain_Specific/simpleVulkanMMAP/Makefile @@ -340,9 +340,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 
70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/NsightEclipse.xml b/Samples/5_Domain_Specific/simpleVulkanMMAP/NsightEclipse.xml index fed119ce8..5f91fb68d 100644 --- a/Samples/5_Domain_Specific/simpleVulkanMMAP/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/simpleVulkanMMAP/NsightEclipse.xml @@ -6,36 +6,36 @@ --std=c++11 - cuMemRelease - cuMemExportToShareableHandle - cuMemSetAccess - cuMemMap cuMemCreate - cuMemAddressFree + cuMemAddressReserve cuMemGetAllocationGranularity + cuMemAddressFree cuMemUnmap - cuMemAddressReserve - cudaOccupancyMaxActiveBlocksPerMultiprocessor + cuMemMap + cuMemRelease + cuMemExportToShareableHandle + cuMemSetAccess + cudaWaitExternalSemaphoresAsync cudaImportExternalSemaphore - cudaStreamCreateWithFlags - cudaMemcpyAsync - cudaStreamDestroy - cudaSignalExternalSemaphoresAsync - cudaMallocHost - cudaMalloc + cudaDeviceGetAttribute cudaSetDevice - cudaGetDeviceProperties + cudaLaunchHostFunc + cudaMallocHost + cudaSignalExternalSemaphoresAsync + cudaFreeHost + cudaMemsetAsync + cudaMemcpyAsync cudaGetDeviceCount - cudaDeviceGetAttribute + cudaStreamCreateWithFlags + cudaStreamDestroy + cudaDestroyExternalSemaphore cudaSignalSemaphore cudaWaitSemaphore - cudaDestroyExternalSemaphore - cudaStreamSynchronize cudaFree - cudaLaunchHostFunc - cudaMemsetAsync - cudaFreeHost - cudaWaitExternalSemaphoresAsync + cudaStreamSynchronize + cudaMalloc + cudaOccupancyMaxActiveBlocksPerMultiprocessor + cudaGetDeviceProperties whole @@ -91,6 +91,7 @@ sm80 sm86 sm87 + sm90 ../../../Common/helper_multiprocess.cpp ../../../Common/helper_multiprocess.h diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/README.md b/Samples/5_Domain_Specific/simpleVulkanMMAP/README.md index eea7f28e9..3030b57b9 100644 --- a/Samples/5_Domain_Specific/simpleVulkanMMAP/README.md +++ 
b/Samples/5_Domain_Specific/simpleVulkanMMAP/README.md @@ -10,7 +10,7 @@ cuMemMap IPC, MMAP, Graphics Interop, CUDA Vulkan Interop, Data Parallel Algorit ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,17 +23,17 @@ x86_64, aarch64 ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuMemRelease, cuMemExportToShareableHandle, cuMemSetAccess, cuMemMap, cuMemCreate, cuMemAddressFree, cuMemGetAllocationGranularity, cuMemUnmap, cuMemAddressReserve +cuMemCreate, cuMemAddressReserve, cuMemGetAllocationGranularity, cuMemAddressFree, cuMemUnmap, cuMemMap, cuMemRelease, 
cuMemExportToShareableHandle, cuMemSetAccess ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaImportExternalSemaphore, cudaStreamCreateWithFlags, cudaMemcpyAsync, cudaStreamDestroy, cudaSignalExternalSemaphoresAsync, cudaMallocHost, cudaMalloc, cudaSetDevice, cudaGetDeviceProperties, cudaGetDeviceCount, cudaDeviceGetAttribute, cudaSignalSemaphore, cudaWaitSemaphore, cudaDestroyExternalSemaphore, cudaStreamSynchronize, cudaFree, cudaLaunchHostFunc, cudaMemsetAsync, cudaFreeHost, cudaWaitExternalSemaphoresAsync +cudaWaitExternalSemaphoresAsync, cudaImportExternalSemaphore, cudaDeviceGetAttribute, cudaSetDevice, cudaLaunchHostFunc, cudaMallocHost, cudaSignalExternalSemaphoresAsync, cudaFreeHost, cudaMemsetAsync, cudaMemcpyAsync, cudaGetDeviceCount, cudaStreamCreateWithFlags, cudaStreamDestroy, cudaDestroyExternalSemaphore, cudaSignalSemaphore, cudaWaitSemaphore, cudaFree, cudaStreamSynchronize, cudaMalloc, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaGetDeviceProperties ## Dependencies needed to build/run [X11](../../../README.md#x11), [VULKAN](../../../README.md#vulkan) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/findvulkan.mk b/Samples/5_Domain_Specific/simpleVulkanMMAP/findvulkan.mk index 4d4e8aed7..004ab22b5 100644 --- a/Samples/5_Domain_Specific/simpleVulkanMMAP/findvulkan.mk +++ b/Samples/5_Domain_Specific/simpleVulkanMMAP/findvulkan.mk @@ -57,6 +57,7 @@ ifeq ("$(TARGET_OS)","linux") RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) 
CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -103,6 +104,11 @@ ifeq ("$(TARGET_OS)","linux") GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif + ifeq ("$(KYLIN)","0") + GLPATH ?= /usr/lib64/nvidia + GLLINK ?= -L/usr/lib64/nvidia + DFLT_PATH ?= /usr/lib64 + endif VULKAN_SDK_PATH ?= ${VULKAN_SDK} diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2017.vcxproj index ea52d956b..2c6ebec4e 100644 --- a/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleVulkanMMAP.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -123,6 +123,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2019.vcxproj index b41768d6e..1343dd048 100644 --- a/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleVulkanMMAP.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -119,6 +119,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2022.vcxproj index a13ab7198..cf29fc5c7 100644 --- a/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleVulkanMMAP.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -119,6 +119,6 @@ - + diff --git a/Samples/5_Domain_Specific/smokeParticles/Makefile b/Samples/5_Domain_Specific/smokeParticles/Makefile index b6f4f0ff4..f6afadbea 100644 --- a/Samples/5_Domain_Specific/smokeParticles/Makefile +++ b/Samples/5_Domain_Specific/smokeParticles/Makefile @@ -324,9 +324,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/smokeParticles/NsightEclipse.xml 
b/Samples/5_Domain_Specific/smokeParticles/NsightEclipse.xml index ca2f9c07b..581dba9a7 100644 --- a/Samples/5_Domain_Specific/smokeParticles/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/smokeParticles/NsightEclipse.xml @@ -6,10 +6,10 @@ --std=c++14 - cudaCreateTextureObject cudaExtent - cudaMemcpyToSymbol cudaPitchedPtr + cudaCreateTextureObject + cudaMemcpyToSymbol whole @@ -79,6 +79,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/smokeParticles/README.md b/Samples/5_Domain_Specific/smokeParticles/README.md index e5082eba0..3750d7735 100644 --- a/Samples/5_Domain_Specific/smokeParticles/README.md +++ b/Samples/5_Domain_Specific/smokeParticles/README.md @@ -10,7 +10,7 @@ Graphics Interop, Data Parallel Algorithms, Physically-Based Simulation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 
](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaCreateTextureObject, cudaExtent, cudaMemcpyToSymbol, cudaPitchedPtr +cudaExtent, cudaPitchedPtr, cudaCreateTextureObject, cudaMemcpyToSymbol ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/smokeParticles/findgllib.mk b/Samples/5_Domain_Specific/smokeParticles/findgllib.mk index f0a5c5512..998fcf0f1 100644 --- a/Samples/5_Domain_Specific/smokeParticles/findgllib.mk +++ b/Samples/5_Domain_Specific/smokeParticles/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) 
+ CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2017.vcxproj b/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2017.vcxproj index 07d6838af..1807407de 100644 --- a/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/smokeParticles.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -137,6 +137,6 @@ - + diff --git 
a/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2019.vcxproj b/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2019.vcxproj index e69a7c0b0..661e64103 100644 --- a/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/smokeParticles.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -133,6 +133,6 @@ - + diff --git a/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2022.vcxproj b/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2022.vcxproj index 0a64cc05b..d6bb21aa4 100644 --- a/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/smokeParticles.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -133,6 +133,6 @@ - + diff --git a/Samples/5_Domain_Specific/stereoDisparity/Makefile b/Samples/5_Domain_Specific/stereoDisparity/Makefile index 4b2ae8d0e..7608b56be 100644 --- a/Samples/5_Domain_Specific/stereoDisparity/Makefile +++ b/Samples/5_Domain_Specific/stereoDisparity/Makefile @@ -279,9 +279,9 @@ 
LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/stereoDisparity/NsightEclipse.xml b/Samples/5_Domain_Specific/stereoDisparity/NsightEclipse.xml index e4c72e7fc..a0249c743 100644 --- a/Samples/5_Domain_Specific/stereoDisparity/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/stereoDisparity/NsightEclipse.xml @@ -6,16 +6,16 @@ *.pgm + cudaMemcpy cudaFree - cudaEventRecord - cudaEventCreate - cudaEventElapsedTime - cudaDeviceSynchronize cudaEventSynchronize + cudaDeviceSynchronize cudaCreateTextureObject + cudaEventRecord cudaMalloc - cudaMemcpy + cudaEventElapsedTime cudaGetDeviceProperties + cudaEventCreate whole @@ -61,6 +61,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/stereoDisparity/README.md b/Samples/5_Domain_Specific/stereoDisparity/README.md index 1034eb62c..81b9eca46 100644 --- a/Samples/5_Domain_Specific/stereoDisparity/README.md +++ b/Samples/5_Domain_Specific/stereoDisparity/README.md @@ -10,7 +10,7 @@ Image Processing, Video Intrinsics ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 
](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventSynchronize, cudaCreateTextureObject, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaFree, cudaEventSynchronize, cudaDeviceSynchronize, cudaCreateTextureObject, cudaEventRecord, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2017.vcxproj b/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2017.vcxproj index eb5a3d5bb..9305e49dd 100644 --- a/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/stereoDisparity.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2019.vcxproj b/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2019.vcxproj index 1147bc9dc..bea60c05e 100644 --- a/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/stereoDisparity.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2022.vcxproj b/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2022.vcxproj index 16e564159..0b22b48a1 100644 --- 
a/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/stereoDisparity.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/5_Domain_Specific/volumeFiltering/Makefile b/Samples/5_Domain_Specific/volumeFiltering/Makefile index dfcda5bba..438c552e2 100644 --- a/Samples/5_Domain_Specific/volumeFiltering/Makefile +++ b/Samples/5_Domain_Specific/volumeFiltering/Makefile @@ -299,9 +299,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/volumeFiltering/NsightEclipse.xml b/Samples/5_Domain_Specific/volumeFiltering/NsightEclipse.xml index 813926c5c..7b54f46a9 100644 --- a/Samples/5_Domain_Specific/volumeFiltering/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/volumeFiltering/NsightEclipse.xml @@ -3,25 +3,25 @@ volumeFiltering - cudaGraphicsMapResources - cudaGraphicsResourceGetMappedPointer cudaMemcpy - cudaMemcpyToSymbol + cudaGraphicsMapResources cudaDestroySurfaceObject - cudaPitchedPtr - cudaMalloc - cudaGraphicsUnregisterResource - cudaDeviceSynchronize - cudaDestroyTextureObject - cudaMemset - cudaFree - cudaFreeArray - cudaGraphicsGLRegisterBuffer cudaExtent + cudaDeviceSynchronize cudaCreateSurfaceObject + cudaMemcpyToSymbol + cudaPitchedPtr + 
cudaGraphicsResourceGetMappedPointer cudaCreateTextureObject - cudaMallocArray cudaGraphicsUnmapResources + cudaMallocArray + cudaFreeArray + cudaDestroyTextureObject + cudaMemset + cudaGraphicsGLRegisterBuffer + cudaFree + cudaGraphicsUnregisterResource + cudaMalloc whole @@ -86,6 +86,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/volumeFiltering/README.md b/Samples/5_Domain_Specific/volumeFiltering/README.md index 2476ecf5e..a2bc76e36 100644 --- a/Samples/5_Domain_Specific/volumeFiltering/README.md +++ b/Samples/5_Domain_Specific/volumeFiltering/README.md @@ -10,7 +10,7 @@ Graphics Interop, Image Processing, 3D Textures, Surface Writes ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 
](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaGraphicsMapResources, cudaGraphicsResourceGetMappedPointer, cudaMemcpy, cudaMemcpyToSymbol, cudaDestroySurfaceObject, cudaPitchedPtr, cudaMalloc, cudaGraphicsUnregisterResource, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaMemset, cudaFree, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaExtent, cudaCreateSurfaceObject, cudaCreateTextureObject, cudaMallocArray, cudaGraphicsUnmapResources +cudaMemcpy, cudaGraphicsMapResources, cudaDestroySurfaceObject, cudaExtent, cudaDeviceSynchronize, cudaCreateSurfaceObject, cudaMemcpyToSymbol, cudaPitchedPtr, cudaGraphicsResourceGetMappedPointer, cudaCreateTextureObject, cudaGraphicsUnmapResources, cudaMallocArray, cudaFreeArray, cudaDestroyTextureObject, cudaMemset, cudaGraphicsGLRegisterBuffer, cudaFree, cudaGraphicsUnregisterResource, cudaMalloc ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/volumeFiltering/findgllib.mk b/Samples/5_Domain_Specific/volumeFiltering/findgllib.mk index f0a5c5512..998fcf0f1 100644 --- a/Samples/5_Domain_Specific/volumeFiltering/findgllib.mk +++ b/Samples/5_Domain_Specific/volumeFiltering/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2017.vcxproj b/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2017.vcxproj index 71b27a634..6e0686632 100644 --- a/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/volumeFiltering.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -122,6 +122,6 @@ - + diff --git a/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2019.vcxproj b/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2019.vcxproj index af2faa783..2a01aa1ce 100644 --- 
a/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/volumeFiltering.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -118,6 +118,6 @@ - + diff --git a/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2022.vcxproj b/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2022.vcxproj index 3dc02a43b..04608aaec 100644 --- a/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/volumeFiltering.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -118,6 +118,6 @@ - + diff --git a/Samples/5_Domain_Specific/volumeRender/Makefile b/Samples/5_Domain_Specific/volumeRender/Makefile index b2a74d0de..fdb5649f0 100644 --- a/Samples/5_Domain_Specific/volumeRender/Makefile +++ b/Samples/5_Domain_Specific/volumeRender/Makefile @@ -299,9 +299,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 
61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/volumeRender/NsightEclipse.xml b/Samples/5_Domain_Specific/volumeRender/NsightEclipse.xml index bfef15a20..05b789d99 100644 --- a/Samples/5_Domain_Specific/volumeRender/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/volumeRender/NsightEclipse.xml @@ -3,24 +3,24 @@ volumeRender - cudaMemcpyToSymbol - cudaMemset - cudaFree - cudaGraphicsMapResources + cudaProfilerStop + cudaGraphicsUnmapResources + cudaMemcpy + cudaMallocArray cudaFreeArray - cudaGraphicsGLRegisterBuffer + cudaFree + cudaPitchedPtr cudaGraphicsResourceGetMappedPointer + cudaGraphicsMapResources + cudaDestroyTextureObject cudaExtent cudaDeviceSynchronize - cudaDestroyTextureObject - cudaPitchedPtr cudaCreateTextureObject - cudaMalloc - cudaProfilerStop - cudaMallocArray + cudaMemset + cudaMemcpyToSymbol cudaGraphicsUnregisterResource - cudaGraphicsUnmapResources - cudaMemcpy + cudaMalloc + cudaGraphicsGLRegisterBuffer whole @@ -84,6 +84,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/volumeRender/README.md b/Samples/5_Domain_Specific/volumeRender/README.md index acc1fb2a1..d5d6ef58e 100644 --- a/Samples/5_Domain_Specific/volumeRender/README.md +++ b/Samples/5_Domain_Specific/volumeRender/README.md @@ -10,7 +10,7 @@ Graphics Interop, Image Processing, 3D Textures ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 
](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemcpyToSymbol, cudaMemset, cudaFree, cudaGraphicsMapResources, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaExtent, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaPitchedPtr, cudaCreateTextureObject, cudaMalloc, cudaProfilerStop, cudaMallocArray, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy +cudaProfilerStop, cudaGraphicsUnmapResources, cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaPitchedPtr, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaExtent, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMemset, cudaMemcpyToSymbol, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/volumeRender/findgllib.mk b/Samples/5_Domain_Specific/volumeRender/findgllib.mk index f0a5c5512..998fcf0f1 100644 --- a/Samples/5_Domain_Specific/volumeRender/findgllib.mk +++ b/Samples/5_Domain_Specific/volumeRender/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2017.vcxproj b/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2017.vcxproj index 3e6388cae..ab6037e94 100644 --- a/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/volumeRender.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -118,6 +118,6 @@ - + diff --git a/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2019.vcxproj b/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2019.vcxproj index 44536db8e..c752510d0 100644 --- a/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2019.vcxproj +++ 
b/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/volumeRender.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2022.vcxproj b/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2022.vcxproj index 30b3f8fe8..77698195a 100644 --- a/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/volumeRender.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/Makefile b/Samples/5_Domain_Specific/vulkanImageCUDA/Makefile index 7e592bbcb..56b3696a4 100644 --- a/Samples/5_Domain_Specific/vulkanImageCUDA/Makefile +++ b/Samples/5_Domain_Specific/vulkanImageCUDA/Makefile @@ -338,9 +338,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git 
a/Samples/5_Domain_Specific/vulkanImageCUDA/NsightEclipse.xml b/Samples/5_Domain_Specific/vulkanImageCUDA/NsightEclipse.xml index 33025d3a6..4fa867e9d 100644 --- a/Samples/5_Domain_Specific/vulkanImageCUDA/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/vulkanImageCUDA/NsightEclipse.xml @@ -6,33 +6,33 @@ --std=c++11 - cudaGetMipmappedArrayLevel - cudaImportExternalSemaphore - cudaExternalMemoryGetMappedMipmappedArray - cudaMemcpy - cudaDestroyExternalMemory - cudaSignalExternalSemaphoresAsync - cudaFreeMipmappedArray cudaVkSemaphoreSignal - cudaVkImportImageMem - cudaDestroySurfaceObject - cudaImportExternalMemory - cudaMalloc + cudaWaitExternalSemaphoresAsync + cudaMemcpy + cudaVkImportSemaphore + cudaImportExternalSemaphore + cudaGetMipmappedArrayLevel cudaSetDevice - cudaGetDeviceProperties + cudaDestroySurfaceObject + cudaExtent + cudaMallocMipmappedArray + cudaCreateSurfaceObject + cudaStreamCreate + cudaSignalExternalSemaphoresAsync + cudaUpdateVkImage + cudaCreateTextureObject cudaGetDeviceCount + cudaExternalMemoryGetMappedMipmappedArray cudaDestroyTextureObject - cudaUpdateVkImage + cudaVkImportImageMem + cudaDestroyExternalMemory + cudaVkSemaphoreWait + cudaImportExternalMemory cudaDestroyExternalSemaphore + cudaFreeMipmappedArray cudaFree - cudaStreamCreate - cudaVkSemaphoreWait - cudaExtent - cudaVkImportSemaphore - cudaCreateSurfaceObject - cudaMallocMipmappedArray - cudaCreateTextureObject - cudaWaitExternalSemaphoresAsync + cudaMalloc + cudaGetDeviceProperties whole @@ -80,6 +80,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/README.md b/Samples/5_Domain_Specific/vulkanImageCUDA/README.md index 4b5e262ad..97f2de7d1 100644 --- a/Samples/5_Domain_Specific/vulkanImageCUDA/README.md +++ b/Samples/5_Domain_Specific/vulkanImageCUDA/README.md @@ -10,7 +10,7 @@ Graphics Interop, CUDA Vulkan Interop, Data Parallel Algorithms ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 
](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaGetMipmappedArrayLevel, cudaImportExternalSemaphore, cudaExternalMemoryGetMappedMipmappedArray, cudaMemcpy, cudaDestroyExternalMemory, cudaSignalExternalSemaphoresAsync, cudaFreeMipmappedArray, cudaVkSemaphoreSignal, cudaVkImportImageMem, cudaDestroySurfaceObject, cudaImportExternalMemory, cudaMalloc, cudaSetDevice, cudaGetDeviceProperties, cudaGetDeviceCount, cudaDestroyTextureObject, cudaUpdateVkImage, cudaDestroyExternalSemaphore, cudaFree, cudaStreamCreate, cudaVkSemaphoreWait, cudaExtent, cudaVkImportSemaphore, 
cudaCreateSurfaceObject, cudaMallocMipmappedArray, cudaCreateTextureObject, cudaWaitExternalSemaphoresAsync +cudaVkSemaphoreSignal, cudaWaitExternalSemaphoresAsync, cudaMemcpy, cudaVkImportSemaphore, cudaImportExternalSemaphore, cudaGetMipmappedArrayLevel, cudaSetDevice, cudaDestroySurfaceObject, cudaExtent, cudaMallocMipmappedArray, cudaCreateSurfaceObject, cudaStreamCreate, cudaSignalExternalSemaphoresAsync, cudaUpdateVkImage, cudaCreateTextureObject, cudaGetDeviceCount, cudaExternalMemoryGetMappedMipmappedArray, cudaDestroyTextureObject, cudaVkImportImageMem, cudaDestroyExternalMemory, cudaVkSemaphoreWait, cudaImportExternalMemory, cudaDestroyExternalSemaphore, cudaFreeMipmappedArray, cudaFree, cudaMalloc, cudaGetDeviceProperties ## Dependencies needed to build/run [X11](../../../README.md#x11), [VULKAN](../../../README.md#vulkan) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/findvulkan.mk b/Samples/5_Domain_Specific/vulkanImageCUDA/findvulkan.mk index 4d4e8aed7..004ab22b5 100644 --- a/Samples/5_Domain_Specific/vulkanImageCUDA/findvulkan.mk +++ b/Samples/5_Domain_Specific/vulkanImageCUDA/findvulkan.mk @@ -57,6 +57,7 @@ ifeq ("$(TARGET_OS)","linux") RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -103,6 +104,11 @@ ifeq ("$(TARGET_OS)","linux") GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif + ifeq ("$(KYLIN)","0") + GLPATH ?= /usr/lib64/nvidia + GLLINK ?= -L/usr/lib64/nvidia + DFLT_PATH ?= /usr/lib64 + endif VULKAN_SDK_PATH ?= ${VULKAN_SDK} diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2017.vcxproj b/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2017.vcxproj index 93db012f7..424170ab9 100644 --- a/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/vulkanImageCUDA.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -117,6 +117,6 @@ - + diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2019.vcxproj b/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2019.vcxproj index 632afdc26..769231d57 100644 --- a/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/vulkanImageCUDA.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -113,6 +113,6 @@ - + diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2022.vcxproj b/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2022.vcxproj index 530a2f71f..997263975 100644 --- a/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/vulkanImageCUDA.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -113,6 +113,6 @@ - + diff --git a/Samples/6_Performance/UnifiedMemoryPerf/Makefile b/Samples/6_Performance/UnifiedMemoryPerf/Makefile index 05eb45ee5..2f1a6188b 100644 --- a/Samples/6_Performance/UnifiedMemoryPerf/Makefile +++ b/Samples/6_Performance/UnifiedMemoryPerf/Makefile @@ -285,9 +285,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/6_Performance/UnifiedMemoryPerf/NsightEclipse.xml b/Samples/6_Performance/UnifiedMemoryPerf/NsightEclipse.xml index a3b50f03f..6217899d4 100644 --- a/Samples/6_Performance/UnifiedMemoryPerf/NsightEclipse.xml +++ b/Samples/6_Performance/UnifiedMemoryPerf/NsightEclipse.xml @@ -3,19 +3,19 @@ 
UnifiedMemoryPerf + cudaMemcpy cudaStreamDestroy + cudaMemPrefetchAsync cudaFree cudaMallocHost cudaMallocManaged - cudaMemPrefetchAsync - cudaStreamCreate cudaStreamAttachMemAsync + cudaHostGetDevicePointer cudaFreeHost + cudaStreamSynchronize cudaMalloc cudaMemcpyAsync - cudaStreamSynchronize - cudaHostGetDevicePointer - cudaMemcpy + cudaStreamCreate cudaGetDeviceProperties @@ -66,6 +66,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/6_Performance/UnifiedMemoryPerf/README.md b/Samples/6_Performance/UnifiedMemoryPerf/README.md index 575b7ee1c..6c1ce62c3 100644 --- a/Samples/6_Performance/UnifiedMemoryPerf/README.md +++ b/Samples/6_Performance/UnifiedMemoryPerf/README.md @@ -10,7 +10,7 @@ CUDA Systems Integration, Unified Memory, CUDA Streams and Events, Pinned System ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 
](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaStreamDestroy, cudaFree, cudaMallocHost, cudaMallocManaged, cudaMemPrefetchAsync, cudaStreamCreate, cudaStreamAttachMemAsync, cudaFreeHost, cudaMalloc, cudaMemcpyAsync, cudaStreamSynchronize, cudaHostGetDevicePointer, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaStreamDestroy, cudaMemPrefetchAsync, cudaFree, cudaMallocHost, cudaMallocManaged, cudaStreamAttachMemAsync, cudaHostGetDevicePointer, cudaFreeHost, cudaStreamSynchronize, cudaMalloc, cudaMemcpyAsync, cudaStreamCreate, cudaGetDeviceProperties ## Dependencies needed to build/run [UVM](../../../README.md#uvm) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2017.vcxproj b/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2017.vcxproj index 5b99766a2..7d59c0aec 100644 --- a/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2017.vcxproj +++ b/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/UnifiedMemoryPerf.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -110,6 +110,6 @@ - + diff --git a/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2019.vcxproj b/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2019.vcxproj index 823351a1f..9b5e36571 100644 --- a/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2019.vcxproj +++ b/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/UnifiedMemoryPerf.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -106,6 +106,6 @@ - + diff --git a/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2022.vcxproj b/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2022.vcxproj index 7dfac552a..536d6d5b8 100644 --- 
a/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2022.vcxproj +++ b/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/UnifiedMemoryPerf.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -106,6 +106,6 @@ - + diff --git a/Samples/6_Performance/alignedTypes/Makefile b/Samples/6_Performance/alignedTypes/Makefile index 907e3f446..492ad1974 100644 --- a/Samples/6_Performance/alignedTypes/Makefile +++ b/Samples/6_Performance/alignedTypes/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/6_Performance/alignedTypes/NsightEclipse.xml b/Samples/6_Performance/alignedTypes/NsightEclipse.xml index 388b2897e..0b1d16af4 100644 --- a/Samples/6_Performance/alignedTypes/NsightEclipse.xml +++ b/Samples/6_Performance/alignedTypes/NsightEclipse.xml @@ -3,11 +3,11 @@ alignedTypes - cudaMemset + cudaMemcpy cudaFree cudaDeviceSynchronize + cudaMemset cudaMalloc - cudaMemcpy cudaGetDeviceProperties @@ -46,6 +46,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/6_Performance/alignedTypes/README.md b/Samples/6_Performance/alignedTypes/README.md index 85c8bd55e..56e5e265a 100644 --- a/Samples/6_Performance/alignedTypes/README.md +++ b/Samples/6_Performance/alignedTypes/README.md @@ -10,7 +10,7 @@ Performance Strategies ## Supported SM Architectures -[SM 3.5 
](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaMemset, cudaMalloc, cudaGetDeviceProperties ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/6_Performance/alignedTypes/alignedTypes_vs2017.vcxproj b/Samples/6_Performance/alignedTypes/alignedTypes_vs2017.vcxproj index 73e89699f..7e3439f69 100644 --- a/Samples/6_Performance/alignedTypes/alignedTypes_vs2017.vcxproj +++ b/Samples/6_Performance/alignedTypes/alignedTypes_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/alignedTypes.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/6_Performance/alignedTypes/alignedTypes_vs2019.vcxproj b/Samples/6_Performance/alignedTypes/alignedTypes_vs2019.vcxproj index da732d283..833b531c6 100644 --- a/Samples/6_Performance/alignedTypes/alignedTypes_vs2019.vcxproj +++ b/Samples/6_Performance/alignedTypes/alignedTypes_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/alignedTypes.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/6_Performance/alignedTypes/alignedTypes_vs2022.vcxproj b/Samples/6_Performance/alignedTypes/alignedTypes_vs2022.vcxproj index d24f27490..ea9110365 100644 --- a/Samples/6_Performance/alignedTypes/alignedTypes_vs2022.vcxproj +++ b/Samples/6_Performance/alignedTypes/alignedTypes_vs2022.vcxproj @@ -34,7 +34,7 @@ 
- + @@ -63,7 +63,7 @@ $(OutDir)/alignedTypes.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/6_Performance/transpose/Makefile b/Samples/6_Performance/transpose/Makefile index 50355e562..83909bce0 100644 --- a/Samples/6_Performance/transpose/Makefile +++ b/Samples/6_Performance/transpose/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/6_Performance/transpose/NsightEclipse.xml b/Samples/6_Performance/transpose/NsightEclipse.xml index a080824a9..58f448f6b 100644 --- a/Samples/6_Performance/transpose/NsightEclipse.xml +++ b/Samples/6_Performance/transpose/NsightEclipse.xml @@ -3,17 +3,17 @@ transpose + cudaMemcpy + cudaMalloc cudaFree - cudaEventRecord - cudaEventCreate - cudaEventElapsedTime + cudaGetLastError cudaEventSynchronize - cudaMalloc + cudaEventRecord + cudaGetDevice cudaEventDestroy - cudaGetLastError - cudaMemcpy + cudaEventElapsedTime cudaGetDeviceProperties - cudaGetDevice + cudaEventCreate whole @@ -55,6 +55,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/6_Performance/transpose/README.md b/Samples/6_Performance/transpose/README.md index 2d7870689..0693888b0 100644 --- a/Samples/6_Performance/transpose/README.md +++ b/Samples/6_Performance/transpose/README.md @@ -10,7 +10,7 @@ Performance Strategies, Linear Algebra ## Supported SM Architectures -[SM 3.5 
](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaMalloc, cudaEventDestroy, cudaGetLastError, cudaMemcpy, cudaGetDeviceProperties, cudaGetDevice +cudaMemcpy, cudaMalloc, cudaFree, cudaGetLastError, cudaEventSynchronize, cudaEventRecord, cudaGetDevice, cudaEventDestroy, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your 
corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/6_Performance/transpose/transpose_vs2017.vcxproj b/Samples/6_Performance/transpose/transpose_vs2017.vcxproj index 0c04b15f2..a9f215afa 100644 --- a/Samples/6_Performance/transpose/transpose_vs2017.vcxproj +++ b/Samples/6_Performance/transpose/transpose_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/transpose.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/6_Performance/transpose/transpose_vs2019.vcxproj b/Samples/6_Performance/transpose/transpose_vs2019.vcxproj index 6cf4cb22a..e4721874d 100644 --- a/Samples/6_Performance/transpose/transpose_vs2019.vcxproj +++ b/Samples/6_Performance/transpose/transpose_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/transpose.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/6_Performance/transpose/transpose_vs2022.vcxproj b/Samples/6_Performance/transpose/transpose_vs2022.vcxproj index 3c7e497c0..1db2b8cda 100644 --- a/Samples/6_Performance/transpose/transpose_vs2022.vcxproj +++ 
b/Samples/6_Performance/transpose/transpose_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/transpose.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - +