diff --git a/.gitignore b/.gitignore
new file mode 100644
index 000000000..a3062beae
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.vscode/*
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4bd17b625..c619c9230 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,10 @@
 ## Changelog
 
+### CUDA 11.6
+* Added new folder structure for samples
+* Added support of Visual Studio 2022 to all samples supported on [Windows](README.md#windows-1).
+* All CUDA samples are now only available on [GitHub](https://github.com/nvidia/cuda-samples). They are no longer available via CUDA toolkit.
+
 ### CUDA 11.5
 * Added `cuDLAHybridMode`. Demonstrate usage of cuDLA in hybrid mode.
 * Added `cuDLAStandaloneMode`. Demonstrate usage of cuDLA in standalone mode.
diff --git a/Common/helper_cuda.h b/Common/helper_cuda.h
index 98a5a7b61..f6bea97a2 100644
--- a/Common/helper_cuda.h
+++ b/Common/helper_cuda.h
@@ -666,6 +666,7 @@ inline int _ConvertSMVer2Cores(int major, int minor) {
       {0x80,  64},
       {0x86, 128},
       {0x87, 128},
+      {0x90, 128},
       {-1, -1}};
 
   int index = 0;
@@ -712,6 +713,8 @@ inline const char* _ConvertSMVer2ArchName(int major, int minor) {
       {0x75, "Turing"},
       {0x80, "Ampere"},
       {0x86, "Ampere"},
+      {0x87, "Ampere"},
+      {0x90, "Hopper"},
       {-1, "Graphics Device"}};
 
   int index = 0;
diff --git a/Common/helper_cuda_drvapi.h b/Common/helper_cuda_drvapi.h
index f0362d64b..80979b5b0 100644
--- a/Common/helper_cuda_drvapi.h
+++ b/Common/helper_cuda_drvapi.h
@@ -114,6 +114,7 @@ inline int _ConvertSMVer2CoresDRV(int major, int minor) {
       {0x80,  64},
       {0x86, 128},
       {0x87, 128},
+      {0x90, 128},
       {-1, -1}};
 
   int index = 0;
diff --git a/README.md b/README.md
index 7a37e1985..354fa6a45 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,12 @@
 # CUDA Samples
 
-Samples for CUDA Developers which demonstrates features in CUDA Toolkit. This version supports [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads).
+Samples for CUDA Developers which demonstrate features in CUDA Toolkit. This version supports [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads).
 
 ## Release Notes
 
 This section describes the release notes for the CUDA Samples on GitHub only.
 
-### CUDA 11.6
+### CUDA 11.8
 * Added new folder structure for samples
 * Added support of Visual Studio 2022 to all samples supported on [Windows](#windows-1).
 * All CUDA samples are now only available on [GitHub](https://github.com/nvidia/cuda-samples). They are no longer available via CUDA toolkit.
@@ -17,7 +17,7 @@ This section describes the release notes for the CUDA Samples on GitHub only.
 
 ### Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 For system requirements and installation instructions of cuda toolkit, please refer to the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/), and the [Windows Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html).
 
 ### Getting the CUDA Samples
@@ -263,4 +263,4 @@ Answers to frequently asked questions about CUDA can be found at http://develope
 
 ## Attributions
 
-*   Teapot image is obtained from [Wikimedia](https://en.wikipedia.org/wiki/File:Original_Utah_Teapot.jpg) and is licensed under the Creative Commons [Attribution-Share Alike 2.0](https://creativecommons.org/licenses/by-sa/2.0/deed.en) Generic license. The image is modified for samples use cases.
\ No newline at end of file
+*   Teapot image is obtained from [Wikimedia](https://en.wikipedia.org/wiki/File:Original_Utah_Teapot.jpg) and is licensed under the Creative Commons [Attribution-Share Alike 2.0](https://creativecommons.org/licenses/by-sa/2.0/deed.en) Generic license. The image is modified for samples use cases.
diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/Makefile b/Samples/0_Introduction/UnifiedMemoryStreams/Makefile
index ba7b78b45..e244dab55 100644
--- a/Samples/0_Introduction/UnifiedMemoryStreams/Makefile
+++ b/Samples/0_Introduction/UnifiedMemoryStreams/Makefile
@@ -318,9 +318,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/NsightEclipse.xml b/Samples/0_Introduction/UnifiedMemoryStreams/NsightEclipse.xml
index 033c1c50e..744caa124 100644
--- a/Samples/0_Introduction/UnifiedMemoryStreams/NsightEclipse.xml
+++ b/Samples/0_Introduction/UnifiedMemoryStreams/NsightEclipse.xml
@@ -6,11 +6,11 @@
     <toolkit>cudaStreamDestroy</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaMallocManaged</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaStreamAttachMemAsync</toolkit>
     <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates the use of OpenMP and streams with Unified Memory on a single GPU.]]></description>
@@ -70,6 +70,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/README.md b/Samples/0_Introduction/UnifiedMemoryStreams/README.md
index 347649da1..417cf3a07 100644
--- a/Samples/0_Introduction/UnifiedMemoryStreams/README.md
+++ b/Samples/0_Introduction/UnifiedMemoryStreams/README.md
@@ -10,7 +10,7 @@ CUDA Systems Integration, OpenMP, CUBLAS, Multithreading, Unified Memory, CUDA S
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamDestroy, cudaFree, cudaMallocManaged, cudaStreamCreate, cudaDeviceSynchronize, cudaStreamAttachMemAsync, cudaSetDevice, cudaStreamSynchronize, cudaGetDeviceProperties
+cudaStreamDestroy, cudaFree, cudaMallocManaged, cudaStreamAttachMemAsync, cudaSetDevice, cudaDeviceSynchronize, cudaStreamSynchronize, cudaStreamCreate, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [OpenMP](../../../README.md#openmp), [UVM](../../../README.md#uvm), [CUBLAS](../../../README.md#cublas)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2017.vcxproj b/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2017.vcxproj
index e5e99aacd..9680c777c 100644
--- a/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2017.vcxproj
+++ b/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/UnifiedMemoryStreams.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2019.vcxproj b/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2019.vcxproj
index 75e8d36d0..866e26dcb 100644
--- a/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2019.vcxproj
+++ b/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/UnifiedMemoryStreams.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2022.vcxproj b/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2022.vcxproj
index ba409655f..074787481 100644
--- a/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2022.vcxproj
+++ b/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/UnifiedMemoryStreams.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/asyncAPI/Makefile b/Samples/0_Introduction/asyncAPI/Makefile
index 885bbc8e4..71bb47940 100644
--- a/Samples/0_Introduction/asyncAPI/Makefile
+++ b/Samples/0_Introduction/asyncAPI/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/asyncAPI/NsightEclipse.xml b/Samples/0_Introduction/asyncAPI/NsightEclipse.xml
index 6d0bbc624..d823ac8a0 100644
--- a/Samples/0_Introduction/asyncAPI/NsightEclipse.xml
+++ b/Samples/0_Introduction/asyncAPI/NsightEclipse.xml
@@ -3,21 +3,21 @@
 <entry>
   <name>asyncAPI</name>
   <cuda_api_list>
-    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaProfilerStop</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaMemcpyAsync</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaMallocHost</toolkit>
     <toolkit>cudaProfilerStart</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaEventQuery</toolkit>
-    <toolkit>cudaProfilerStop</toolkit>
+    <toolkit>cudaMemset</toolkit>
     <toolkit>cudaEventDestroy</toolkit>
-    <toolkit>cudaMemcpyAsync</toolkit>
+    <toolkit>cudaEventQuery</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample illustrates the usage of CUDA events for both GPU timing and overlapping CPU and GPU execution. Events are inserted into a stream of CUDA calls. Since CUDA stream calls are asynchronous, the CPU can perform computations while GPU is executing (including DMA memcopies between the host and device). CPU can query CUDA events to determine whether GPU has completed tasks.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -59,6 +59,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/asyncAPI/README.md b/Samples/0_Introduction/asyncAPI/README.md
index 81da4efc8..7f4f3b421 100644
--- a/Samples/0_Introduction/asyncAPI/README.md
+++ b/Samples/0_Introduction/asyncAPI/README.md
@@ -10,7 +10,7 @@ Asynchronous Data Transfers, CUDA Streams and Events
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaEventRecord, cudaMallocHost, cudaProfilerStart, cudaEventCreate, cudaEventElapsedTime, cudaDeviceSynchronize, cudaFreeHost, cudaMalloc, cudaEventQuery, cudaProfilerStop, cudaEventDestroy, cudaMemcpyAsync, cudaGetDeviceProperties
+cudaProfilerStop, cudaMalloc, cudaMemcpyAsync, cudaFree, cudaMallocHost, cudaProfilerStart, cudaDeviceSynchronize, cudaEventRecord, cudaFreeHost, cudaMemset, cudaEventDestroy, cudaEventQuery, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/asyncAPI/asyncAPI_vs2017.vcxproj b/Samples/0_Introduction/asyncAPI/asyncAPI_vs2017.vcxproj
index f2de8d874..ccea698df 100644
--- a/Samples/0_Introduction/asyncAPI/asyncAPI_vs2017.vcxproj
+++ b/Samples/0_Introduction/asyncAPI/asyncAPI_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/asyncAPI.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/asyncAPI/asyncAPI_vs2019.vcxproj b/Samples/0_Introduction/asyncAPI/asyncAPI_vs2019.vcxproj
index be2679b82..56489567a 100644
--- a/Samples/0_Introduction/asyncAPI/asyncAPI_vs2019.vcxproj
+++ b/Samples/0_Introduction/asyncAPI/asyncAPI_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/asyncAPI.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/asyncAPI/asyncAPI_vs2022.vcxproj b/Samples/0_Introduction/asyncAPI/asyncAPI_vs2022.vcxproj
index 9d98bdff2..c4b23b8f2 100644
--- a/Samples/0_Introduction/asyncAPI/asyncAPI_vs2022.vcxproj
+++ b/Samples/0_Introduction/asyncAPI/asyncAPI_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/asyncAPI.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/c++11_cuda/Makefile b/Samples/0_Introduction/c++11_cuda/Makefile
index f70e1ad6d..d4c77f611 100644
--- a/Samples/0_Introduction/c++11_cuda/Makefile
+++ b/Samples/0_Introduction/c++11_cuda/Makefile
@@ -310,9 +310,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
@@ -363,7 +363,6 @@ run: build
 	$(EXEC) ./c++11_cuda
 
 testrun: build
-	$(EXEC) ./c++11_cuda --dummy-test-param
 
 clean:
 	rm -f c++11_cuda c++11_cuda.o
diff --git a/Samples/0_Introduction/c++11_cuda/NsightEclipse.xml b/Samples/0_Introduction/c++11_cuda/NsightEclipse.xml
index e9acaddc9..ccb26ce14 100644
--- a/Samples/0_Introduction/c++11_cuda/NsightEclipse.xml
+++ b/Samples/0_Introduction/c++11_cuda/NsightEclipse.xml
@@ -7,9 +7,9 @@
   </cflags>
   <cuda_api_list>
     <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaMemset</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates C++11 feature support in CUDA. It scans a input text file and prints no. of occurrences of x, y, z, w characters. ]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -31,9 +31,6 @@
   </librarypaths>
   <nsight_eclipse>true</nsight_eclipse>
   <primary_file>c++11_cuda.cu</primary_file>
-  <qatests>
-    <qatest>--dummy-test-param</qatest>
-  </qatests>
   <required_dependencies>
     <dependency>CPP11</dependency>
   </required_dependencies>
@@ -54,6 +51,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/c++11_cuda/README.md b/Samples/0_Introduction/c++11_cuda/README.md
index 0ff9f23e4..a889fb7c1 100644
--- a/Samples/0_Introduction/c++11_cuda/README.md
+++ b/Samples/0_Introduction/c++11_cuda/README.md
@@ -10,7 +10,7 @@ CPP11 CUDA
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaMemset, cudaFree, cudaMemcpy
+cudaMalloc, cudaMemcpy, cudaMemset, cudaFree
 
 ## Dependencies needed to build/run
 [CPP11](../../../README.md#cpp11)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2017.vcxproj b/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2017.vcxproj
index 33d8ff13b..705e575c0 100644
--- a/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2017.vcxproj
+++ b/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/c++11_cuda.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2019.vcxproj b/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2019.vcxproj
index 3d1bc27dd..e4e93deee 100644
--- a/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2019.vcxproj
+++ b/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/c++11_cuda.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2022.vcxproj b/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2022.vcxproj
index 9bf5532c4..8133b615c 100644
--- a/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2022.vcxproj
+++ b/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/c++11_cuda.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/clock/Makefile b/Samples/0_Introduction/clock/Makefile
index dd8327571..df4722cb7 100644
--- a/Samples/0_Introduction/clock/Makefile
+++ b/Samples/0_Introduction/clock/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/clock/NsightEclipse.xml b/Samples/0_Introduction/clock/NsightEclipse.xml
index eee903b95..6d8cfb096 100644
--- a/Samples/0_Introduction/clock/NsightEclipse.xml
+++ b/Samples/0_Introduction/clock/NsightEclipse.xml
@@ -4,8 +4,8 @@
   <name>clock</name>
   <cuda_api_list>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaFree</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
   </cuda_api_list>
   <description><![CDATA[This example shows how to use the clock function to measure the performance of block of threads of a kernel accurately.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -47,6 +47,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/clock/README.md b/Samples/0_Introduction/clock/README.md
index 98ffd744f..11f9afd45 100644
--- a/Samples/0_Introduction/clock/README.md
+++ b/Samples/0_Introduction/clock/README.md
@@ -10,7 +10,7 @@ Performance Strategies
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaFree, cudaMemcpy
+cudaMalloc, cudaMemcpy, cudaFree
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/clock/clock_vs2017.vcxproj b/Samples/0_Introduction/clock/clock_vs2017.vcxproj
index ba348ad32..2350f365d 100644
--- a/Samples/0_Introduction/clock/clock_vs2017.vcxproj
+++ b/Samples/0_Introduction/clock/clock_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/clock.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/clock/clock_vs2019.vcxproj b/Samples/0_Introduction/clock/clock_vs2019.vcxproj
index a20c90b72..6649becab 100644
--- a/Samples/0_Introduction/clock/clock_vs2019.vcxproj
+++ b/Samples/0_Introduction/clock/clock_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/clock.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/clock/clock_vs2022.vcxproj b/Samples/0_Introduction/clock/clock_vs2022.vcxproj
index 10e923471..4cf6b8958 100644
--- a/Samples/0_Introduction/clock/clock_vs2022.vcxproj
+++ b/Samples/0_Introduction/clock/clock_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/clock.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/clock_nvrtc/README.md b/Samples/0_Introduction/clock_nvrtc/README.md
index 8f16c6d2d..5e1dbf0f9 100644
--- a/Samples/0_Introduction/clock_nvrtc/README.md
+++ b/Samples/0_Introduction/clock_nvrtc/README.md
@@ -10,7 +10,7 @@ Performance Strategies, Runtime Compilation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,7 +23,7 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuModuleGetFunction, cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuMemcpyHtoD
+cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
 cudaBlockSize, cudaGridSize
@@ -33,7 +33,7 @@ cudaBlockSize, cudaGridSize
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2017.vcxproj b/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2017.vcxproj
index 03b11e36c..ec582a9fe 100644
--- a/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2017.vcxproj
+++ b/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2019.vcxproj b/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2019.vcxproj
index 80f3f59d3..e5b93b60d 100644
--- a/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2019.vcxproj
+++ b/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2022.vcxproj b/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2022.vcxproj
index 0cf812f4c..825d8e05e 100644
--- a/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2022.vcxproj
+++ b/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/concurrentKernels/Makefile b/Samples/0_Introduction/concurrentKernels/Makefile
index 0073ee5e7..e6e4e2418 100644
--- a/Samples/0_Introduction/concurrentKernels/Makefile
+++ b/Samples/0_Introduction/concurrentKernels/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/concurrentKernels/NsightEclipse.xml b/Samples/0_Introduction/concurrentKernels/NsightEclipse.xml
index dd564b83c..edfb7ff58 100644
--- a/Samples/0_Introduction/concurrentKernels/NsightEclipse.xml
+++ b/Samples/0_Introduction/concurrentKernels/NsightEclipse.xml
@@ -3,22 +3,22 @@
 <entry>
   <name>concurrentKernels</name>
   <cuda_api_list>
-    <toolkit>cudaStreamWaitEvent</toolkit>
     <toolkit>cudaStreamDestroy</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaMemcpyAsync</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaMallocHost</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaEventCreateWithFlags</toolkit>
     <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaEventCreateWithFlags</toolkit>
+    <toolkit>cudaGetDevice</toolkit>
+    <toolkit>cudaStreamWaitEvent</toolkit>
     <toolkit>cudaEventDestroy</toolkit>
-    <toolkit>cudaMemcpyAsync</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
-    <toolkit>cudaGetDevice</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates the use of CUDA streams for concurrent execution of several kernels on GPU device. It also illustrates how to introduce dependencies between CUDA streams with the new cudaStreamWaitEvent function.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -57,6 +57,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/concurrentKernels/README.md b/Samples/0_Introduction/concurrentKernels/README.md
index 96816ffa2..f83e3bdd4 100644
--- a/Samples/0_Introduction/concurrentKernels/README.md
+++ b/Samples/0_Introduction/concurrentKernels/README.md
@@ -10,7 +10,7 @@ Performance Strategies
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamWaitEvent, cudaStreamDestroy, cudaFree, cudaEventRecord, cudaMallocHost, cudaStreamCreate, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaEventCreateWithFlags, cudaEventDestroy, cudaMemcpyAsync, cudaGetDeviceProperties, cudaGetDevice
+cudaStreamDestroy, cudaMalloc, cudaMemcpyAsync, cudaFree, cudaMallocHost, cudaEventCreateWithFlags, cudaEventSynchronize, cudaEventRecord, cudaFreeHost, cudaGetDevice, cudaStreamWaitEvent, cudaEventDestroy, cudaEventElapsedTime, cudaStreamCreate, cudaGetDeviceProperties, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2017.vcxproj b/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2017.vcxproj
index bb6467891..59cad7eb4 100644
--- a/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2017.vcxproj
+++ b/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/concurrentKernels.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2019.vcxproj b/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2019.vcxproj
index 2830f3105..faee059d5 100644
--- a/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2019.vcxproj
+++ b/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/concurrentKernels.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2022.vcxproj b/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2022.vcxproj
index a528a3fb8..abf2d5e05 100644
--- a/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2022.vcxproj
+++ b/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/concurrentKernels.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/cppIntegration/Makefile b/Samples/0_Introduction/cppIntegration/Makefile
index 19301286d..ebe106e2f 100644
--- a/Samples/0_Introduction/cppIntegration/Makefile
+++ b/Samples/0_Introduction/cppIntegration/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/cppIntegration/NsightEclipse.xml b/Samples/0_Introduction/cppIntegration/NsightEclipse.xml
index 9f70719b8..9b5f9b41d 100644
--- a/Samples/0_Introduction/cppIntegration/NsightEclipse.xml
+++ b/Samples/0_Introduction/cppIntegration/NsightEclipse.xml
@@ -4,8 +4,8 @@
   <name>cppIntegration</name>
   <cuda_api_list>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaFree</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
   </cuda_api_list>
   <description><![CDATA[This example demonstrates how to integrate CUDA into an existing C++ application, i.e. the CUDA entry point on host side is only a function which is called from C++ code and only the file containing this function is compiled with nvcc. It also demonstrates that vector types can be used from cpp.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -41,6 +41,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/cppIntegration/README.md b/Samples/0_Introduction/cppIntegration/README.md
index 2ba64fd7c..4ac48bcf1 100644
--- a/Samples/0_Introduction/cppIntegration/README.md
+++ b/Samples/0_Introduction/cppIntegration/README.md
@@ -10,7 +10,7 @@ CPP-CUDA Integration
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaFree, cudaMemcpy
+cudaMalloc, cudaMemcpy, cudaFree
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/cppIntegration/cppIntegration_vs2017.vcxproj b/Samples/0_Introduction/cppIntegration/cppIntegration_vs2017.vcxproj
index e68462114..4070ae91f 100644
--- a/Samples/0_Introduction/cppIntegration/cppIntegration_vs2017.vcxproj
+++ b/Samples/0_Introduction/cppIntegration/cppIntegration_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/cppIntegration.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/cppIntegration/cppIntegration_vs2019.vcxproj b/Samples/0_Introduction/cppIntegration/cppIntegration_vs2019.vcxproj
index a57aa19b0..67d587aab 100644
--- a/Samples/0_Introduction/cppIntegration/cppIntegration_vs2019.vcxproj
+++ b/Samples/0_Introduction/cppIntegration/cppIntegration_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cppIntegration.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/cppIntegration/cppIntegration_vs2022.vcxproj b/Samples/0_Introduction/cppIntegration/cppIntegration_vs2022.vcxproj
index 26e828033..8ed0d991e 100644
--- a/Samples/0_Introduction/cppIntegration/cppIntegration_vs2022.vcxproj
+++ b/Samples/0_Introduction/cppIntegration/cppIntegration_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cppIntegration.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/cppOverload/Makefile b/Samples/0_Introduction/cppOverload/Makefile
index cfd5ec9c5..a76aca058 100644
--- a/Samples/0_Introduction/cppOverload/Makefile
+++ b/Samples/0_Introduction/cppOverload/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/cppOverload/NsightEclipse.xml b/Samples/0_Introduction/cppOverload/NsightEclipse.xml
index 8c5b84a7f..9ad898beb 100644
--- a/Samples/0_Introduction/cppOverload/NsightEclipse.xml
+++ b/Samples/0_Introduction/cppOverload/NsightEclipse.xml
@@ -3,17 +3,17 @@
 <entry>
   <name>cppOverload</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFuncSetCacheConfig</toolkit>
+    <toolkit>cudaFree</toolkit>
     <toolkit>cudaMallocHost</toolkit>
-    <toolkit>cudaFuncGetAttributes</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaGetDeviceProperties</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaFreeHost</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaFuncGetAttributes</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates how to use C++ function overloading on the GPU.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -52,6 +52,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/cppOverload/README.md b/Samples/0_Introduction/cppOverload/README.md
index 16b6adc15..bc583bfd4 100644
--- a/Samples/0_Introduction/cppOverload/README.md
+++ b/Samples/0_Introduction/cppOverload/README.md
@@ -10,7 +10,7 @@ C++ Function Overloading, CUDA Streams and Events
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaFuncSetCacheConfig, cudaMallocHost, cudaFuncGetAttributes, cudaGetDeviceCount, cudaDeviceSynchronize, cudaFreeHost, cudaMalloc, cudaSetDevice, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaFuncSetCacheConfig, cudaFree, cudaMallocHost, cudaSetDevice, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaFreeHost, cudaMalloc, cudaFuncGetAttributes, cudaGetDeviceCount
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/cppOverload/cppOverload_vs2017.vcxproj b/Samples/0_Introduction/cppOverload/cppOverload_vs2017.vcxproj
index 0a082724e..4adb6ea70 100644
--- a/Samples/0_Introduction/cppOverload/cppOverload_vs2017.vcxproj
+++ b/Samples/0_Introduction/cppOverload/cppOverload_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/cppOverload.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/cppOverload/cppOverload_vs2019.vcxproj b/Samples/0_Introduction/cppOverload/cppOverload_vs2019.vcxproj
index ba8bee01b..040f08cf4 100644
--- a/Samples/0_Introduction/cppOverload/cppOverload_vs2019.vcxproj
+++ b/Samples/0_Introduction/cppOverload/cppOverload_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cppOverload.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/cppOverload/cppOverload_vs2022.vcxproj b/Samples/0_Introduction/cppOverload/cppOverload_vs2022.vcxproj
index 4e849ea5d..a9592ff89 100644
--- a/Samples/0_Introduction/cppOverload/cppOverload_vs2022.vcxproj
+++ b/Samples/0_Introduction/cppOverload/cppOverload_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cppOverload.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/cudaOpenMP/Makefile b/Samples/0_Introduction/cudaOpenMP/Makefile
index 476ddbc9d..277357e24 100644
--- a/Samples/0_Introduction/cudaOpenMP/Makefile
+++ b/Samples/0_Introduction/cudaOpenMP/Makefile
@@ -321,9 +321,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/cudaOpenMP/README.md b/Samples/0_Introduction/cudaOpenMP/README.md
index 9f446d04e..5a57d9185 100644
--- a/Samples/0_Introduction/cudaOpenMP/README.md
+++ b/Samples/0_Introduction/cudaOpenMP/README.md
@@ -10,7 +10,7 @@ CUDA Systems Integration, OpenMP, Multithreading
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaGetDeviceCount, cudaSetDevice, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties, cudaGetDevice
+cudaMemcpy, cudaGetErrorString, cudaFree, cudaGetLastError, cudaSetDevice, cudaGetDeviceCount, cudaGetDevice, cudaMemset, cudaMalloc, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [OpenMP](../../../README.md#openmp)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2017.vcxproj b/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2017.vcxproj
index 57636e0cf..b6a822e04 100644
--- a/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2017.vcxproj
+++ b/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/cudaOpenMP.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2019.vcxproj b/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2019.vcxproj
index 5e7d2b503..991ca21ee 100644
--- a/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2019.vcxproj
+++ b/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cudaOpenMP.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2022.vcxproj b/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2022.vcxproj
index 8628c83bc..adf14793d 100644
--- a/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2022.vcxproj
+++ b/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cudaOpenMP.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/fp16ScalarProduct/Makefile b/Samples/0_Introduction/fp16ScalarProduct/Makefile
index c80fe84ae..5dda1a895 100644
--- a/Samples/0_Introduction/fp16ScalarProduct/Makefile
+++ b/Samples/0_Introduction/fp16ScalarProduct/Makefile
@@ -285,9 +285,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 60 61 70 75 80 86
+SMS ?= 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/fp16ScalarProduct/NsightEclipse.xml b/Samples/0_Introduction/fp16ScalarProduct/NsightEclipse.xml
index fed67440f..045bce439 100644
--- a/Samples/0_Introduction/fp16ScalarProduct/NsightEclipse.xml
+++ b/Samples/0_Introduction/fp16ScalarProduct/NsightEclipse.xml
@@ -3,11 +3,11 @@
 <entry>
   <name>fp16ScalarProduct</name>
   <cuda_api_list>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaMallocHost</toolkit>
     <toolkit>cudaFreeHost</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[Calculates scalar product of two vectors of FP16 numbers.]]></description>
@@ -44,6 +44,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>arm</arch>
diff --git a/Samples/0_Introduction/fp16ScalarProduct/README.md b/Samples/0_Introduction/fp16ScalarProduct/README.md
index 3875a40a2..4aa2b89c3 100644
--- a/Samples/0_Introduction/fp16ScalarProduct/README.md
+++ b/Samples/0_Introduction/fp16ScalarProduct/README.md
@@ -10,7 +10,7 @@ CUDA Runtime API
 
 ## Supported SM Architectures
 
-[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaMallocHost, cudaFreeHost, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaFree, cudaMallocHost, cudaFreeHost, cudaMalloc, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [FP16](../../../README.md#fp16)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2017.vcxproj b/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2017.vcxproj
index f1199a8ee..c4dbdc75c 100644
--- a/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2017.vcxproj
+++ b/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/fp16ScalarProduct.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2019.vcxproj b/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2019.vcxproj
index bcd5c50cb..0b9a749fc 100644
--- a/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2019.vcxproj
+++ b/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/fp16ScalarProduct.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2022.vcxproj b/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2022.vcxproj
index c316ee629..ee4258a88 100644
--- a/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2022.vcxproj
+++ b/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/fp16ScalarProduct.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/matrixMul/Makefile b/Samples/0_Introduction/matrixMul/Makefile
index a8b38ccdd..a4d336b55 100644
--- a/Samples/0_Introduction/matrixMul/Makefile
+++ b/Samples/0_Introduction/matrixMul/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/matrixMul/NsightEclipse.xml b/Samples/0_Introduction/matrixMul/NsightEclipse.xml
index e90b6c7e8..3f5179670 100644
--- a/Samples/0_Introduction/matrixMul/NsightEclipse.xml
+++ b/Samples/0_Introduction/matrixMul/NsightEclipse.xml
@@ -3,20 +3,20 @@
 <entry>
   <name>matrixMul</name>
   <cuda_api_list>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
+    <toolkit>cudaProfilerStop</toolkit>
+    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaMallocHost</toolkit>
     <toolkit>cudaProfilerStart</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaProfilerStop</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
-    <toolkit>cudaEventDestroy</toolkit>
     <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaMemcpyAsync</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample implements matrix multiplication and is exactly the same as Chapter 6 of the programming guide. It has been written for clarity of exposition to illustrate various CUDA programming principles, not with the goal of providing the most performant generic kernel for matrix multiplication.  To illustrate GPU performance for matrix multiply, this sample also shows how to use the new CUDA 4.0 interface for CUBLAS to demonstrate high-performance performance for matrix multiplication.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -56,6 +56,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/matrixMul/README.md b/Samples/0_Introduction/matrixMul/README.md
index c558141a3..b0e121b23 100644
--- a/Samples/0_Introduction/matrixMul/README.md
+++ b/Samples/0_Introduction/matrixMul/README.md
@@ -10,7 +10,7 @@ CUDA Runtime API, Linear Algebra
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaEventRecord, cudaMallocHost, cudaProfilerStart, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaProfilerStop, cudaStreamCreateWithFlags, cudaEventDestroy, cudaStreamSynchronize, cudaMemcpyAsync
+cudaStreamCreateWithFlags, cudaProfilerStop, cudaMalloc, cudaFree, cudaMallocHost, cudaProfilerStart, cudaEventSynchronize, cudaEventRecord, cudaFreeHost, cudaStreamSynchronize, cudaEventDestroy, cudaEventElapsedTime, cudaMemcpyAsync, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/matrixMul/matrixMul_vs2017.vcxproj b/Samples/0_Introduction/matrixMul/matrixMul_vs2017.vcxproj
index 5bc23eb0f..95f6a03af 100644
--- a/Samples/0_Introduction/matrixMul/matrixMul_vs2017.vcxproj
+++ b/Samples/0_Introduction/matrixMul/matrixMul_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/matrixMul.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/matrixMul/matrixMul_vs2019.vcxproj b/Samples/0_Introduction/matrixMul/matrixMul_vs2019.vcxproj
index 7373d3856..375f668ae 100644
--- a/Samples/0_Introduction/matrixMul/matrixMul_vs2019.vcxproj
+++ b/Samples/0_Introduction/matrixMul/matrixMul_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/matrixMul.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/matrixMul/matrixMul_vs2022.vcxproj b/Samples/0_Introduction/matrixMul/matrixMul_vs2022.vcxproj
index 36e7c4e9a..e406cc03b 100644
--- a/Samples/0_Introduction/matrixMul/matrixMul_vs2022.vcxproj
+++ b/Samples/0_Introduction/matrixMul/matrixMul_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/matrixMul.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/matrixMulDrv/Makefile b/Samples/0_Introduction/matrixMulDrv/Makefile
index 794345b6f..83476982a 100644
--- a/Samples/0_Introduction/matrixMulDrv/Makefile
+++ b/Samples/0_Introduction/matrixMulDrv/Makefile
@@ -283,9 +283,9 @@ FATBIN_FILE := matrixMul_kernel${TARGET_SIZE}.fatbin
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/matrixMulDrv/README.md b/Samples/0_Introduction/matrixMulDrv/README.md
index 804e7d814..682fb940c 100644
--- a/Samples/0_Introduction/matrixMulDrv/README.md
+++ b/Samples/0_Introduction/matrixMulDrv/README.md
@@ -10,7 +10,7 @@ CUDA Driver API, Matrix Multiply
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuModuleGetFunction, cuMemcpyHtoD, cuModuleLoadData, cuCtxCreate, cuLaunchKernel, cuDeviceGetName, cuMemAlloc, cuOccupancyMaxPotentialBlockSize, cuDeviceTotalMem, cuMemFree, cuMemcpyDtoH, cuCtxDestroy, cuDeviceGetAttribute
+cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuDeviceGetName, cuDeviceTotalMem, cuDeviceGetAttribute, cuModuleLoadData, cuOccupancyMaxPotentialBlockSize, cuMemAlloc, cuMemFree, cuCtxDestroy, cuModuleGetFunction, cuCtxCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2017.vcxproj b/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2017.vcxproj
index ff9114527..739987619 100644
--- a/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2017.vcxproj
+++ b/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/matrixMulDrv.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -111,6 +111,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2019.vcxproj b/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2019.vcxproj
index 1f6b88ada..0805c97cf 100644
--- a/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2019.vcxproj
+++ b/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/matrixMulDrv.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2022.vcxproj b/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2022.vcxproj
index e92ce9cef..a82bb6991 100644
--- a/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2022.vcxproj
+++ b/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/matrixMulDrv.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/README.md b/Samples/0_Introduction/matrixMulDynlinkJIT/README.md
index 183509644..657811d3b 100644
--- a/Samples/0_Introduction/matrixMulDynlinkJIT/README.md
+++ b/Samples/0_Introduction/matrixMulDynlinkJIT/README.md
@@ -10,7 +10,7 @@ CUDA Driver API, CUDA Dynamically Linked Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuParamSetv, cuMemFree, cuInit, cuModuleGetFunction, cuCtxDestroy, cuCtxCreate, cuDeviceGetName, cuCtxSynchronize, cuParamSeti, cuModuleLoadDataEx, cuDeviceGet, cuFuncSetSharedSize, cuMemAlloc, cuDeviceComputeCapability, cuFuncSetBlockShape, cuMemcpyHtoD, cuParamSetSize, cuLaunchGrid, cuDeviceGetCount, cuLaunchKernel, cuMemcpyDtoH
+cuMemcpyDtoH, cuDeviceGetName, cuParamSeti, cuModuleLoadDataEx, cuModuleGetFunction, cuLaunchGrid, cuFuncSetSharedSize, cuMemFree, cuParamSetSize, cuParamSetv, cuInit, cuMemcpyHtoD, cuLaunchKernel, cuDeviceGet, cuFuncSetBlockShape, cuCtxDestroy, cuDeviceGetCount, cuDeviceComputeCapability, cuCtxSynchronize, cuMemAlloc, cuCtxCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/helper_cuda_drvapi.h b/Samples/0_Introduction/matrixMulDynlinkJIT/helper_cuda_drvapi.h
index 5f69d3329..4ca66fde5 100644
--- a/Samples/0_Introduction/matrixMulDynlinkJIT/helper_cuda_drvapi.h
+++ b/Samples/0_Introduction/matrixMulDynlinkJIT/helper_cuda_drvapi.h
@@ -95,6 +95,7 @@ inline int _ConvertSMVer2CoresDRV(int major, int minor) {
       {0x80,  64},
       {0x86, 128},
       {0x87, 128},
+      {0x90, 128},
       {-1, -1}};
 
   int index = 0;
diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2017.vcxproj b/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2017.vcxproj
index da13462f7..8b146bded 100644
--- a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2017.vcxproj
+++ b/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -116,6 +116,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2019.vcxproj b/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2019.vcxproj
index 460bc3dea..3fc6842cf 100644
--- a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2019.vcxproj
+++ b/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -112,6 +112,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2022.vcxproj b/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2022.vcxproj
index d5ac5358a..732e0b222 100644
--- a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2022.vcxproj
+++ b/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -112,6 +112,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/matrixMul_nvrtc/README.md b/Samples/0_Introduction/matrixMul_nvrtc/README.md
index 2cefe20e5..224c3ee04 100644
--- a/Samples/0_Introduction/matrixMul_nvrtc/README.md
+++ b/Samples/0_Introduction/matrixMul_nvrtc/README.md
@@ -10,7 +10,7 @@ CUDA Runtime API, Linear Algebra, Runtime Compilation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuModuleGetFunction, cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuMemcpyHtoD
+cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction
 
 ## Dependencies needed to build/run
 [NVRTC](../../../README.md#nvrtc)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2017.vcxproj b/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2017.vcxproj
index 1b4a7eb09..7833bb475 100644
--- a/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2017.vcxproj
+++ b/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -113,6 +113,6 @@ xcopy /y /e /s "$(CudaToolkitDir)include\cooperative_groups" .\cooperative_group
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2019.vcxproj b/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2019.vcxproj
index cf0c66c8e..d0b583663 100644
--- a/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2019.vcxproj
+++ b/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -109,6 +109,6 @@ xcopy /y /e /s "$(CudaToolkitDir)include\cooperative_groups" .\cooperative_group
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2022.vcxproj b/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2022.vcxproj
index f6dc2b6f8..6fa7922e7 100644
--- a/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2022.vcxproj
+++ b/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -109,6 +109,6 @@ xcopy /y /e /s "$(CudaToolkitDir)include\cooperative_groups" .\cooperative_group
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/mergeSort/Makefile b/Samples/0_Introduction/mergeSort/Makefile
index ad45af877..815268b1d 100644
--- a/Samples/0_Introduction/mergeSort/Makefile
+++ b/Samples/0_Introduction/mergeSort/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/mergeSort/NsightEclipse.xml b/Samples/0_Introduction/mergeSort/NsightEclipse.xml
index 0a77b65e2..55cab9066 100644
--- a/Samples/0_Introduction/mergeSort/NsightEclipse.xml
+++ b/Samples/0_Introduction/mergeSort/NsightEclipse.xml
@@ -4,9 +4,9 @@
   <name>mergeSort</name>
   <cuda_api_list>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample implements a merge sort (also known as Batcher's sort), algorithms belonging to the class of sorting networks. While generally subefficient on large sequences compared to algorithms with better asymptotic algorithmic complexity (i.e. merge sort or radix sort), may be the algorithms of choice for sorting batches of short- to mid-sized (key, value) array pairs. Refer to the excellent tutorial by H. W. Lang http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/networks/indexen.htm]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -46,6 +46,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/mergeSort/README.md b/Samples/0_Introduction/mergeSort/README.md
index d7ab7be23..d08538962 100644
--- a/Samples/0_Introduction/mergeSort/README.md
+++ b/Samples/0_Introduction/mergeSort/README.md
@@ -10,7 +10,7 @@ Data-Parallel Algorithms
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaFree, cudaDeviceSynchronize, cudaMemcpy
+cudaMalloc, cudaDeviceSynchronize, cudaMemcpy, cudaFree
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/mergeSort/mergeSort_vs2017.vcxproj b/Samples/0_Introduction/mergeSort/mergeSort_vs2017.vcxproj
index 9f5e39b3e..0ef070139 100644
--- a/Samples/0_Introduction/mergeSort/mergeSort_vs2017.vcxproj
+++ b/Samples/0_Introduction/mergeSort/mergeSort_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/mergeSort.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -111,6 +111,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/mergeSort/mergeSort_vs2019.vcxproj b/Samples/0_Introduction/mergeSort/mergeSort_vs2019.vcxproj
index 8639bd2e4..5796dda5f 100644
--- a/Samples/0_Introduction/mergeSort/mergeSort_vs2019.vcxproj
+++ b/Samples/0_Introduction/mergeSort/mergeSort_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/mergeSort.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/mergeSort/mergeSort_vs2022.vcxproj b/Samples/0_Introduction/mergeSort/mergeSort_vs2022.vcxproj
index c38e79d66..ed951e9f8 100644
--- a/Samples/0_Introduction/mergeSort/mergeSort_vs2022.vcxproj
+++ b/Samples/0_Introduction/mergeSort/mergeSort_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/mergeSort.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleAWBarrier/Makefile b/Samples/0_Introduction/simpleAWBarrier/Makefile
index cd8dc51d9..0fa1e665b 100644
--- a/Samples/0_Introduction/simpleAWBarrier/Makefile
+++ b/Samples/0_Introduction/simpleAWBarrier/Makefile
@@ -316,9 +316,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 70 72 75 80 86 87
+SMS ?= 70 72 75 80 86 87 90
 else
-SMS ?= 70 75 80 86
+SMS ?= 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleAWBarrier/NsightEclipse.xml b/Samples/0_Introduction/simpleAWBarrier/NsightEclipse.xml
index e8738f6a4..87414f89f 100644
--- a/Samples/0_Introduction/simpleAWBarrier/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleAWBarrier/NsightEclipse.xml
@@ -6,17 +6,17 @@
     <flag>--std=c++11</flag>
   </cflags>
   <cuda_api_list>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaMallocHost</toolkit>
-    <toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
-    <toolkit>cudaOccupancyMaxPotentialBlockSize</toolkit>
     <toolkit>cudaDeviceGetAttribute</toolkit>
+    <toolkit>cudaMallocHost</toolkit>
     <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
-    <toolkit>cudaLaunchCooperativeKernel</toolkit>
     <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaLaunchCooperativeKernel</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
     <toolkit>cudaMemcpyAsync</toolkit>
+    <toolkit>cudaOccupancyMaxPotentialBlockSize</toolkit>
   </cuda_api_list>
   <description><![CDATA[A simple demonstration of arrive wait barriers.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -53,6 +53,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleAWBarrier/README.md b/Samples/0_Introduction/simpleAWBarrier/README.md
index d81ac1f8b..064db83a5 100644
--- a/Samples/0_Introduction/simpleAWBarrier/README.md
+++ b/Samples/0_Introduction/simpleAWBarrier/README.md
@@ -10,7 +10,7 @@ Arrive Wait Barrier
 
 ## Supported SM Architectures
 
-[SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaMallocHost, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaOccupancyMaxPotentialBlockSize, cudaDeviceGetAttribute, cudaFreeHost, cudaMalloc, cudaStreamCreateWithFlags, cudaLaunchCooperativeKernel, cudaStreamSynchronize, cudaMemcpyAsync
+cudaStreamCreateWithFlags, cudaFree, cudaDeviceGetAttribute, cudaMallocHost, cudaFreeHost, cudaStreamSynchronize, cudaLaunchCooperativeKernel, cudaMalloc, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaMemcpyAsync, cudaOccupancyMaxPotentialBlockSize
 
 ## Dependencies needed to build/run
 [CPP11](../../../README.md#cpp11), [MBCG](../../../README.md#mbcg)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2017.vcxproj b/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2017.vcxproj
index ea64526a3..ed1365409 100644
--- a/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleAWBarrier.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2019.vcxproj b/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2019.vcxproj
index aaf046ed0..eeddba29c 100644
--- a/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleAWBarrier.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2022.vcxproj b/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2022.vcxproj
index 28637338e..85eb24bf8 100644
--- a/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleAWBarrier.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleAssert/Makefile b/Samples/0_Introduction/simpleAssert/Makefile
index fb73574b4..bd790aa68 100644
--- a/Samples/0_Introduction/simpleAssert/Makefile
+++ b/Samples/0_Introduction/simpleAssert/Makefile
@@ -285,9 +285,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleAssert/NsightEclipse.xml b/Samples/0_Introduction/simpleAssert/NsightEclipse.xml
index 7f9e81f5d..2ba03ec63 100644
--- a/Samples/0_Introduction/simpleAssert/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleAssert/NsightEclipse.xml
@@ -41,6 +41,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleAssert/README.md b/Samples/0_Introduction/simpleAssert/README.md
index e5fbc1b3d..05b753a18 100644
--- a/Samples/0_Introduction/simpleAssert/README.md
+++ b/Samples/0_Introduction/simpleAssert/README.md
@@ -10,7 +10,7 @@ Assert
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -27,7 +27,7 @@ cudaDeviceSynchronize, cudaGetErrorString
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleAssert/simpleAssert_vs2017.vcxproj b/Samples/0_Introduction/simpleAssert/simpleAssert_vs2017.vcxproj
index a033d7b00..731833f93 100644
--- a/Samples/0_Introduction/simpleAssert/simpleAssert_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleAssert/simpleAssert_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleAssert.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleAssert/simpleAssert_vs2019.vcxproj b/Samples/0_Introduction/simpleAssert/simpleAssert_vs2019.vcxproj
index a32337825..88e2fa096 100644
--- a/Samples/0_Introduction/simpleAssert/simpleAssert_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleAssert/simpleAssert_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleAssert.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleAssert/simpleAssert_vs2022.vcxproj b/Samples/0_Introduction/simpleAssert/simpleAssert_vs2022.vcxproj
index de3d8f032..ce9351208 100644
--- a/Samples/0_Introduction/simpleAssert/simpleAssert_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleAssert/simpleAssert_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleAssert.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleAssert_nvrtc/README.md b/Samples/0_Introduction/simpleAssert_nvrtc/README.md
index d0ecd7e03..72c5de117 100644
--- a/Samples/0_Introduction/simpleAssert_nvrtc/README.md
+++ b/Samples/0_Introduction/simpleAssert_nvrtc/README.md
@@ -10,7 +10,7 @@ Assert, Runtime Compilation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuModuleGetFunction, cuCtxSynchronize, cuLaunchKernel
+cuModuleGetFunction, cuLaunchKernel, cuCtxSynchronize
 
 ## Dependencies needed to build/run
 [NVRTC](../../../README.md#nvrtc)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2017.vcxproj b/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2017.vcxproj
index 11b8003f2..3fc089e59 100644
--- a/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2019.vcxproj b/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2019.vcxproj
index 548b3baf6..0714d837b 100644
--- a/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2022.vcxproj b/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2022.vcxproj
index 59d571cec..fc010fb0e 100644
--- a/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/Makefile b/Samples/0_Introduction/simpleAtomicIntrinsics/Makefile
index 7b221fb20..b72224450 100644
--- a/Samples/0_Introduction/simpleAtomicIntrinsics/Makefile
+++ b/Samples/0_Introduction/simpleAtomicIntrinsics/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/NsightEclipse.xml b/Samples/0_Introduction/simpleAtomicIntrinsics/NsightEclipse.xml
index 093f108b1..e9252d1cc 100644
--- a/Samples/0_Introduction/simpleAtomicIntrinsics/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleAtomicIntrinsics/NsightEclipse.xml
@@ -3,12 +3,12 @@
 <entry>
   <name>simpleAtomicIntrinsics</name>
   <cuda_api_list>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaMallocHost</toolkit>
     <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
     <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaMemcpyAsync</toolkit>
   </cuda_api_list>
   <description><![CDATA[A simple demonstration of global memory atomic instructions.]]></description>
@@ -48,6 +48,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/README.md b/Samples/0_Introduction/simpleAtomicIntrinsics/README.md
index 81693b71b..0fa52781b 100644
--- a/Samples/0_Introduction/simpleAtomicIntrinsics/README.md
+++ b/Samples/0_Introduction/simpleAtomicIntrinsics/README.md
@@ -10,7 +10,7 @@ Atomic Intrinsics
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaMallocHost, cudaFreeHost, cudaMalloc, cudaStreamCreateWithFlags, cudaStreamSynchronize, cudaMemcpyAsync
+cudaStreamCreateWithFlags, cudaFree, cudaMallocHost, cudaFreeHost, cudaStreamSynchronize, cudaMalloc, cudaMemcpyAsync
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2017.vcxproj b/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2017.vcxproj
index 87308a5d1..d122ae68f 100644
--- a/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleAtomicIntrinsics.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2019.vcxproj b/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2019.vcxproj
index b99f4190a..7f05dcc60 100644
--- a/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleAtomicIntrinsics.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2022.vcxproj b/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2022.vcxproj
index bb0bb8df1..7dd8d89e9 100644
--- a/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleAtomicIntrinsics.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/README.md b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/README.md
index 0d1700a7b..a53e822b5 100644
--- a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/README.md
+++ b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/README.md
@@ -10,7 +10,7 @@ Atomic Intrinsics, Runtime Compilation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,7 +23,7 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuModuleGetFunction, cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuMemcpyHtoD
+cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
 cudaBlockSize, cudaGridSize
@@ -33,7 +33,7 @@ cudaBlockSize, cudaGridSize
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2017.vcxproj b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2017.vcxproj
index c51f99399..9db171b1f 100644
--- a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2019.vcxproj b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2019.vcxproj
index 75b918e79..b43cec916 100644
--- a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2022.vcxproj b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2022.vcxproj
index f7cb9e386..bd705f440 100644
--- a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleAttributes/Makefile b/Samples/0_Introduction/simpleAttributes/Makefile
index 00e9c4f05..e685dd695 100644
--- a/Samples/0_Introduction/simpleAttributes/Makefile
+++ b/Samples/0_Introduction/simpleAttributes/Makefile
@@ -285,9 +285,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleAttributes/NsightEclipse.xml b/Samples/0_Introduction/simpleAttributes/NsightEclipse.xml
index 5141efa16..fcad82351 100644
--- a/Samples/0_Introduction/simpleAttributes/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleAttributes/NsightEclipse.xml
@@ -3,15 +3,15 @@
 <entry>
   <name>simpleAttributes</name>
   <cuda_api_list>
-    <toolkit>cudaDeviceSetLimit</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaMallocHost</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
     <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaStreamSynchronize</toolkit>
     <toolkit>cudaStreamSetAttribute</toolkit>
+    <toolkit>cudaDeviceSetLimit</toolkit>
+    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaMemcpyAsync</toolkit>
-    <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This CUDA Runtime API sample is a very basic example that implements how to use the stream attributes that affect L2 locality. Performance improvement due to use of L2 access policy window can only be noticed on Compute capability 8.0 or higher.]]></description>
@@ -49,6 +49,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleAttributes/README.md b/Samples/0_Introduction/simpleAttributes/README.md
index e5b6fa875..5dc1787bc 100644
--- a/Samples/0_Introduction/simpleAttributes/README.md
+++ b/Samples/0_Introduction/simpleAttributes/README.md
@@ -10,7 +10,7 @@ Attributes usage on stream
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDeviceSetLimit, cudaFree, cudaMallocHost, cudaStreamCreate, cudaFreeHost, cudaMalloc, cudaStreamSetAttribute, cudaMemcpyAsync, cudaStreamSynchronize, cudaGetDeviceProperties
+cudaFree, cudaMallocHost, cudaFreeHost, cudaStreamSynchronize, cudaStreamSetAttribute, cudaDeviceSetLimit, cudaMalloc, cudaMemcpyAsync, cudaStreamCreate, cudaGetDeviceProperties
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2017.vcxproj b/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2017.vcxproj
index 2752b2669..a446d3a9b 100644
--- a/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleAttributes.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2019.vcxproj b/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2019.vcxproj
index 620fdca35..e49167d97 100644
--- a/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleAttributes.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2022.vcxproj b/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2022.vcxproj
index 9ab8f931b..1eb61252c 100644
--- a/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleAttributes.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleCUDA2GL/Makefile b/Samples/0_Introduction/simpleCUDA2GL/Makefile
index 39042604b..80e3250f2 100644
--- a/Samples/0_Introduction/simpleCUDA2GL/Makefile
+++ b/Samples/0_Introduction/simpleCUDA2GL/Makefile
@@ -311,9 +311,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleCUDA2GL/NsightEclipse.xml b/Samples/0_Introduction/simpleCUDA2GL/NsightEclipse.xml
index 55b9400ce..1f40f86bd 100644
--- a/Samples/0_Introduction/simpleCUDA2GL/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleCUDA2GL/NsightEclipse.xml
@@ -3,19 +3,19 @@
 <entry>
   <name>simpleCUDA2GL</name>
   <cuda_api_list>
+    <toolkit>cudaHostAlloc</toolkit>
+    <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
     <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaGraphicsUnregisterResource</toolkit>
     <toolkit>cudaMemcpyToArray</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
-    <toolkit>cudaHostAlloc</toolkit>
-    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
-    <toolkit>cudaProcess</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaProcess</toolkit>
+    <toolkit>cudaGraphicsUnregisterResource</toolkit>
     <toolkit>cudaGraphicsSubResourceGetMappedArray</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
     <toolkit>cudaGraphicsGLRegisterImage</toolkit>
-    <toolkit>cudaGraphicsUnmapResources</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample shows how to copy CUDA image back to OpenGL using the most efficient methods.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -79,6 +79,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleCUDA2GL/README.md b/Samples/0_Introduction/simpleCUDA2GL/README.md
index c5f7bb6d2..7c46fb7eb 100644
--- a/Samples/0_Introduction/simpleCUDA2GL/README.md
+++ b/Samples/0_Introduction/simpleCUDA2GL/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Image Processing, Performance Strategies
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaMemcpyToArray, cudaGraphicsGLRegisterBuffer, cudaHostAlloc, cudaGraphicsResourceGetMappedPointer, cudaProcess, cudaDeviceSynchronize, cudaMalloc, cudaGraphicsSubResourceGetMappedArray, cudaGraphicsGLRegisterImage, cudaGraphicsUnmapResources
+cudaHostAlloc, cudaGraphicsUnmapResources, cudaMalloc, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaMemcpyToArray, cudaDeviceSynchronize, cudaProcess, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsGLRegisterImage
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/0_Introduction/simpleCUDA2GL/findgllib.mk b/Samples/0_Introduction/simpleCUDA2GL/findgllib.mk
index f0a5c5512..998fcf0f1 100644
--- a/Samples/0_Introduction/simpleCUDA2GL/findgllib.mk
+++ b/Samples/0_Introduction/simpleCUDA2GL/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2017.vcxproj b/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2017.vcxproj
index b1c84ae4a..658651178 100644
--- a/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleCUDA2GL.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -118,6 +118,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2019.vcxproj b/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2019.vcxproj
index 955b060ab..19ad1de21 100644
--- a/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleCUDA2GL.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2022.vcxproj b/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2022.vcxproj
index 46e97f626..f2b071ec9 100644
--- a/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleCUDA2GL.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleCallback/Makefile b/Samples/0_Introduction/simpleCallback/Makefile
index fade686a6..ff334a834 100644
--- a/Samples/0_Introduction/simpleCallback/Makefile
+++ b/Samples/0_Introduction/simpleCallback/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleCallback/NsightEclipse.xml b/Samples/0_Introduction/simpleCallback/NsightEclipse.xml
index 3ee3b90de..931c7c651 100644
--- a/Samples/0_Introduction/simpleCallback/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleCallback/NsightEclipse.xml
@@ -3,16 +3,16 @@
 <entry>
   <name>simpleCallback</name>
   <cuda_api_list>
+    <toolkit>cudaHostAlloc</toolkit>
     <toolkit>cudaStreamDestroy</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
-    <toolkit>cudaHostAlloc</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaGetDeviceCount</toolkit>
     <toolkit>cudaFreeHost</toolkit>
+    <toolkit>cudaStreamAddCallback</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaMemcpyAsync</toolkit>
-    <toolkit>cudaStreamAddCallback</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample implements multi-threaded heterogeneous computing workloads with the new CPU callbacks for CUDA streams and events introduced with CUDA 5.0.]]></description>
@@ -53,6 +53,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleCallback/README.md b/Samples/0_Introduction/simpleCallback/README.md
index 24c76c25d..403685670 100644
--- a/Samples/0_Introduction/simpleCallback/README.md
+++ b/Samples/0_Introduction/simpleCallback/README.md
@@ -10,7 +10,7 @@ CUDA Streams, Callback Functions, Multithreading
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamDestroy, cudaFree, cudaStreamCreate, cudaHostAlloc, cudaGetDeviceCount, cudaFreeHost, cudaMalloc, cudaSetDevice, cudaMemcpyAsync, cudaStreamAddCallback, cudaGetDeviceProperties
+cudaHostAlloc, cudaStreamDestroy, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaFreeHost, cudaStreamAddCallback, cudaMalloc, cudaMemcpyAsync, cudaStreamCreate, cudaGetDeviceProperties
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleCallback/simpleCallback_vs2017.vcxproj b/Samples/0_Introduction/simpleCallback/simpleCallback_vs2017.vcxproj
index 677ea6b21..ad8bf9007 100644
--- a/Samples/0_Introduction/simpleCallback/simpleCallback_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleCallback/simpleCallback_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleCallback.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleCallback/simpleCallback_vs2019.vcxproj b/Samples/0_Introduction/simpleCallback/simpleCallback_vs2019.vcxproj
index 0dcbadea5..b200ba6f3 100644
--- a/Samples/0_Introduction/simpleCallback/simpleCallback_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleCallback/simpleCallback_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleCallback.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleCallback/simpleCallback_vs2022.vcxproj b/Samples/0_Introduction/simpleCallback/simpleCallback_vs2022.vcxproj
index 6f2e491b5..196f57949 100644
--- a/Samples/0_Introduction/simpleCallback/simpleCallback_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleCallback/simpleCallback_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleCallback.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleCooperativeGroups/Makefile b/Samples/0_Introduction/simpleCooperativeGroups/Makefile
index c45b73325..22efbff33 100644
--- a/Samples/0_Introduction/simpleCooperativeGroups/Makefile
+++ b/Samples/0_Introduction/simpleCooperativeGroups/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleCooperativeGroups/NsightEclipse.xml b/Samples/0_Introduction/simpleCooperativeGroups/NsightEclipse.xml
index 54d9c4d3c..939f68ada 100644
--- a/Samples/0_Introduction/simpleCooperativeGroups/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleCooperativeGroups/NsightEclipse.xml
@@ -44,6 +44,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleCooperativeGroups/README.md b/Samples/0_Introduction/simpleCooperativeGroups/README.md
index 7e80f6bc1..ab3e11ccf 100644
--- a/Samples/0_Introduction/simpleCooperativeGroups/README.md
+++ b/Samples/0_Introduction/simpleCooperativeGroups/README.md
@@ -10,7 +10,7 @@ Cooperative Groups
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -27,7 +27,7 @@ cudaDeviceSynchronize, cudaGetErrorString
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2017.vcxproj b/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2017.vcxproj
index 4a7bac2af..061538d8d 100644
--- a/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleCooperativeGroups.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2019.vcxproj b/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2019.vcxproj
index 09d331592..bf17882ac 100644
--- a/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleCooperativeGroups.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2022.vcxproj b/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2022.vcxproj
index 127592032..649221c2f 100644
--- a/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleCooperativeGroups.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleCubemapTexture/Makefile b/Samples/0_Introduction/simpleCubemapTexture/Makefile
index fdff39801..4c1fed172 100644
--- a/Samples/0_Introduction/simpleCubemapTexture/Makefile
+++ b/Samples/0_Introduction/simpleCubemapTexture/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleCubemapTexture/NsightEclipse.xml b/Samples/0_Introduction/simpleCubemapTexture/NsightEclipse.xml
index 625ed7a5d..1bf6b0105 100644
--- a/Samples/0_Introduction/simpleCubemapTexture/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleCubemapTexture/NsightEclipse.xml
@@ -3,17 +3,17 @@
 <entry>
   <name>simpleCubemapTexture</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaCreateChannelDesc</toolkit>
     <toolkit>cudaFreeArray</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaPitchedPtr</toolkit>
+    <toolkit>cudaPos</toolkit>
+    <toolkit>cudaDestroyTextureObject</toolkit>
     <toolkit>cudaExtent</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaDestroyTextureObject</toolkit>
-    <toolkit>cudaPitchedPtr</toolkit>
     <toolkit>cudaCreateTextureObject</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaCreateChannelDesc</toolkit>
-    <toolkit>cudaPos</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[Simple example that demonstrates how to use a new CUDA 4.1 feature to support cubemap Textures in CUDA C.]]></description>
@@ -52,6 +52,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleCubemapTexture/README.md b/Samples/0_Introduction/simpleCubemapTexture/README.md
index 68f069479..44c3896f1 100644
--- a/Samples/0_Introduction/simpleCubemapTexture/README.md
+++ b/Samples/0_Introduction/simpleCubemapTexture/README.md
@@ -10,7 +10,7 @@ Texture, Volume Processing
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaFreeArray, cudaExtent, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaPitchedPtr, cudaCreateTextureObject, cudaMalloc, cudaCreateChannelDesc, cudaPos, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaCreateChannelDesc, cudaFreeArray, cudaFree, cudaPitchedPtr, cudaPos, cudaDestroyTextureObject, cudaExtent, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMalloc, cudaGetDeviceProperties
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2017.vcxproj b/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2017.vcxproj
index 26ae94238..307c5282e 100644
--- a/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleCubemapTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2019.vcxproj b/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2019.vcxproj
index bcdaec2f4..709f03620 100644
--- a/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleCubemapTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2022.vcxproj b/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2022.vcxproj
index 0dd40bfd1..3c332afbe 100644
--- a/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleCubemapTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleDrvRuntime/Makefile b/Samples/0_Introduction/simpleDrvRuntime/Makefile
index 3cbc5811b..46593a89b 100644
--- a/Samples/0_Introduction/simpleDrvRuntime/Makefile
+++ b/Samples/0_Introduction/simpleDrvRuntime/Makefile
@@ -283,9 +283,9 @@ FATBIN_FILE := vectorAdd_kernel${TARGET_SIZE}.fatbin
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleDrvRuntime/README.md b/Samples/0_Introduction/simpleDrvRuntime/README.md
index 74ea4ad99..158157e82 100644
--- a/Samples/0_Introduction/simpleDrvRuntime/README.md
+++ b/Samples/0_Introduction/simpleDrvRuntime/README.md
@@ -10,7 +10,7 @@ CUDA Driver API, CUDA Runtime API, Vector Addition
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuCtxDestroy, cuModuleLoadData, cuCtxCreate, cuLaunchKernel, cuModuleUnload, cuInit, cuModuleGetFunction
+cuLaunchKernel, cuModuleLoadData, cuCtxDestroy, cuModuleUnload, cuModuleGetFunction, cuCtxCreate, cuInit
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaMallocHost, cudaFreeHost, cudaMalloc, cudaStreamCreateWithFlags, cudaStreamSynchronize, cudaMemcpyAsync
+cudaStreamCreateWithFlags, cudaFree, cudaMallocHost, cudaFreeHost, cudaStreamSynchronize, cudaMalloc, cudaMemcpyAsync
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2017.vcxproj b/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2017.vcxproj
index 92e427cd0..019fc0cc1 100644
--- a/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleDrvRuntime.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -111,6 +111,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2019.vcxproj b/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2019.vcxproj
index 64f8fab97..727c658dd 100644
--- a/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleDrvRuntime.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2022.vcxproj b/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2022.vcxproj
index 7f3d2b016..93b2ffad6 100644
--- a/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleDrvRuntime.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleHyperQ/Makefile b/Samples/0_Introduction/simpleHyperQ/Makefile
index 480185113..161406888 100644
--- a/Samples/0_Introduction/simpleHyperQ/Makefile
+++ b/Samples/0_Introduction/simpleHyperQ/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleHyperQ/NsightEclipse.xml b/Samples/0_Introduction/simpleHyperQ/NsightEclipse.xml
index 8777270a9..1a5038455 100644
--- a/Samples/0_Introduction/simpleHyperQ/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleHyperQ/NsightEclipse.xml
@@ -3,20 +3,20 @@
 <entry>
   <name>simpleHyperQ</name>
   <cuda_api_list>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaStreamDestroy</toolkit>
+    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaMallocHost</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGetDevice</toolkit>
     <toolkit>cudaEventDestroy</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
-    <toolkit>cudaGetDevice</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates the use of CUDA streams for concurrent execution of several kernels on devices which provide HyperQ (SM 3.5).  Devices without HyperQ (SM 2.0 and SM 3.0) will run a maximum of two kernels concurrently.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -62,6 +62,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleHyperQ/README.md b/Samples/0_Introduction/simpleHyperQ/README.md
index 8527317e1..467bc4b35 100644
--- a/Samples/0_Introduction/simpleHyperQ/README.md
+++ b/Samples/0_Introduction/simpleHyperQ/README.md
@@ -10,7 +10,7 @@ CUDA Systems Integration, Performance Strategies
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamDestroy, cudaFree, cudaEventRecord, cudaMallocHost, cudaStreamCreate, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaEventDestroy, cudaMemcpy, cudaGetDeviceProperties, cudaGetDevice
+cudaMemcpy, cudaStreamDestroy, cudaMalloc, cudaFree, cudaMallocHost, cudaEventSynchronize, cudaEventRecord, cudaFreeHost, cudaGetDevice, cudaEventDestroy, cudaEventElapsedTime, cudaStreamCreate, cudaGetDeviceProperties, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2017.vcxproj b/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2017.vcxproj
index 1e4bedc30..d2bbd16a2 100644
--- a/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleHyperQ.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2019.vcxproj b/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2019.vcxproj
index b9180fa0b..3a6cc72cd 100644
--- a/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleHyperQ.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2022.vcxproj b/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2022.vcxproj
index b641b9716..c15d7eec2 100644
--- a/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleHyperQ.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleIPC/Makefile b/Samples/0_Introduction/simpleIPC/Makefile
index 31945c1e5..914f1ab6f 100644
--- a/Samples/0_Introduction/simpleIPC/Makefile
+++ b/Samples/0_Introduction/simpleIPC/Makefile
@@ -303,9 +303,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleIPC/NsightEclipse.xml b/Samples/0_Introduction/simpleIPC/NsightEclipse.xml
index 23e3c5a7c..d25608dfe 100644
--- a/Samples/0_Introduction/simpleIPC/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleIPC/NsightEclipse.xml
@@ -3,29 +3,29 @@
 <entry>
   <name>simpleIPC</name>
   <cuda_api_list>
-    <toolkit>cudaDeviceEnablePeerAccess</toolkit>
-    <toolkit>cudaIpcOpenEventHandle</toolkit>
-    <toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
-    <toolkit>cudaDeviceCanAccessPeer</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaIpcCloseMemHandle</toolkit>
+    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
     <toolkit>cudaMemcpyAsync</toolkit>
+    <toolkit>cudaDeviceCanAccessPeer</toolkit>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
     <toolkit>cudaStreamDestroy</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaEventDestroy</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaGetLastError</toolkit>
+    <toolkit>cudaIpcOpenEventHandle</toolkit>
     <toolkit>cudaIpcOpenMemHandle</toolkit>
-    <toolkit>cudaGetDeviceProperties</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
     <toolkit>cudaIpcGetEventHandle</toolkit>
-    <toolkit>cudaGetLastError</toolkit>
-    <toolkit>cudaStreamSynchronize</toolkit>
     <toolkit>cudaStreamWaitEvent</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaIpcCloseMemHandle</toolkit>
+    <toolkit>cudaEventSynchronize</toolkit>
     <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaIpcGetMemHandle</toolkit>
-    <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaDeviceEnablePeerAccess</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
+    <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This CUDA Runtime API sample is a very basic sample that demonstrates Inter Process Communication with one process per GPU for computation.  Requires Compute Capability 3.0 or higher and a Linux Operating System, or a Windows Operating System.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -71,6 +71,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <sources>
     <extracompilation>../../../Common/helper_multiprocess.cpp</extracompilation>
     <extraheader>../../../Common/helper_multiprocess.h</extraheader>
diff --git a/Samples/0_Introduction/simpleIPC/README.md b/Samples/0_Introduction/simpleIPC/README.md
index 1594c529a..a9d3336b1 100644
--- a/Samples/0_Introduction/simpleIPC/README.md
+++ b/Samples/0_Introduction/simpleIPC/README.md
@@ -10,7 +10,7 @@ CUDA Systems Integration, Peer to Peer, InterProcess Communication
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDeviceEnablePeerAccess, cudaIpcOpenEventHandle, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaStreamCreateWithFlags, cudaDeviceCanAccessPeer, cudaMemcpyAsync, cudaStreamDestroy, cudaEventCreate, cudaMalloc, cudaEventDestroy, cudaSetDevice, cudaIpcOpenMemHandle, cudaGetDeviceProperties, cudaGetDeviceCount, cudaIpcGetEventHandle, cudaGetLastError, cudaStreamSynchronize, cudaStreamWaitEvent, cudaFree, cudaIpcCloseMemHandle, cudaEventRecord, cudaIpcGetMemHandle, cudaEventSynchronize
+cudaSetDevice, cudaIpcCloseMemHandle, cudaEventDestroy, cudaGetDeviceCount, cudaMemcpyAsync, cudaDeviceCanAccessPeer, cudaStreamCreateWithFlags, cudaStreamDestroy, cudaGetLastError, cudaIpcOpenEventHandle, cudaIpcOpenMemHandle, cudaIpcGetEventHandle, cudaStreamWaitEvent, cudaEventCreate, cudaFree, cudaEventSynchronize, cudaEventRecord, cudaIpcGetMemHandle, cudaStreamSynchronize, cudaDeviceEnablePeerAccess, cudaMalloc, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [IPC](../../../README.md#ipc)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/0_Introduction/simpleIPC/simpleIPC_vs2017.vcxproj b/Samples/0_Introduction/simpleIPC/simpleIPC_vs2017.vcxproj
index 99d342ac9..037714303 100644
--- a/Samples/0_Introduction/simpleIPC/simpleIPC_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleIPC/simpleIPC_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleIPC.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleIPC/simpleIPC_vs2019.vcxproj b/Samples/0_Introduction/simpleIPC/simpleIPC_vs2019.vcxproj
index 1e507919f..4d8096a2b 100644
--- a/Samples/0_Introduction/simpleIPC/simpleIPC_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleIPC/simpleIPC_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleIPC.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleIPC/simpleIPC_vs2022.vcxproj b/Samples/0_Introduction/simpleIPC/simpleIPC_vs2022.vcxproj
index d9e9f48d8..df3aba1f0 100644
--- a/Samples/0_Introduction/simpleIPC/simpleIPC_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleIPC/simpleIPC_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleIPC.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleLayeredTexture/Makefile b/Samples/0_Introduction/simpleLayeredTexture/Makefile
index bd2660f6e..eeb6d7eae 100644
--- a/Samples/0_Introduction/simpleLayeredTexture/Makefile
+++ b/Samples/0_Introduction/simpleLayeredTexture/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleLayeredTexture/NsightEclipse.xml b/Samples/0_Introduction/simpleLayeredTexture/NsightEclipse.xml
index 91481c99c..ff2bc6f00 100644
--- a/Samples/0_Introduction/simpleLayeredTexture/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleLayeredTexture/NsightEclipse.xml
@@ -3,17 +3,17 @@
 <entry>
   <name>simpleLayeredTexture</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaCreateChannelDesc</toolkit>
     <toolkit>cudaFreeArray</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaPitchedPtr</toolkit>
+    <toolkit>cudaPos</toolkit>
+    <toolkit>cudaDestroyTextureObject</toolkit>
     <toolkit>cudaExtent</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaDestroyTextureObject</toolkit>
-    <toolkit>cudaPitchedPtr</toolkit>
     <toolkit>cudaCreateTextureObject</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaCreateChannelDesc</toolkit>
-    <toolkit>cudaPos</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[Simple example that demonstrates how to use a new CUDA 4.0 feature to support layered Textures in CUDA C.]]></description>
@@ -52,6 +52,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleLayeredTexture/README.md b/Samples/0_Introduction/simpleLayeredTexture/README.md
index 3add67780..5dc0eb715 100644
--- a/Samples/0_Introduction/simpleLayeredTexture/README.md
+++ b/Samples/0_Introduction/simpleLayeredTexture/README.md
@@ -10,7 +10,7 @@ Texture, Volume Processing
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaFreeArray, cudaExtent, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaPitchedPtr, cudaCreateTextureObject, cudaMalloc, cudaCreateChannelDesc, cudaPos, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaCreateChannelDesc, cudaFreeArray, cudaFree, cudaPitchedPtr, cudaPos, cudaDestroyTextureObject, cudaExtent, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMalloc, cudaGetDeviceProperties
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2017.vcxproj b/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2017.vcxproj
index 71e3a84c2..ee1e3e42b 100644
--- a/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleLayeredTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2019.vcxproj b/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2019.vcxproj
index fe147d3ec..3ae1a4f9e 100644
--- a/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleLayeredTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2022.vcxproj b/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2022.vcxproj
index a99c2ee34..0d29aae5b 100644
--- a/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleLayeredTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleMPI/Makefile b/Samples/0_Introduction/simpleMPI/Makefile
index 49fc56c36..8726e03d4 100644
--- a/Samples/0_Introduction/simpleMPI/Makefile
+++ b/Samples/0_Introduction/simpleMPI/Makefile
@@ -335,9 +335,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleMPI/README.md b/Samples/0_Introduction/simpleMPI/README.md
index 6f56a03d6..5e0f97fa3 100644
--- a/Samples/0_Introduction/simpleMPI/README.md
+++ b/Samples/0_Introduction/simpleMPI/README.md
@@ -10,7 +10,7 @@ CUDA Systems Integration, MPI, Multithreading
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaGetLastError, cudaFree, cudaMemcpy
+cudaMalloc, cudaGetLastError, cudaMemcpy, cudaFree
 
 ## Dependencies needed to build/run
 [MPI](../../../README.md#mpi)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/0_Introduction/simpleMPI/simpleMPI_vs2017.vcxproj b/Samples/0_Introduction/simpleMPI/simpleMPI_vs2017.vcxproj
index 4e1777bc4..94e77612e 100644
--- a/Samples/0_Introduction/simpleMPI/simpleMPI_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleMPI/simpleMPI_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleMPI.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleMPI/simpleMPI_vs2019.vcxproj b/Samples/0_Introduction/simpleMPI/simpleMPI_vs2019.vcxproj
index cf5e568bd..978222207 100644
--- a/Samples/0_Introduction/simpleMPI/simpleMPI_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleMPI/simpleMPI_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleMPI.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleMPI/simpleMPI_vs2022.vcxproj b/Samples/0_Introduction/simpleMPI/simpleMPI_vs2022.vcxproj
index 2959c87e1..8f6ea5ae9 100644
--- a/Samples/0_Introduction/simpleMPI/simpleMPI_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleMPI/simpleMPI_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleMPI.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleMultiCopy/Makefile b/Samples/0_Introduction/simpleMultiCopy/Makefile
index 26974b350..d6d253c60 100644
--- a/Samples/0_Introduction/simpleMultiCopy/Makefile
+++ b/Samples/0_Introduction/simpleMultiCopy/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleMultiCopy/NsightEclipse.xml b/Samples/0_Introduction/simpleMultiCopy/NsightEclipse.xml
index ca79562c0..bb76ce8c7 100644
--- a/Samples/0_Introduction/simpleMultiCopy/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleMultiCopy/NsightEclipse.xml
@@ -3,22 +3,22 @@
 <entry>
   <name>simpleMultiCopy</name>
   <cuda_api_list>
-    <toolkit>cudaMemset</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaStreamDestroy</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
     <toolkit>cudaHostAlloc</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaStreamDestroy</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaMemcpyAsync</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaMemset</toolkit>
     <toolkit>cudaEventDestroy</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaMemcpyAsync</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[Supported in GPUs with Compute Capability 1.1, overlapping compute with one memcopy is possible from the host system.  For Quadro and Tesla GPUs with Compute Capability 2.0, a second overlapped copy operation in either direction at full speed is possible (PCI-e is symmetric).  This sample illustrates the usage of CUDA streams to achieve overlapping of kernel execution with data copies to and from the device.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -66,6 +66,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleMultiCopy/README.md b/Samples/0_Introduction/simpleMultiCopy/README.md
index 8f015c263..724042872 100644
--- a/Samples/0_Introduction/simpleMultiCopy/README.md
+++ b/Samples/0_Introduction/simpleMultiCopy/README.md
@@ -10,7 +10,7 @@ CUDA Streams and Events, Asynchronous Data Transfers, Overlap Compute and Copy,
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaStreamDestroy, cudaEventRecord, cudaStreamCreate, cudaHostAlloc, cudaEventCreate, cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaEventDestroy, cudaSetDevice, cudaMemcpyAsync, cudaGetDeviceProperties
+cudaHostAlloc, cudaStreamDestroy, cudaMalloc, cudaMemcpyAsync, cudaFree, cudaSetDevice, cudaEventSynchronize, cudaDeviceSynchronize, cudaEventRecord, cudaFreeHost, cudaMemset, cudaEventDestroy, cudaEventElapsedTime, cudaStreamCreate, cudaGetDeviceProperties, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2017.vcxproj b/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2017.vcxproj
index 86ccf67a3..8fbcf08cb 100644
--- a/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleMultiCopy.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2019.vcxproj b/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2019.vcxproj
index d1a1609f6..1f77866f6 100644
--- a/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleMultiCopy.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2022.vcxproj b/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2022.vcxproj
index 5251d5928..447b63311 100644
--- a/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleMultiCopy.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleMultiGPU/Makefile b/Samples/0_Introduction/simpleMultiGPU/Makefile
index 6db255e49..15d13ddeb 100644
--- a/Samples/0_Introduction/simpleMultiGPU/Makefile
+++ b/Samples/0_Introduction/simpleMultiGPU/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleMultiGPU/NsightEclipse.xml b/Samples/0_Introduction/simpleMultiGPU/NsightEclipse.xml
index 500fc9ea8..a1e377e56 100644
--- a/Samples/0_Introduction/simpleMultiGPU/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleMultiGPU/NsightEclipse.xml
@@ -6,13 +6,13 @@
     <toolkit>cudaStreamDestroy</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaMallocHost</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaFreeHost</toolkit>
     <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaMemcpyAsync</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
   </cuda_api_list>
   <description><![CDATA[This application demonstrates how to use the new CUDA 4.0 API for CUDA context management and multi-threaded access to run CUDA kernels on multiple-GPUs.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -53,6 +53,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleMultiGPU/README.md b/Samples/0_Introduction/simpleMultiGPU/README.md
index 0f8464c26..284904f8a 100644
--- a/Samples/0_Introduction/simpleMultiGPU/README.md
+++ b/Samples/0_Introduction/simpleMultiGPU/README.md
@@ -10,7 +10,7 @@ Asynchronous Data Transfers, CUDA Streams and Events, Multithreading, Multi-GPU
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamDestroy, cudaFree, cudaMallocHost, cudaStreamCreate, cudaGetDeviceCount, cudaFreeHost, cudaMalloc, cudaSetDevice, cudaStreamSynchronize, cudaMemcpyAsync
+cudaStreamDestroy, cudaFree, cudaMallocHost, cudaSetDevice, cudaFreeHost, cudaStreamSynchronize, cudaMalloc, cudaMemcpyAsync, cudaStreamCreate, cudaGetDeviceCount
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2017.vcxproj b/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2017.vcxproj
index bcc574be1..a025b2ec4 100644
--- a/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleMultiGPU.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2019.vcxproj b/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2019.vcxproj
index 30a6f1990..2a6ce2539 100644
--- a/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleMultiGPU.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2022.vcxproj b/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2022.vcxproj
index 6fd4139c3..315059cab 100644
--- a/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleMultiGPU.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleOccupancy/Makefile b/Samples/0_Introduction/simpleOccupancy/Makefile
index 85aa3c9f0..b735ec0c8 100644
--- a/Samples/0_Introduction/simpleOccupancy/Makefile
+++ b/Samples/0_Introduction/simpleOccupancy/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleOccupancy/NsightEclipse.xml b/Samples/0_Introduction/simpleOccupancy/NsightEclipse.xml
index 34577846b..e4383b1c5 100644
--- a/Samples/0_Introduction/simpleOccupancy/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleOccupancy/NsightEclipse.xml
@@ -3,17 +3,17 @@
 <entry>
   <name>simpleOccupancy</name>
   <cuda_api_list>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaOccupancyMaxPotentialBlockSize</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaGetDevice</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
-    <toolkit>cudaGetDevice</toolkit>
+    <toolkit>cudaOccupancyMaxPotentialBlockSize</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates the basic usage of the CUDA occupancy calculator and occupancy-based launch configurator APIs by launching a kernel with the launch configurator, and measures the utilization difference against a manually configured launch.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -52,6 +52,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleOccupancy/README.md b/Samples/0_Introduction/simpleOccupancy/README.md
index 4ca96acf9..ddc12f2a2 100644
--- a/Samples/0_Introduction/simpleOccupancy/README.md
+++ b/Samples/0_Introduction/simpleOccupancy/README.md
@@ -10,7 +10,7 @@ Occupancy Calculator
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaEventRecord, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaEventCreate, cudaOccupancyMaxPotentialBlockSize, cudaEventElapsedTime, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties, cudaGetDevice
+cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaEventRecord, cudaGetDevice, cudaMalloc, cudaEventElapsedTime, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaGetDeviceProperties, cudaOccupancyMaxPotentialBlockSize, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2017.vcxproj b/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2017.vcxproj
index ee3e8ca57..d4d97a025 100644
--- a/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleOccupancy.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2019.vcxproj b/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2019.vcxproj
index a0db9b8e0..096cea4af 100644
--- a/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleOccupancy.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2022.vcxproj b/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2022.vcxproj
index 312b5e699..57de8a550 100644
--- a/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleOccupancy.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleP2P/Makefile b/Samples/0_Introduction/simpleP2P/Makefile
index 036ff0d85..804aa449d 100644
--- a/Samples/0_Introduction/simpleP2P/Makefile
+++ b/Samples/0_Introduction/simpleP2P/Makefile
@@ -303,9 +303,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleP2P/NsightEclipse.xml b/Samples/0_Introduction/simpleP2P/NsightEclipse.xml
index 69fc274d6..65fe83bb0 100644
--- a/Samples/0_Introduction/simpleP2P/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleP2P/NsightEclipse.xml
@@ -3,23 +3,23 @@
 <entry>
   <name>simpleP2P</name>
   <cuda_api_list>
-    <toolkit>cudaDeviceEnablePeerAccess</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaMallocHost</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaEventSynchronize</toolkit>
-    <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaEventCreateWithFlags</toolkit>
-    <toolkit>cudaDeviceCanAccessPeer</toolkit>
-    <toolkit>cudaEventDestroy</toolkit>
     <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaEventSynchronize</toolkit>
     <toolkit>cudaDeviceDisablePeerAccess</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaFreeHost</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaDeviceEnablePeerAccess</toolkit>
+    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaDeviceCanAccessPeer</toolkit>
   </cuda_api_list>
   <description><![CDATA[This application demonstrates CUDA APIs that support Peer-To-Peer (P2P) copies, Peer-To-Peer (P2P) addressing, and Unified Virtual Memory Addressing (UVA) between multiple GPUs. In general, P2P is supported between two same GPUs with some exceptions, such as some Tesla and Quadro GPUs.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -67,6 +67,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleP2P/README.md b/Samples/0_Introduction/simpleP2P/README.md
index cbe3b2524..56b4b8bf1 100644
--- a/Samples/0_Introduction/simpleP2P/README.md
+++ b/Samples/0_Introduction/simpleP2P/README.md
@@ -10,7 +10,7 @@ Performance Strategies, Asynchronous Data Transfers, Unified Virtual Address Spa
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDeviceEnablePeerAccess, cudaFree, cudaEventRecord, cudaMallocHost, cudaGetDeviceCount, cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaEventCreateWithFlags, cudaDeviceCanAccessPeer, cudaEventDestroy, cudaSetDevice, cudaDeviceDisablePeerAccess, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaMalloc, cudaFree, cudaMallocHost, cudaEventCreateWithFlags, cudaSetDevice, cudaEventSynchronize, cudaDeviceDisablePeerAccess, cudaGetDeviceCount, cudaDeviceSynchronize, cudaEventRecord, cudaFreeHost, cudaGetDeviceProperties, cudaDeviceEnablePeerAccess, cudaEventDestroy, cudaEventElapsedTime, cudaDeviceCanAccessPeer
 
 ## Dependencies needed to build/run
 [only-64-bit](../../../README.md#only-64-bit)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/0_Introduction/simpleP2P/simpleP2P_vs2017.vcxproj b/Samples/0_Introduction/simpleP2P/simpleP2P_vs2017.vcxproj
index aea119d94..41efff17b 100644
--- a/Samples/0_Introduction/simpleP2P/simpleP2P_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleP2P/simpleP2P_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleP2P.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleP2P/simpleP2P_vs2019.vcxproj b/Samples/0_Introduction/simpleP2P/simpleP2P_vs2019.vcxproj
index af3b80747..d51f6d7bb 100644
--- a/Samples/0_Introduction/simpleP2P/simpleP2P_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleP2P/simpleP2P_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleP2P.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleP2P/simpleP2P_vs2022.vcxproj b/Samples/0_Introduction/simpleP2P/simpleP2P_vs2022.vcxproj
index ea28f0709..9ed321645 100644
--- a/Samples/0_Introduction/simpleP2P/simpleP2P_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleP2P/simpleP2P_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleP2P.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simplePitchLinearTexture/Makefile b/Samples/0_Introduction/simplePitchLinearTexture/Makefile
index 32d993f2c..98218e83a 100644
--- a/Samples/0_Introduction/simplePitchLinearTexture/Makefile
+++ b/Samples/0_Introduction/simplePitchLinearTexture/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simplePitchLinearTexture/NsightEclipse.xml b/Samples/0_Introduction/simplePitchLinearTexture/NsightEclipse.xml
index e10820631..be7882b8b 100644
--- a/Samples/0_Introduction/simplePitchLinearTexture/NsightEclipse.xml
+++ b/Samples/0_Introduction/simplePitchLinearTexture/NsightEclipse.xml
@@ -3,18 +3,18 @@
 <entry>
   <name>simplePitchLinearTexture</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaMemcpyToArray</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaMallocArray</toolkit>
     <toolkit>cudaFreeArray</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaMallocPitch</toolkit>
     <toolkit>cudaDestroyTextureObject</toolkit>
     <toolkit>cudaEventSynchronize</toolkit>
-    <toolkit>cudaMallocPitch</toolkit>
+    <toolkit>cudaMemcpyToArray</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaCreateTextureObject</toolkit>
     <toolkit>cudaEventDestroy</toolkit>
-    <toolkit>cudaMallocArray</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[Use of Pitch Linear Textures]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -54,6 +54,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simplePitchLinearTexture/README.md b/Samples/0_Introduction/simplePitchLinearTexture/README.md
index dc437cf2e..95944a886 100644
--- a/Samples/0_Introduction/simplePitchLinearTexture/README.md
+++ b/Samples/0_Introduction/simplePitchLinearTexture/README.md
@@ -10,7 +10,7 @@ Texture, Image Processing
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaMemcpyToArray, cudaEventRecord, cudaFreeArray, cudaEventCreate, cudaEventElapsedTime, cudaDestroyTextureObject, cudaEventSynchronize, cudaMallocPitch, cudaCreateTextureObject, cudaEventDestroy, cudaMallocArray
+cudaMallocArray, cudaFreeArray, cudaFree, cudaMallocPitch, cudaDestroyTextureObject, cudaEventSynchronize, cudaMemcpyToArray, cudaEventRecord, cudaCreateTextureObject, cudaEventDestroy, cudaEventElapsedTime, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2017.vcxproj b/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2017.vcxproj
index ae30718fe..f33a061ea 100644
--- a/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2017.vcxproj
+++ b/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simplePitchLinearTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2019.vcxproj b/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2019.vcxproj
index 60bf7a611..c08e4de30 100644
--- a/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2019.vcxproj
+++ b/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simplePitchLinearTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2022.vcxproj b/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2022.vcxproj
index e964badc3..441ae1c9d 100644
--- a/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2022.vcxproj
+++ b/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simplePitchLinearTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simplePrintf/Makefile b/Samples/0_Introduction/simplePrintf/Makefile
index e1297aa6f..3b8cf8a0a 100644
--- a/Samples/0_Introduction/simplePrintf/Makefile
+++ b/Samples/0_Introduction/simplePrintf/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simplePrintf/README.md b/Samples/0_Introduction/simplePrintf/README.md
index fd29cf269..872faf89c 100644
--- a/Samples/0_Introduction/simplePrintf/README.md
+++ b/Samples/0_Introduction/simplePrintf/README.md
@@ -10,7 +10,7 @@ Debugging
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDeviceSynchronize, cudaGetDeviceProperties, cudaGetDevice
+cudaGetDeviceProperties, cudaDeviceSynchronize, cudaGetDevice
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simplePrintf/simplePrintf_vs2017.vcxproj b/Samples/0_Introduction/simplePrintf/simplePrintf_vs2017.vcxproj
index e06b0f938..861f30b9b 100644
--- a/Samples/0_Introduction/simplePrintf/simplePrintf_vs2017.vcxproj
+++ b/Samples/0_Introduction/simplePrintf/simplePrintf_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simplePrintf.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simplePrintf/simplePrintf_vs2019.vcxproj b/Samples/0_Introduction/simplePrintf/simplePrintf_vs2019.vcxproj
index 84c7cfbba..6dcb3c5e0 100644
--- a/Samples/0_Introduction/simplePrintf/simplePrintf_vs2019.vcxproj
+++ b/Samples/0_Introduction/simplePrintf/simplePrintf_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simplePrintf.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simplePrintf/simplePrintf_vs2022.vcxproj b/Samples/0_Introduction/simplePrintf/simplePrintf_vs2022.vcxproj
index 039c5fb25..e45b59538 100644
--- a/Samples/0_Introduction/simplePrintf/simplePrintf_vs2022.vcxproj
+++ b/Samples/0_Introduction/simplePrintf/simplePrintf_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simplePrintf.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleSeparateCompilation/Makefile b/Samples/0_Introduction/simpleSeparateCompilation/Makefile
index 59116ae72..4a92e480d 100644
--- a/Samples/0_Introduction/simpleSeparateCompilation/Makefile
+++ b/Samples/0_Introduction/simpleSeparateCompilation/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleSeparateCompilation/NsightEclipse.xml b/Samples/0_Introduction/simpleSeparateCompilation/NsightEclipse.xml
index 586ab4771..2f1c15ab1 100644
--- a/Samples/0_Introduction/simpleSeparateCompilation/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleSeparateCompilation/NsightEclipse.xml
@@ -12,11 +12,11 @@
     </static>
   </compilations>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaGetLastError</toolkit>
     <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaMemcpyFromSymbol</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGetLastError</toolkit>
+    <toolkit>cudaMalloc</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates a CUDA 5.0 feature, the ability to create a GPU device static library and use it within another CUDA kernel.  This example demonstrates how to pass in a GPU device function (from the GPU device static library) as a function pointer to be called.  This sample requires devices with compute capability 2.0 or higher.]]></description>
   <devicecompilation>separate</devicecompilation>
@@ -56,6 +56,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleSeparateCompilation/README.md b/Samples/0_Introduction/simpleSeparateCompilation/README.md
index b83c4f5c8..a4b54487b 100644
--- a/Samples/0_Introduction/simpleSeparateCompilation/README.md
+++ b/Samples/0_Introduction/simpleSeparateCompilation/README.md
@@ -10,7 +10,7 @@ Separate Compilation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaMemcpyFromSymbol
+cudaMemcpy, cudaMemcpyFromSymbol, cudaFree, cudaGetLastError, cudaMalloc
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2017.vcxproj b/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2017.vcxproj
index f8293c639..fc05d0f38 100644
--- a/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleSeparateCompilation.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2019.vcxproj b/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2019.vcxproj
index db4e0716e..d6f50cf49 100644
--- a/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleSeparateCompilation.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2022.vcxproj b/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2022.vcxproj
index 07e55c2ba..758766ef7 100644
--- a/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleSeparateCompilation.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleStreams/Makefile b/Samples/0_Introduction/simpleStreams/Makefile
index 0c0886863..0e83a3071 100644
--- a/Samples/0_Introduction/simpleStreams/Makefile
+++ b/Samples/0_Introduction/simpleStreams/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleStreams/NsightEclipse.xml b/Samples/0_Introduction/simpleStreams/NsightEclipse.xml
index b2b5aff62..d7fb6d38d 100644
--- a/Samples/0_Introduction/simpleStreams/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleStreams/NsightEclipse.xml
@@ -4,25 +4,25 @@
   <name>simpleStreams</name>
   <cuda_api_list>
     <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaMemcpyAsync</toolkit>
-    <toolkit>cudaStreamDestroy</toolkit>
-    <toolkit>cudaMallocHost</toolkit>
-    <toolkit>cudaHostAlloc</toolkit>
-    <toolkit>cudaHostRegister</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaEventCreateWithFlags</toolkit>
-    <toolkit>cudaEventDestroy</toolkit>
     <toolkit>cudaSetDeviceFlags</toolkit>
     <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
+    <toolkit>cudaMallocHost</toolkit>
+    <toolkit>cudaEventCreateWithFlags</toolkit>
+    <toolkit>cudaFreeHost</toolkit>
+    <toolkit>cudaMemcpyAsync</toolkit>
     <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaStreamDestroy</toolkit>
     <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaHostAlloc</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
+    <toolkit>cudaHostRegister</toolkit>
     <toolkit>cudaEventSynchronize</toolkit>
-    <toolkit>cudaFreeHost</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGetDeviceProperties</toolkit>
     <toolkit>cudaHostUnregister</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample uses CUDA streams to overlap kernel executions with memory copies between the host and a GPU device.  This sample uses a new CUDA 4.0 feature that supports pinning of generic host memory.  Requires Compute Capability 2.0 or higher.]]></description>
@@ -61,6 +61,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleStreams/README.md b/Samples/0_Introduction/simpleStreams/README.md
index 7c33b9071..a9de18f80 100644
--- a/Samples/0_Introduction/simpleStreams/README.md
+++ b/Samples/0_Introduction/simpleStreams/README.md
@@ -10,7 +10,7 @@ Asynchronous Data Transfers, CUDA Streams and Events
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemcpy, cudaMemcpyAsync, cudaStreamDestroy, cudaMallocHost, cudaHostAlloc, cudaHostRegister, cudaMalloc, cudaEventCreateWithFlags, cudaEventDestroy, cudaSetDeviceFlags, cudaSetDevice, cudaGetDeviceProperties, cudaGetDeviceCount, cudaEventElapsedTime, cudaMemset, cudaFree, cudaEventRecord, cudaStreamCreate, cudaEventSynchronize, cudaFreeHost, cudaHostUnregister
+cudaMemcpy, cudaSetDeviceFlags, cudaSetDevice, cudaEventDestroy, cudaStreamCreate, cudaMallocHost, cudaEventCreateWithFlags, cudaFreeHost, cudaMemcpyAsync, cudaGetDeviceCount, cudaStreamDestroy, cudaMemset, cudaEventElapsedTime, cudaHostAlloc, cudaFree, cudaHostRegister, cudaEventSynchronize, cudaEventRecord, cudaMalloc, cudaGetDeviceProperties, cudaHostUnregister
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleStreams/simpleStreams_vs2017.vcxproj b/Samples/0_Introduction/simpleStreams/simpleStreams_vs2017.vcxproj
index 1aae760f8..b0fc51da6 100644
--- a/Samples/0_Introduction/simpleStreams/simpleStreams_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleStreams/simpleStreams_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleStreams.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleStreams/simpleStreams_vs2019.vcxproj b/Samples/0_Introduction/simpleStreams/simpleStreams_vs2019.vcxproj
index f950cfd87..6b96b6b69 100644
--- a/Samples/0_Introduction/simpleStreams/simpleStreams_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleStreams/simpleStreams_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleStreams.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleStreams/simpleStreams_vs2022.vcxproj b/Samples/0_Introduction/simpleStreams/simpleStreams_vs2022.vcxproj
index 580200404..cf0c0e4d2 100644
--- a/Samples/0_Introduction/simpleStreams/simpleStreams_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleStreams/simpleStreams_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleStreams.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleSurfaceWrite/Makefile b/Samples/0_Introduction/simpleSurfaceWrite/Makefile
index 024e0d4cd..7440eee76 100644
--- a/Samples/0_Introduction/simpleSurfaceWrite/Makefile
+++ b/Samples/0_Introduction/simpleSurfaceWrite/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleSurfaceWrite/NsightEclipse.xml b/Samples/0_Introduction/simpleSurfaceWrite/NsightEclipse.xml
index 182fad401..4d9153cd0 100644
--- a/Samples/0_Introduction/simpleSurfaceWrite/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleSurfaceWrite/NsightEclipse.xml
@@ -6,18 +6,18 @@
     <clean>output.pgm</clean>
   </cleanextras>
   <cuda_api_list>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaCreateChannelDesc</toolkit>
+    <toolkit>cudaMallocArray</toolkit>
+    <toolkit>cudaFreeArray</toolkit>
     <toolkit>cudaFree</toolkit>
+    <toolkit>cudaDestroyTextureObject</toolkit>
     <toolkit>cudaMemcpyToArray</toolkit>
-    <toolkit>cudaFreeArray</toolkit>
     <toolkit>cudaDestroySurfaceObject</toolkit>
-    <toolkit>cudaCreateSurfaceObject</toolkit>
-    <toolkit>cudaDestroyTextureObject</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaCreateSurfaceObject</toolkit>
     <toolkit>cudaCreateTextureObject</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMallocArray</toolkit>
-    <toolkit>cudaCreateChannelDesc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[Simple example that demonstrates the use of 2D surface references (Write-to-Texture)]]></description>
@@ -61,6 +61,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleSurfaceWrite/README.md b/Samples/0_Introduction/simpleSurfaceWrite/README.md
index a93a9d6a8..944cf2fcf 100644
--- a/Samples/0_Introduction/simpleSurfaceWrite/README.md
+++ b/Samples/0_Introduction/simpleSurfaceWrite/README.md
@@ -10,7 +10,7 @@ Texture, Surface Writes, Image Processing
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaMemcpyToArray, cudaFreeArray, cudaDestroySurfaceObject, cudaCreateSurfaceObject, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaCreateChannelDesc, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaCreateChannelDesc, cudaMallocArray, cudaFreeArray, cudaFree, cudaDestroyTextureObject, cudaMemcpyToArray, cudaDestroySurfaceObject, cudaDeviceSynchronize, cudaCreateSurfaceObject, cudaCreateTextureObject, cudaMalloc, cudaGetDeviceProperties
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2017.vcxproj b/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2017.vcxproj
index c076e2710..8a65d23a6 100644
--- a/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleSurfaceWrite.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2019.vcxproj b/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2019.vcxproj
index c9fdbc59f..d61b86899 100644
--- a/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleSurfaceWrite.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2022.vcxproj b/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2022.vcxproj
index 1b8e29b38..d5275f35c 100644
--- a/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleSurfaceWrite.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleTemplates/Makefile b/Samples/0_Introduction/simpleTemplates/Makefile
index 78ad03d31..beac3d79d 100644
--- a/Samples/0_Introduction/simpleTemplates/Makefile
+++ b/Samples/0_Introduction/simpleTemplates/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleTemplates/NsightEclipse.xml b/Samples/0_Introduction/simpleTemplates/NsightEclipse.xml
index 343057ce0..361492619 100644
--- a/Samples/0_Introduction/simpleTemplates/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleTemplates/NsightEclipse.xml
@@ -4,9 +4,9 @@
   <name>simpleTemplates</name>
   <cuda_api_list>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaFree</toolkit>
     <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaFree</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample is a templatized version of the template project. It also shows how to correctly templatize dynamically allocated shared memory arrays.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -42,6 +42,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleTemplates/README.md b/Samples/0_Introduction/simpleTemplates/README.md
index 83a671030..0db671510 100644
--- a/Samples/0_Introduction/simpleTemplates/README.md
+++ b/Samples/0_Introduction/simpleTemplates/README.md
@@ -10,7 +10,7 @@ C++ Templates
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaFree, cudaMemcpy, cudaGetDeviceProperties
+cudaMalloc, cudaMemcpy, cudaGetDeviceProperties, cudaFree
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2017.vcxproj b/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2017.vcxproj
index b78a4c18a..96cca9854 100644
--- a/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleTemplates.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2019.vcxproj b/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2019.vcxproj
index 9c5ec6d86..93969038c 100644
--- a/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleTemplates.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2022.vcxproj b/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2022.vcxproj
index 0929e9be7..90b222bea 100644
--- a/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleTemplates.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleTemplates_nvrtc/README.md b/Samples/0_Introduction/simpleTemplates_nvrtc/README.md
index ae3092616..31c588ee2 100644
--- a/Samples/0_Introduction/simpleTemplates_nvrtc/README.md
+++ b/Samples/0_Introduction/simpleTemplates_nvrtc/README.md
@@ -10,7 +10,7 @@ C++ Templates, Runtime Compilation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuModuleGetFunction, cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuMemcpyHtoD
+cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction
 
 ## Dependencies needed to build/run
 [NVRTC](../../../README.md#nvrtc)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2017.vcxproj b/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2017.vcxproj
index 0c2dfa96d..1655b5369 100644
--- a/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2019.vcxproj b/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2019.vcxproj
index 08f3653a2..6d28d7aa7 100644
--- a/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2022.vcxproj b/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2022.vcxproj
index c139a2742..0c8ae9691 100644
--- a/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleTexture/Makefile b/Samples/0_Introduction/simpleTexture/Makefile
index 046207e9c..e705cef81 100644
--- a/Samples/0_Introduction/simpleTexture/Makefile
+++ b/Samples/0_Introduction/simpleTexture/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleTexture/NsightEclipse.xml b/Samples/0_Introduction/simpleTexture/NsightEclipse.xml
index a4fbab4b0..0f029aeaf 100644
--- a/Samples/0_Introduction/simpleTexture/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleTexture/NsightEclipse.xml
@@ -6,16 +6,16 @@
     <clean>./data/teapot512_bw_out.pgm</clean>
   </cleanextras>
   <cuda_api_list>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaCreateChannelDesc</toolkit>
+    <toolkit>cudaMallocArray</toolkit>
+    <toolkit>cudaFreeArray</toolkit>
     <toolkit>cudaFree</toolkit>
+    <toolkit>cudaDestroyTextureObject</toolkit>
     <toolkit>cudaMemcpyToArray</toolkit>
-    <toolkit>cudaFreeArray</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaDestroyTextureObject</toolkit>
     <toolkit>cudaCreateTextureObject</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMallocArray</toolkit>
-    <toolkit>cudaCreateChannelDesc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
   </cuda_api_list>
   <description><![CDATA[Simple example that demonstrates use of Textures in CUDA.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -59,6 +59,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleTexture/README.md b/Samples/0_Introduction/simpleTexture/README.md
index 8577f3379..834d4ee84 100644
--- a/Samples/0_Introduction/simpleTexture/README.md
+++ b/Samples/0_Introduction/simpleTexture/README.md
@@ -10,7 +10,7 @@ CUDA Runtime API, Texture, Image Processing
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaMemcpyToArray, cudaFreeArray, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaCreateChannelDesc, cudaMemcpy
+cudaMemcpy, cudaCreateChannelDesc, cudaMallocArray, cudaFreeArray, cudaFree, cudaDestroyTextureObject, cudaMemcpyToArray, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMalloc
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleTexture/simpleTexture_vs2017.vcxproj b/Samples/0_Introduction/simpleTexture/simpleTexture_vs2017.vcxproj
index 544d7b224..c12f6f171 100644
--- a/Samples/0_Introduction/simpleTexture/simpleTexture_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleTexture/simpleTexture_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleTexture/simpleTexture_vs2019.vcxproj b/Samples/0_Introduction/simpleTexture/simpleTexture_vs2019.vcxproj
index 034f28908..6c76b4ab4 100644
--- a/Samples/0_Introduction/simpleTexture/simpleTexture_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleTexture/simpleTexture_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleTexture/simpleTexture_vs2022.vcxproj b/Samples/0_Introduction/simpleTexture/simpleTexture_vs2022.vcxproj
index 04f2a44d1..5562ce0bb 100644
--- a/Samples/0_Introduction/simpleTexture/simpleTexture_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleTexture/simpleTexture_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleTexture3D/Makefile b/Samples/0_Introduction/simpleTexture3D/Makefile
index fa6da461b..f232cfff5 100644
--- a/Samples/0_Introduction/simpleTexture3D/Makefile
+++ b/Samples/0_Introduction/simpleTexture3D/Makefile
@@ -299,9 +299,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleTexture3D/NsightEclipse.xml b/Samples/0_Introduction/simpleTexture3D/NsightEclipse.xml
index de66d67b0..56848594b 100644
--- a/Samples/0_Introduction/simpleTexture3D/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleTexture3D/NsightEclipse.xml
@@ -3,20 +3,20 @@
 <entry>
   <name>simpleTexture3D</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
+    <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFreeArray</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaPitchedPtr</toolkit>
     <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
+    <toolkit>cudaDestroyTextureObject</toolkit>
     <toolkit>cudaExtent</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaDestroyTextureObject</toolkit>
-    <toolkit>cudaPitchedPtr</toolkit>
     <toolkit>cudaCreateTextureObject</toolkit>
-    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaGraphicsUnregisterResource</toolkit>
-    <toolkit>cudaGraphicsUnmapResources</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
   </cuda_api_list>
   <description><![CDATA[Simple example that demonstrates use of 3D Textures in CUDA.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -79,6 +79,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleTexture3D/README.md b/Samples/0_Introduction/simpleTexture3D/README.md
index 7ea427e74..de889b8bd 100644
--- a/Samples/0_Introduction/simpleTexture3D/README.md
+++ b/Samples/0_Introduction/simpleTexture3D/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Image Processing, 3D Textures, Surface Writes
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaExtent, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaPitchedPtr, cudaCreateTextureObject, cudaMalloc, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy
+cudaGraphicsUnmapResources, cudaMemcpy, cudaFreeArray, cudaFree, cudaPitchedPtr, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaExtent, cudaDeviceSynchronize, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/0_Introduction/simpleTexture3D/findgllib.mk b/Samples/0_Introduction/simpleTexture3D/findgllib.mk
index f0a5c5512..998fcf0f1 100644
--- a/Samples/0_Introduction/simpleTexture3D/findgllib.mk
+++ b/Samples/0_Introduction/simpleTexture3D/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
+    else
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-  
+
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2017.vcxproj b/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2017.vcxproj
index 9fefb6012..ed90a63b8 100644
--- a/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleTexture3D.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -118,6 +118,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2019.vcxproj b/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2019.vcxproj
index 23518ada3..be0fa981f 100644
--- a/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleTexture3D.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2022.vcxproj b/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2022.vcxproj
index dd4ab2af5..1dd427b06 100644
--- a/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleTexture3D.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleTextureDrv/Makefile b/Samples/0_Introduction/simpleTextureDrv/Makefile
index bca148380..95ff9ffe3 100644
--- a/Samples/0_Introduction/simpleTextureDrv/Makefile
+++ b/Samples/0_Introduction/simpleTextureDrv/Makefile
@@ -283,9 +283,9 @@ FATBIN_FILE := simpleTexture_kernel${TARGET_SIZE}.fatbin
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(GENCODE_FLAGS),)
diff --git a/Samples/0_Introduction/simpleTextureDrv/README.md b/Samples/0_Introduction/simpleTextureDrv/README.md
index 3457bfbe6..ee28ee7f9 100644
--- a/Samples/0_Introduction/simpleTextureDrv/README.md
+++ b/Samples/0_Introduction/simpleTextureDrv/README.md
@@ -10,7 +10,7 @@ CUDA Driver API, Texture, Image Processing
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuModuleGetFunction, cuTexObjectDestroy, cuModuleLoadData, cuCtxCreate, cuArrayCreate, cuDeviceGetName, cuLaunchKernel, cuMemAlloc, cuCtxSynchronize, cuArrayDestroy, cuTexObjectCreate, cuMemFree, cuMemcpyDtoH, cuCtxDestroy, cuDeviceGetAttribute
+cuMemcpyDtoH, cuLaunchKernel, cuModuleLoadData, cuDeviceGetName, cuDeviceGetAttribute, cuCtxSynchronize, cuMemAlloc, cuArrayCreate, cuMemFree, cuCtxDestroy, cuTexObjectDestroy, cuTexObjectCreate, cuCtxCreate, cuModuleGetFunction, cuArrayDestroy
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2017.vcxproj b/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2017.vcxproj
index d674efc69..66dc7b33f 100644
--- a/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleTextureDrv.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -111,6 +111,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2019.vcxproj b/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2019.vcxproj
index 5817eb47a..e1c437169 100644
--- a/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleTextureDrv.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2022.vcxproj b/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2022.vcxproj
index 0cbde7a7f..21cf1f35f 100644
--- a/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleTextureDrv.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleVoteIntrinsics/Makefile b/Samples/0_Introduction/simpleVoteIntrinsics/Makefile
index 82e012793..32edcf723 100644
--- a/Samples/0_Introduction/simpleVoteIntrinsics/Makefile
+++ b/Samples/0_Introduction/simpleVoteIntrinsics/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleVoteIntrinsics/NsightEclipse.xml b/Samples/0_Introduction/simpleVoteIntrinsics/NsightEclipse.xml
index b424859f7..e91b97149 100644
--- a/Samples/0_Introduction/simpleVoteIntrinsics/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleVoteIntrinsics/NsightEclipse.xml
@@ -3,10 +3,10 @@
 <entry>
   <name>simpleVoteIntrinsics</name>
   <cuda_api_list>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[Simple program which demonstrates how to use the Vote (__any_sync, __all_sync) intrinsic instruction in a CUDA kernel.]]></description>
@@ -46,6 +46,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleVoteIntrinsics/README.md b/Samples/0_Introduction/simpleVoteIntrinsics/README.md
index e35dece3d..9c86c6358 100644
--- a/Samples/0_Introduction/simpleVoteIntrinsics/README.md
+++ b/Samples/0_Introduction/simpleVoteIntrinsics/README.md
@@ -10,7 +10,7 @@ Vote Intrinsics
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaGetDeviceProperties
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2017.vcxproj b/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2017.vcxproj
index c49388da7..d9045a581 100644
--- a/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleVoteIntrinsics.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2019.vcxproj b/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2019.vcxproj
index 91da2af06..3bcc14725 100644
--- a/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleVoteIntrinsics.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2022.vcxproj b/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2022.vcxproj
index 980c93b29..3af7fc246 100644
--- a/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleVoteIntrinsics.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/README.md b/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/README.md
index 28801cbd4..2f4cdeb49 100644
--- a/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/README.md
+++ b/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/README.md
@@ -10,7 +10,7 @@ Vote Intrinsics, CUDA Driver API, Runtime Compilation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuModuleGetFunction, cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuMemcpyHtoD
+cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction
 
 ## Dependencies needed to build/run
 [NVRTC](../../../README.md#nvrtc)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2017.vcxproj b/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2017.vcxproj
index a0d074920..0d541d12c 100644
--- a/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2019.vcxproj b/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2019.vcxproj
index 00d7d2757..2c334e595 100644
--- a/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2022.vcxproj b/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2022.vcxproj
index c8c5ff0bd..69dbd9683 100644
--- a/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleZeroCopy/Makefile b/Samples/0_Introduction/simpleZeroCopy/Makefile
index 8fe33e69b..3161f90b9 100644
--- a/Samples/0_Introduction/simpleZeroCopy/Makefile
+++ b/Samples/0_Introduction/simpleZeroCopy/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleZeroCopy/NsightEclipse.xml b/Samples/0_Introduction/simpleZeroCopy/NsightEclipse.xml
index 155731c7c..a5776e59e 100644
--- a/Samples/0_Introduction/simpleZeroCopy/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleZeroCopy/NsightEclipse.xml
@@ -4,15 +4,15 @@
   <name>simpleZeroCopy</name>
   <cuda_api_list>
     <toolkit>cudaHostAlloc</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaHostRegister</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaHostUnregister</toolkit>
     <toolkit>cudaSetDeviceFlags</toolkit>
+    <toolkit>cudaHostRegister</toolkit>
     <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
     <toolkit>cudaHostGetDevicePointer</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaFreeHost</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaHostUnregister</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample illustrates how to use Zero MemCopy, kernels can read and write directly to pinned system memory.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -51,6 +51,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleZeroCopy/README.md b/Samples/0_Introduction/simpleZeroCopy/README.md
index 640a3096b..a2f5acc82 100644
--- a/Samples/0_Introduction/simpleZeroCopy/README.md
+++ b/Samples/0_Introduction/simpleZeroCopy/README.md
@@ -10,7 +10,7 @@ Performance Strategies, Pinned System Paged Memory, Vector Addition
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaHostAlloc, cudaGetDeviceCount, cudaHostRegister, cudaDeviceSynchronize, cudaFreeHost, cudaHostUnregister, cudaSetDeviceFlags, cudaSetDevice, cudaHostGetDevicePointer, cudaGetDeviceProperties
+cudaHostAlloc, cudaSetDeviceFlags, cudaHostRegister, cudaSetDevice, cudaGetDeviceCount, cudaHostGetDevicePointer, cudaDeviceSynchronize, cudaFreeHost, cudaGetDeviceProperties, cudaHostUnregister
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2017.vcxproj b/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2017.vcxproj
index 46bb04d3d..b7e0b9c04 100644
--- a/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleZeroCopy.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2019.vcxproj b/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2019.vcxproj
index 17ea198b2..c7a9daed9 100644
--- a/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleZeroCopy.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2022.vcxproj b/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2022.vcxproj
index 6f4d0c204..29709586e 100644
--- a/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleZeroCopy.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/systemWideAtomics/Makefile b/Samples/0_Introduction/systemWideAtomics/Makefile
index e8019879b..6832e615b 100644
--- a/Samples/0_Introduction/systemWideAtomics/Makefile
+++ b/Samples/0_Introduction/systemWideAtomics/Makefile
@@ -303,9 +303,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 61 70 72 75 80 86 87
+SMS ?= 61 70 72 75 80 86 87 90
 else
-SMS ?= 60 61 70 75 80 86
+SMS ?= 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/systemWideAtomics/NsightEclipse.xml b/Samples/0_Introduction/systemWideAtomics/NsightEclipse.xml
index f9099627d..05284552b 100644
--- a/Samples/0_Introduction/systemWideAtomics/NsightEclipse.xml
+++ b/Samples/0_Introduction/systemWideAtomics/NsightEclipse.xml
@@ -3,10 +3,10 @@
 <entry>
   <name>systemWideAtomics</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaMallocManaged</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaFree</toolkit>
   </cuda_api_list>
   <description><![CDATA[A simple demonstration of system wide atomic instructions.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -44,6 +44,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/systemWideAtomics/README.md b/Samples/0_Introduction/systemWideAtomics/README.md
index c838fcd5b..98f2a0629 100644
--- a/Samples/0_Introduction/systemWideAtomics/README.md
+++ b/Samples/0_Introduction/systemWideAtomics/README.md
@@ -10,7 +10,7 @@ Atomic Intrinsics, Unified Memory
 
 ## Supported SM Architectures
 
-[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaDeviceSynchronize, cudaMallocManaged, cudaGetDeviceProperties
+cudaDeviceSynchronize, cudaMallocManaged, cudaGetDeviceProperties, cudaFree
 
 ## Dependencies needed to build/run
 [UVM](../../../README.md#uvm)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/0_Introduction/template/Makefile b/Samples/0_Introduction/template/Makefile
index a462a6374..47f379667 100644
--- a/Samples/0_Introduction/template/Makefile
+++ b/Samples/0_Introduction/template/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/template/NsightEclipse.xml b/Samples/0_Introduction/template/NsightEclipse.xml
index e043d3891..21fe74fb2 100644
--- a/Samples/0_Introduction/template/NsightEclipse.xml
+++ b/Samples/0_Introduction/template/NsightEclipse.xml
@@ -4,8 +4,8 @@
   <name>template</name>
   <cuda_api_list>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaFree</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
   </cuda_api_list>
   <description><![CDATA[A trivial template project that can be used as a starting point to create new CUDA projects.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -41,6 +41,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/template/README.md b/Samples/0_Introduction/template/README.md
index afcf51e14..f22445699 100644
--- a/Samples/0_Introduction/template/README.md
+++ b/Samples/0_Introduction/template/README.md
@@ -10,7 +10,7 @@ Device Memory Allocation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaFree, cudaMemcpy
+cudaMalloc, cudaMemcpy, cudaFree
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/template/template_vs2017.vcxproj b/Samples/0_Introduction/template/template_vs2017.vcxproj
index 01694a193..5e4367812 100644
--- a/Samples/0_Introduction/template/template_vs2017.vcxproj
+++ b/Samples/0_Introduction/template/template_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/template.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/template/template_vs2019.vcxproj b/Samples/0_Introduction/template/template_vs2019.vcxproj
index 606bb330f..f736fc28e 100644
--- a/Samples/0_Introduction/template/template_vs2019.vcxproj
+++ b/Samples/0_Introduction/template/template_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/template.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/template/template_vs2022.vcxproj b/Samples/0_Introduction/template/template_vs2022.vcxproj
index cc552ab45..6a6d8744f 100644
--- a/Samples/0_Introduction/template/template_vs2022.vcxproj
+++ b/Samples/0_Introduction/template/template_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/template.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/vectorAdd/Makefile b/Samples/0_Introduction/vectorAdd/Makefile
index 3ecf5e49c..62be14985 100644
--- a/Samples/0_Introduction/vectorAdd/Makefile
+++ b/Samples/0_Introduction/vectorAdd/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/vectorAdd/NsightEclipse.xml b/Samples/0_Introduction/vectorAdd/NsightEclipse.xml
index 0df66051f..353acee52 100644
--- a/Samples/0_Introduction/vectorAdd/NsightEclipse.xml
+++ b/Samples/0_Introduction/vectorAdd/NsightEclipse.xml
@@ -3,11 +3,11 @@
 <entry>
   <name>vectorAdd</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaGetLastError</toolkit>
     <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGetLastError</toolkit>
+    <toolkit>cudaMalloc</toolkit>
   </cuda_api_list>
   <description><![CDATA[This CUDA Runtime API sample is a very basic sample that implements element by element vector addition. It is the same as the sample illustrating Chapter 3 of the programming guide with some additions like error checking.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -46,6 +46,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/vectorAdd/README.md b/Samples/0_Introduction/vectorAdd/README.md
index 8a7e44640..99523169e 100644
--- a/Samples/0_Introduction/vectorAdd/README.md
+++ b/Samples/0_Introduction/vectorAdd/README.md
@@ -10,7 +10,7 @@ CUDA Runtime API, Vector Addition
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetErrorString
+cudaMemcpy, cudaGetErrorString, cudaFree, cudaGetLastError, cudaMalloc
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/vectorAdd/vectorAdd_vs2017.vcxproj b/Samples/0_Introduction/vectorAdd/vectorAdd_vs2017.vcxproj
index 6529e270e..e8af31493 100644
--- a/Samples/0_Introduction/vectorAdd/vectorAdd_vs2017.vcxproj
+++ b/Samples/0_Introduction/vectorAdd/vectorAdd_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/vectorAdd.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/vectorAdd/vectorAdd_vs2019.vcxproj b/Samples/0_Introduction/vectorAdd/vectorAdd_vs2019.vcxproj
index 295f1945b..a25492e3b 100644
--- a/Samples/0_Introduction/vectorAdd/vectorAdd_vs2019.vcxproj
+++ b/Samples/0_Introduction/vectorAdd/vectorAdd_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/vectorAdd.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/vectorAdd/vectorAdd_vs2022.vcxproj b/Samples/0_Introduction/vectorAdd/vectorAdd_vs2022.vcxproj
index 4da531469..c983b88bb 100644
--- a/Samples/0_Introduction/vectorAdd/vectorAdd_vs2022.vcxproj
+++ b/Samples/0_Introduction/vectorAdd/vectorAdd_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/vectorAdd.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/vectorAddDrv/Makefile b/Samples/0_Introduction/vectorAddDrv/Makefile
index c21fa9428..472417b1d 100644
--- a/Samples/0_Introduction/vectorAddDrv/Makefile
+++ b/Samples/0_Introduction/vectorAddDrv/Makefile
@@ -283,9 +283,9 @@ FATBIN_FILE := vectorAdd_kernel${TARGET_SIZE}.fatbin
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(GENCODE_FLAGS),)
diff --git a/Samples/0_Introduction/vectorAddDrv/README.md b/Samples/0_Introduction/vectorAddDrv/README.md
index 09612d234..ac26085a3 100644
--- a/Samples/0_Introduction/vectorAddDrv/README.md
+++ b/Samples/0_Introduction/vectorAddDrv/README.md
@@ -10,7 +10,7 @@ CUDA Driver API, Vector Addition
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuModuleGetFunction, cuModuleLoadData, cuCtxCreate, cuLaunchKernel, cuMemAlloc, cuMemcpyDtoH, cuCtxSynchronize, cuMemFree, cuInit, cuCtxDestroy, cuMemcpyHtoD
+cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuModuleLoadData, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuCtxDestroy, cuModuleGetFunction, cuCtxCreate, cuInit
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2017.vcxproj b/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2017.vcxproj
index bfd45966f..57b066935 100644
--- a/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2017.vcxproj
+++ b/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/vectorAddDrv.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -111,6 +111,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2019.vcxproj b/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2019.vcxproj
index d1119c072..c48b9e149 100644
--- a/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2019.vcxproj
+++ b/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/vectorAddDrv.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2022.vcxproj b/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2022.vcxproj
index 49a0a5fb3..4f04109bc 100644
--- a/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2022.vcxproj
+++ b/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/vectorAddDrv.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/vectorAddMMAP/Makefile b/Samples/0_Introduction/vectorAddMMAP/Makefile
index 72b2eaa41..743d2bdd1 100644
--- a/Samples/0_Introduction/vectorAddMMAP/Makefile
+++ b/Samples/0_Introduction/vectorAddMMAP/Makefile
@@ -307,9 +307,9 @@ FATBIN_FILE := vectorAdd_kernel${TARGET_SIZE}.fatbin
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(GENCODE_FLAGS),)
diff --git a/Samples/0_Introduction/vectorAddMMAP/README.md b/Samples/0_Introduction/vectorAddMMAP/README.md
index 786e136f4..6dbbcae71 100644
--- a/Samples/0_Introduction/vectorAddMMAP/README.md
+++ b/Samples/0_Introduction/vectorAddMMAP/README.md
@@ -10,7 +10,7 @@ CUDA Driver API, Vector Addition, MMAP
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuMemSetAccess, cuInit, cuMemAddressReserve, cuModuleGetFunction, cuCtxDestroy, cuCtxCreate, cuMemAddressFree, cuMemGetAllocationGranularity, cuMemUnmap, cuDeviceGetAttribute, cuMemRelease, cuModuleLoadData, cuMemMap, cuMemCreate, cuMemcpyHtoD, cuDeviceCanAccessPeer, cuDeviceGetCount, cuLaunchKernel, cuMemcpyDtoH
+cuMemcpyDtoH, cuDeviceCanAccessPeer, cuModuleGetFunction, cuMemSetAccess, cuMemRelease, cuInit, cuMemcpyHtoD, cuLaunchKernel, cuMemCreate, cuModuleLoadData, cuCtxDestroy, cuDeviceGetCount, cuMemMap, cuDeviceGetAttribute, cuMemGetAllocationGranularity, cuMemAddressFree, cuMemUnmap, cuCtxCreate, cuMemAddressReserve
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2017.vcxproj b/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2017.vcxproj
index 6aa6aa9e7..00641d9b0 100644
--- a/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2017.vcxproj
+++ b/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/vectorAddMMAP.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -112,6 +112,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2019.vcxproj b/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2019.vcxproj
index ece800ca5..a4885080d 100644
--- a/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2019.vcxproj
+++ b/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/vectorAddMMAP.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2022.vcxproj b/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2022.vcxproj
index 81fe95b62..59ad3c829 100644
--- a/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2022.vcxproj
+++ b/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/vectorAddMMAP.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/vectorAdd_nvrtc/README.md b/Samples/0_Introduction/vectorAdd_nvrtc/README.md
index 5e83b512e..03ed2a744 100644
--- a/Samples/0_Introduction/vectorAdd_nvrtc/README.md
+++ b/Samples/0_Introduction/vectorAdd_nvrtc/README.md
@@ -10,7 +10,7 @@ CUDA Driver API, Vector Addition, Runtime Compilation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,7 +23,7 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuModuleGetFunction, cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuMemcpyHtoD
+cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
 cudaBlockSize, cudaGridSize
@@ -33,7 +33,7 @@ cudaBlockSize, cudaGridSize
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2017.vcxproj b/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2017.vcxproj
index c83312456..1ad04a6d4 100644
--- a/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2017.vcxproj
+++ b/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2019.vcxproj b/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2019.vcxproj
index 27cd03de3..e140f5eb4 100644
--- a/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2019.vcxproj
+++ b/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2022.vcxproj b/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2022.vcxproj
index db61b4750..a575fb820 100644
--- a/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2022.vcxproj
+++ b/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/1_Utilities/bandwidthTest/Makefile b/Samples/1_Utilities/bandwidthTest/Makefile
index 31f60de59..8699a8bcb 100644
--- a/Samples/1_Utilities/bandwidthTest/Makefile
+++ b/Samples/1_Utilities/bandwidthTest/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/1_Utilities/bandwidthTest/NsightEclipse.xml b/Samples/1_Utilities/bandwidthTest/NsightEclipse.xml
index 840ab2736..6078a765d 100644
--- a/Samples/1_Utilities/bandwidthTest/NsightEclipse.xml
+++ b/Samples/1_Utilities/bandwidthTest/NsightEclipse.xml
@@ -3,22 +3,22 @@
 <entry>
   <name>bandwidthTest</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaMallocHost</toolkit>
     <toolkit>cudaHostAlloc</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaFreeHost</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaEventDestroy</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaMemcpyAsync</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
     <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaMallocHost</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaFreeHost</toolkit>
+    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This is a simple test program to measure the memcopy bandwidth of the GPU and memcpy bandwidth across PCI-e. This test application is capable of measuring device to device copy bandwidth, host to device copy bandwidth for pageable and page-locked memory, and device to host copy bandwidth for pageable and page-locked memory.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -58,6 +58,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/1_Utilities/bandwidthTest/README.md b/Samples/1_Utilities/bandwidthTest/README.md
index 32e4f7796..2cf0bad72 100644
--- a/Samples/1_Utilities/bandwidthTest/README.md
+++ b/Samples/1_Utilities/bandwidthTest/README.md
@@ -10,7 +10,7 @@ CUDA Streams and Events, Performance Strategies
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaEventRecord, cudaMallocHost, cudaHostAlloc, cudaEventCreate, cudaGetDeviceCount, cudaEventElapsedTime, cudaDeviceSynchronize, cudaFreeHost, cudaMalloc, cudaEventDestroy, cudaSetDevice, cudaMemcpyAsync, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties
+cudaHostAlloc, cudaMemcpy, cudaMalloc, cudaMemcpyAsync, cudaFree, cudaGetErrorString, cudaMallocHost, cudaSetDevice, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaEventRecord, cudaFreeHost, cudaEventDestroy, cudaEventElapsedTime, cudaGetDeviceCount, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2017.vcxproj b/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2017.vcxproj
index 38f1b77fa..ad862bff4 100644
--- a/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2017.vcxproj
+++ b/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/bandwidthTest.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2019.vcxproj b/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2019.vcxproj
index 59300fc11..e6f5f3049 100644
--- a/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2019.vcxproj
+++ b/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/bandwidthTest.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2022.vcxproj b/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2022.vcxproj
index 9faf45d5f..676302bd9 100644
--- a/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2022.vcxproj
+++ b/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/bandwidthTest.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/1_Utilities/deviceQuery/Makefile b/Samples/1_Utilities/deviceQuery/Makefile
index ea0d25c1e..44dd2fbc9 100644
--- a/Samples/1_Utilities/deviceQuery/Makefile
+++ b/Samples/1_Utilities/deviceQuery/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/1_Utilities/deviceQuery/NsightEclipse.xml b/Samples/1_Utilities/deviceQuery/NsightEclipse.xml
index 842dea350..dda30eb74 100644
--- a/Samples/1_Utilities/deviceQuery/NsightEclipse.xml
+++ b/Samples/1_Utilities/deviceQuery/NsightEclipse.xml
@@ -5,12 +5,12 @@
   <cuda_api_list>
     <driver>cuDeviceGetAttribute</driver>
     <driver>cuSafeCallNoSync</driver>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaDriverGetVersion</toolkit>
-    <toolkit>cudaDeviceCanAccessPeer</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaRuntimeGetVersion</toolkit>
     <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaDeviceCanAccessPeer</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaDriverGetVersion</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample enumerates the properties of the CUDA devices present in the system.]]></description>
@@ -48,6 +48,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/1_Utilities/deviceQuery/README.md b/Samples/1_Utilities/deviceQuery/README.md
index 794c5f76a..4f4a647d1 100644
--- a/Samples/1_Utilities/deviceQuery/README.md
+++ b/Samples/1_Utilities/deviceQuery/README.md
@@ -10,7 +10,7 @@ CUDA Runtime API, Device Query
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -26,11 +26,11 @@ x86_64, ppc64le, armv7l, aarch64
 cuDeviceGetAttribute, cuSafeCallNoSync
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaGetDeviceCount, cudaDriverGetVersion, cudaDeviceCanAccessPeer, cudaSetDevice, cudaRuntimeGetVersion, cudaGetErrorString, cudaGetDeviceProperties
+cudaRuntimeGetVersion, cudaGetErrorString, cudaDeviceCanAccessPeer, cudaSetDevice, cudaGetDeviceCount, cudaDriverGetVersion, cudaGetDeviceProperties
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/1_Utilities/deviceQuery/deviceQuery_vs2017.vcxproj b/Samples/1_Utilities/deviceQuery/deviceQuery_vs2017.vcxproj
index e52b7e5f7..87cca12f6 100644
--- a/Samples/1_Utilities/deviceQuery/deviceQuery_vs2017.vcxproj
+++ b/Samples/1_Utilities/deviceQuery/deviceQuery_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/deviceQuery.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/1_Utilities/deviceQuery/deviceQuery_vs2019.vcxproj b/Samples/1_Utilities/deviceQuery/deviceQuery_vs2019.vcxproj
index 3b4b1f75c..41b5bebd3 100644
--- a/Samples/1_Utilities/deviceQuery/deviceQuery_vs2019.vcxproj
+++ b/Samples/1_Utilities/deviceQuery/deviceQuery_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/deviceQuery.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/1_Utilities/deviceQuery/deviceQuery_vs2022.vcxproj b/Samples/1_Utilities/deviceQuery/deviceQuery_vs2022.vcxproj
index 4d9684d97..4ba036e5e 100644
--- a/Samples/1_Utilities/deviceQuery/deviceQuery_vs2022.vcxproj
+++ b/Samples/1_Utilities/deviceQuery/deviceQuery_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/deviceQuery.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/1_Utilities/deviceQueryDrv/NsightEclipse.xml b/Samples/1_Utilities/deviceQueryDrv/NsightEclipse.xml
index 999dc70ed..7a56e6975 100644
--- a/Samples/1_Utilities/deviceQueryDrv/NsightEclipse.xml
+++ b/Samples/1_Utilities/deviceQueryDrv/NsightEclipse.xml
@@ -3,13 +3,14 @@
 <entry>
   <name>deviceQueryDrv</name>
   <cuda_api_list>
-    <driver>cuDeviceCanAccessPeer</driver>
-    <driver>cuDriverGetVersion</driver>
-    <driver>cuDeviceGetCount</driver>
     <driver>cuDeviceGetName</driver>
+    <driver>cuDeviceGetAttribute</driver>
     <driver>cuDeviceTotalMem</driver>
+    <driver>cuDeviceCanAccessPeer</driver>
+    <driver>cuDeviceGetCount</driver>
+    <driver>cuDriverGetVersion</driver>
     <driver>cuInit</driver>
-    <driver>cuDeviceGetAttribute</driver>
+    <toolkit>cudaSetDevice</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample enumerates the properties of the CUDA devices present using CUDA Driver API calls]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -49,6 +50,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/1_Utilities/deviceQueryDrv/README.md b/Samples/1_Utilities/deviceQueryDrv/README.md
index 5d80066c1..92d02352f 100644
--- a/Samples/1_Utilities/deviceQueryDrv/README.md
+++ b/Samples/1_Utilities/deviceQueryDrv/README.md
@@ -10,7 +10,7 @@ CUDA Driver API, Device Query
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuDeviceCanAccessPeer, cuDriverGetVersion, cuDeviceGetCount, cuDeviceGetName, cuDeviceTotalMem, cuInit, cuDeviceGetAttribute
+cuDeviceGetName, cuDeviceGetAttribute, cuDeviceTotalMem, cuDeviceCanAccessPeer, cuDeviceGetCount, cuDriverGetVersion, cuInit
+
+### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
+cudaSetDevice
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2017.vcxproj b/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2017.vcxproj
index a94caee2f..59f77b9b5 100644
--- a/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2017.vcxproj
+++ b/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2019.vcxproj b/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2019.vcxproj
index 282fef957..629a2e6eb 100644
--- a/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2019.vcxproj
+++ b/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2022.vcxproj b/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2022.vcxproj
index 30aaeef46..5c8aab7e5 100644
--- a/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2022.vcxproj
+++ b/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/1_Utilities/topologyQuery/Makefile b/Samples/1_Utilities/topologyQuery/Makefile
index ea5621746..9a48838ca 100644
--- a/Samples/1_Utilities/topologyQuery/Makefile
+++ b/Samples/1_Utilities/topologyQuery/Makefile
@@ -297,9 +297,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/1_Utilities/topologyQuery/NsightEclipse.xml b/Samples/1_Utilities/topologyQuery/NsightEclipse.xml
index 722261a6d..8bfd757da 100644
--- a/Samples/1_Utilities/topologyQuery/NsightEclipse.xml
+++ b/Samples/1_Utilities/topologyQuery/NsightEclipse.xml
@@ -46,6 +46,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/1_Utilities/topologyQuery/README.md b/Samples/1_Utilities/topologyQuery/README.md
index 4f4093fac..e08fa339d 100644
--- a/Samples/1_Utilities/topologyQuery/README.md
+++ b/Samples/1_Utilities/topologyQuery/README.md
@@ -10,7 +10,7 @@ Performance Strategies, Multi-GPU
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -27,7 +27,7 @@ cudaGetDeviceCount, cudaDeviceGetAttribute
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/1_Utilities/topologyQuery/topologyQuery_vs2017.vcxproj b/Samples/1_Utilities/topologyQuery/topologyQuery_vs2017.vcxproj
index df094c222..feecd32fa 100644
--- a/Samples/1_Utilities/topologyQuery/topologyQuery_vs2017.vcxproj
+++ b/Samples/1_Utilities/topologyQuery/topologyQuery_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/topologyQuery.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/1_Utilities/topologyQuery/topologyQuery_vs2019.vcxproj b/Samples/1_Utilities/topologyQuery/topologyQuery_vs2019.vcxproj
index 62f21c12f..245f929bd 100644
--- a/Samples/1_Utilities/topologyQuery/topologyQuery_vs2019.vcxproj
+++ b/Samples/1_Utilities/topologyQuery/topologyQuery_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/topologyQuery.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/1_Utilities/topologyQuery/topologyQuery_vs2022.vcxproj b/Samples/1_Utilities/topologyQuery/topologyQuery_vs2022.vcxproj
index 40ba0ca7a..1d81b9337 100644
--- a/Samples/1_Utilities/topologyQuery/topologyQuery_vs2022.vcxproj
+++ b/Samples/1_Utilities/topologyQuery/topologyQuery_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/topologyQuery.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/Makefile b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/Makefile
index 0200b2356..dabf05164 100644
--- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/Makefile
+++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/Makefile
@@ -301,9 +301,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/NsightEclipse.xml
index 0ab4b349c..6df81e060 100644
--- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/NsightEclipse.xml
@@ -3,42 +3,42 @@
 <entry>
   <name>EGLStream_CUDA_CrossGPU</name>
   <cuda_api_list>
+    <driver>cuDeviceGetName</driver>
+    <driver>cuEGLStreamConsumerReleaseFrame</driver>
     <driver>cuEGLStreamConsumerConnect</driver>
-    <driver>cuMemFree</driver>
-    <driver>cuInit</driver>
+    <driver>cuEGLStreamConsumerDisconnect</driver>
+    <driver>cuCtxPushCurrent</driver>
+    <driver>cuEGLStreamProducerReturnFrame</driver>
     <driver>cuStreamCreate</driver>
-    <driver>cuCtxCreate</driver>
+    <driver>cuEGLStreamProducerPresentFrame</driver>
+    <driver>cuMemFree</driver>
     <driver>cuGraphicsResourceGetMappedEglFrame</driver>
-    <driver>cuDeviceGetName</driver>
-    <driver>cuCtxSynchronize</driver>
-    <driver>cuEGLStreamConsumerAcquireFrame</driver>
+    <driver>cuInit</driver>
+    <driver>cuMemcpyHtoD</driver>
     <driver>cuDeviceGet</driver>
-    <driver>cuDeviceGetAttribute</driver>
-    <driver>cuMemAlloc</driver>
-    <driver>cuEGLStreamConsumerReleaseFrame</driver>
+    <driver>cuEGLStreamConsumerAcquireFrame</driver>
     <driver>cuEGLStreamProducerDisconnect</driver>
     <driver>cuEGLStreamProducerConnect</driver>
-    <driver>cuEGLStreamConsumerDisconnect</driver>
-    <driver>cuMemcpyHtoD</driver>
-    <driver>cuEGLStreamProducerReturnFrame</driver>
-    <driver>cuCtxPushCurrent</driver>
+    <driver>cuDeviceGetAttribute</driver>
+    <driver>cuCtxSynchronize</driver>
+    <driver>cuMemAlloc</driver>
     <driver>cuCtxPopCurrent</driver>
-    <driver>cuEGLStreamProducerPresentFrame</driver>
-    <toolkit>cudaDeviceCreateConsumer</toolkit>
+    <driver>cuCtxCreate</driver>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaProducerPresentFrame</toolkit>
     <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGetErrorString</toolkit>
     <toolkit>cudaConsumerReleaseFrame</toolkit>
+    <toolkit>cudaProducerReturnFrame</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaGetValueMismatch</toolkit>
+    <toolkit>cudaDeviceCreateProducer</toolkit>
     <toolkit>cudaProducerDeinit</toolkit>
-    <toolkit>cudaProducerPresentFrame</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaProducerInit</toolkit>
-    <toolkit>cudaProducerReturnFrame</toolkit>
     <toolkit>cudaProducerPrepareFrame</toolkit>
+    <toolkit>cudaGetValueMismatch</toolkit>
     <toolkit>cudaConsumerAcquireFrame</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaGetErrorString</toolkit>
-    <toolkit>cudaDeviceCreateProducer</toolkit>
+    <toolkit>cudaProducerInit</toolkit>
+    <toolkit>cudaDeviceCreateConsumer</toolkit>
   </cuda_api_list>
   <description><![CDATA[Demonstrates CUDA and EGL Streams interop, where consumer's EGL Stream is on one GPU and producer's on other and both consumer-producer are different processes.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -81,6 +81,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/README.md b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/README.md
index 2178db665..b559583b8 100644
--- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/README.md
+++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/README.md
@@ -10,7 +10,7 @@ EGLStreams Interop
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,17 +23,17 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuEGLStreamConsumerConnect, cuMemFree, cuInit, cuStreamCreate, cuCtxCreate, cuGraphicsResourceGetMappedEglFrame, cuDeviceGetName, cuCtxSynchronize, cuEGLStreamConsumerAcquireFrame, cuDeviceGet, cuDeviceGetAttribute, cuMemAlloc, cuEGLStreamConsumerReleaseFrame, cuEGLStreamProducerDisconnect, cuEGLStreamProducerConnect, cuEGLStreamConsumerDisconnect, cuMemcpyHtoD, cuEGLStreamProducerReturnFrame, cuCtxPushCurrent, cuCtxPopCurrent, cuEGLStreamProducerPresentFrame
+cuDeviceGetName, cuEGLStreamConsumerReleaseFrame, cuEGLStreamConsumerConnect, cuEGLStreamConsumerDisconnect, cuCtxPushCurrent, cuEGLStreamProducerReturnFrame, cuStreamCreate, cuEGLStreamProducerPresentFrame, cuMemFree, cuGraphicsResourceGetMappedEglFrame, cuInit, cuMemcpyHtoD, cuDeviceGet, cuEGLStreamConsumerAcquireFrame, cuEGLStreamProducerDisconnect, cuEGLStreamProducerConnect, cuDeviceGetAttribute, cuCtxSynchronize, cuMemAlloc, cuCtxPopCurrent, cuCtxCreate
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDeviceCreateConsumer, cudaFree, cudaConsumerReleaseFrame, cudaDeviceSynchronize, cudaGetValueMismatch, cudaProducerDeinit, cudaProducerPresentFrame, cudaMalloc, cudaProducerInit, cudaProducerReturnFrame, cudaProducerPrepareFrame, cudaConsumerAcquireFrame, cudaMemcpy, cudaGetErrorString, cudaDeviceCreateProducer
+cudaMemcpy, cudaMalloc, cudaProducerPresentFrame, cudaFree, cudaGetErrorString, cudaConsumerReleaseFrame, cudaProducerReturnFrame, cudaDeviceSynchronize, cudaDeviceCreateProducer, cudaProducerDeinit, cudaProducerPrepareFrame, cudaGetValueMismatch, cudaConsumerAcquireFrame, cudaProducerInit, cudaDeviceCreateConsumer
 
 ## Dependencies needed to build/run
 [EGL](../../../README.md#egl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/findegl.mk b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/findegl.mk
index cfeee8998..33ec1a961 100644
--- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/findegl.mk
+++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/findegl.mk
@@ -58,6 +58,7 @@ ifeq ("$(TARGET_OS)","linux")
     RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
     CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -84,22 +85,12 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
-    ifeq ("$(SUSE)","0")
+
+    ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/NsightEclipse.xml
index d84d93136..ce22364ae 100644
--- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/NsightEclipse.xml
@@ -3,23 +3,36 @@
 <entry>
   <name>EGLStream_CUDA_Interop</name>
   <cuda_api_list>
-    <driver>cuDeviceGet</driver>
-    <driver>cuDeviceGetAttribute</driver>
-    <driver>cuDeviceComputeCapability</driver>
-    <driver>cuDeviceGetCount</driver>
+    <driver>cuMemcpyDtoH</driver>
     <driver>cuDeviceGetName</driver>
-    <driver>cuGraphicsResourceGetMappedEglFrame</driver>
-    <driver>cuEGLStreamConsumerAcquireFrame</driver>
     <driver>cuEGLStreamConsumerReleaseFrame</driver>
+    <driver>cuEGLStreamConsumerConnect</driver>
+    <driver>cuEGLStreamConsumerDisconnect</driver>
+    <driver>cuCtxPushCurrent</driver>
+    <driver>cuArrayDestroy</driver>
+    <driver>cuEGLStreamProducerReturnFrame</driver>
     <driver>cuEGLStreamProducerPresentFrame</driver>
-    <driver>cuCtxCreate</driver>
-    <driver>cuMemAlloc</driver>
     <driver>cuMemFree</driver>
-    <driver>cuMemcpy3D</driver>
-    <driver>cuStreamCreate</driver>
-    <driver>cuCtxPushCurrent</driver>
+    <driver>cuGraphicsResourceGetMappedEglFrame</driver>
+    <driver>cuInit</driver>
+    <driver>cuEGLStreamConsumerAcquireFrame</driver>
+    <driver>cuEGLStreamProducerDisconnect</driver>
+    <driver>cuDeviceGetCount</driver>
+    <driver>cuEGLStreamProducerConnect</driver>
+    <driver>cuDeviceGetAttribute</driver>
+    <driver>cuCtxSynchronize</driver>
+    <driver>cuMemAlloc</driver>
     <driver>cuCtxPopCurrent</driver>
-    <driver>cuCtxDestroy</driver>
+    <driver>cuCtxCreate</driver>
+    <driver>cuMemcpy</driver>
+    <toolkit>cudaProducerReadYUVFrame</toolkit>
+    <toolkit>cudaProducerTest</toolkit>
+    <toolkit>cudaProducerDeinit</toolkit>
+    <toolkit>cudaDeviceCreateProducer</toolkit>
+    <toolkit>cudaProducerReadARGBFrame</toolkit>
+    <toolkit>cudaDeviceCreateConsumer</toolkit>
+    <toolkit>cudaConsumerTest</toolkit>
+    <toolkit>cudaProducerInit</toolkit>
   </cuda_api_list>
   <description><![CDATA[Demonstrates data exchange between CUDA and EGL Streams.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -53,6 +66,7 @@
   <sm-arch>sm37</sm-arch>
   <sm-arch>sm50</sm-arch>
   <sm-arch>sm52</sm-arch>
+  <sm-arch>sm53</sm-arch>
   <sm-arch>sm60</sm-arch>
   <sm-arch>sm61</sm-arch>
   <sm-arch>sm70</sm-arch>
@@ -60,6 +74,8 @@
   <sm-arch>sm75</sm-arch>
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
+  <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/README.md b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/README.md
index 3ccef85d4..5cee12b0b 100644
--- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/README.md
+++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/README.md
@@ -10,7 +10,7 @@ EGLStreams Interop
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,17 +23,17 @@ x86_64, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuEGLStreamConsumerConnect, cuArrayDestroy, cuMemFree, cuInit, cuCtxCreate, cuGraphicsResourceGetMappedEglFrame, cuDeviceGetName, cuCtxSynchronize, cuEGLStreamConsumerAcquireFrame, cuDeviceGetAttribute, cuMemcpy, cuMemAlloc, cuEGLStreamConsumerReleaseFrame, cuEGLStreamProducerDisconnect, cuEGLStreamProducerConnect, cuEGLStreamConsumerDisconnect, cuDeviceGetCount, cuEGLStreamProducerReturnFrame, cuCtxPushCurrent, cuCtxPopCurrent, cuMemcpyDtoH, cuEGLStreamProducerPresentFrame
+cuMemcpyDtoH, cuDeviceGetName, cuEGLStreamConsumerReleaseFrame, cuEGLStreamConsumerConnect, cuEGLStreamConsumerDisconnect, cuCtxPushCurrent, cuArrayDestroy, cuEGLStreamProducerReturnFrame, cuEGLStreamProducerPresentFrame, cuMemFree, cuGraphicsResourceGetMappedEglFrame, cuInit, cuEGLStreamConsumerAcquireFrame, cuEGLStreamProducerDisconnect, cuDeviceGetCount, cuEGLStreamProducerConnect, cuDeviceGetAttribute, cuCtxSynchronize, cuMemAlloc, cuCtxPopCurrent, cuCtxCreate, cuMemcpy
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDeviceCreateConsumer, cudaConsumerTest, cudaProducerDeinit, cudaProducerInit, cudaProducerReadYUVFrame, cudaProducerTest, cudaProducerReadARGBFrame, cudaDeviceCreateProducer
+cudaProducerReadYUVFrame, cudaProducerTest, cudaProducerDeinit, cudaDeviceCreateProducer, cudaProducerReadARGBFrame, cudaDeviceCreateConsumer, cudaConsumerTest, cudaProducerInit
 
 ## Dependencies needed to build/run
 [EGL](../../../README.md#egl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_producer.cpp b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_producer.cpp
index f15c5cd1e..ef3adab2a 100644
--- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_producer.cpp
+++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_producer.cpp
@@ -301,7 +301,7 @@ CUresult cudaDeviceCreateProducer(test_cuda_producer_s *cudaProducer,
 
   if (major < 6) {
     printf(
-        "EGLStreams_CUDA_Interop requires SM 6.0 or higher arch GPU.  "
+        "EGLStream_CUDA_Interop requires SM 6.0 or higher arch GPU.  "
         "Exiting...\n");
     exit(2);  // EXIT_WAIVED
   }
diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/findegl.mk b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/findegl.mk
index cfeee8998..33ec1a961 100644
--- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/findegl.mk
+++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/findegl.mk
@@ -58,6 +58,7 @@ ifeq ("$(TARGET_OS)","linux")
     RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
     CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -84,22 +85,12 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
-    ifeq ("$(SUSE)","0")
+
+    ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
diff --git a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/Makefile b/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/Makefile
index 9a670c78c..86cfb9287 100644
--- a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/Makefile
+++ b/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/Makefile
@@ -321,9 +321,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/NsightEclipse.xml
index fbb91f1ff..639955253 100644
--- a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/NsightEclipse.xml
@@ -3,27 +3,27 @@
 <entry>
   <name>EGLSync_CUDAEvent_Interop</name>
   <cuda_api_list>
-    <driver>cuGraphicsEGLRegisterImage</driver>
-    <driver>cuStreamCreate</driver>
+    <driver>cuEventRecord</driver>
+    <driver>cuDeviceGetAttribute</driver>
     <driver>cuEventCreate</driver>
-    <driver>cuCtxCreate</driver>
-    <driver>cuGraphicsSubResourceGetMappedArray</driver>
-    <driver>cuGraphicsUnregisterResource</driver>
     <driver>cuCtxSynchronize</driver>
-    <driver>cuEventCreateFromEGLSync</driver>
     <driver>cuEventDestroy</driver>
+    <driver>cuGraphicsEGLRegisterImage</driver>
+    <driver>cuGraphicsSubResourceGetMappedArray</driver>
+    <driver>cuStreamCreate</driver>
     <driver>cuStreamWaitEvent</driver>
-    <driver>cuCtxPushCurrent</driver>
+    <driver>cuGraphicsUnregisterResource</driver>
+    <driver>cuCtxCreate</driver>
     <driver>cuSurfObjectCreate</driver>
+    <driver>cuEventCreateFromEGLSync</driver>
+    <driver>cuCtxPushCurrent</driver>
     <driver>cuInit</driver>
-    <driver>cuEventRecord</driver>
-    <driver>cuDeviceGetAttribute</driver>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaGetErrorString</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaGetValueMismatch</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaGetErrorString</toolkit>
   </cuda_api_list>
   <description><![CDATA[Demonstrates interoperability between CUDA Event and EGL Sync/EGL Image using which one can achieve synchronization on GPU itself for GL-EGL-CUDA operations instead of blocking CPU for synchronization.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -72,6 +72,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>arm</arch>
diff --git a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/README.md b/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/README.md
index 655881d14..8c980b525 100644
--- a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/README.md
+++ b/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/README.md
@@ -10,7 +10,7 @@ EGLSync-CUDAEvent Interop, EGLImage-CUDA Interop
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,17 +23,17 @@ armv7l
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuGraphicsEGLRegisterImage, cuStreamCreate, cuEventCreate, cuCtxCreate, cuGraphicsSubResourceGetMappedArray, cuGraphicsUnregisterResource, cuCtxSynchronize, cuEventCreateFromEGLSync, cuEventDestroy, cuStreamWaitEvent, cuCtxPushCurrent, cuSurfObjectCreate, cuInit, cuEventRecord, cuDeviceGetAttribute
+cuEventRecord, cuDeviceGetAttribute, cuEventCreate, cuCtxSynchronize, cuEventDestroy, cuGraphicsEGLRegisterImage, cuGraphicsSubResourceGetMappedArray, cuStreamCreate, cuStreamWaitEvent, cuGraphicsUnregisterResource, cuCtxCreate, cuSurfObjectCreate, cuEventCreateFromEGLSync, cuCtxPushCurrent, cuInit
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaDeviceSynchronize, cudaGetValueMismatch, cudaMalloc, cudaMemcpy, cudaGetErrorString
+cudaMemcpy, cudaGetErrorString, cudaFree, cudaDeviceSynchronize, cudaGetValueMismatch, cudaMalloc
 
 ## Dependencies needed to build/run
 [EGL](../../../README.md#egl), [EGLSync](../../../README.md#eglsync), [X11](../../../README.md#x11), [GLES](../../../README.md#gles)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/findegl.mk b/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/findegl.mk
index cfeee8998..33ec1a961 100644
--- a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/findegl.mk
+++ b/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/findegl.mk
@@ -58,6 +58,7 @@ ifeq ("$(TARGET_OS)","linux")
     RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
     CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -84,22 +85,12 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
-    ifeq ("$(SUSE)","0")
+    # SUSE gets the X11R6 lib64 paths; any other distro takes the NVIDIA lib64 defaults (?= keeps values set earlier).
+    ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2017.vcxproj
index 93f0c4b64..e80efd5bd 100644
--- a/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/FunctionPointers.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -118,6 +118,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2019.vcxproj
index a67cbbcd9..d5cbccdca 100644
--- a/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/FunctionPointers.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2022.vcxproj
index 45617a678..ca5eb33b5 100644
--- a/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/FunctionPointers.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/Makefile b/Samples/2_Concepts_and_Techniques/FunctionPointers/Makefile
index 47c204fe6..651a4f81e 100644
--- a/Samples/2_Concepts_and_Techniques/FunctionPointers/Makefile
+++ b/Samples/2_Concepts_and_Techniques/FunctionPointers/Makefile
@@ -299,9 +299,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/FunctionPointers/NsightEclipse.xml
index ecd90f9df..f90f7b346 100644
--- a/Samples/2_Concepts_and_Techniques/FunctionPointers/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/FunctionPointers/NsightEclipse.xml
@@ -3,21 +3,21 @@
 <entry>
   <name>FunctionPointers</name>
   <cuda_api_list>
-    <toolkit>cudaMemcpyToSymbol</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
+    <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMallocArray</toolkit>
     <toolkit>cudaFreeArray</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaMemcpyFromSymbol</toolkit>
     <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
     <toolkit>cudaDestroyTextureObject</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaCreateTextureObject</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMallocArray</toolkit>
+    <toolkit>cudaMemcpyToSymbol</toolkit>
     <toolkit>cudaGraphicsUnregisterResource</toolkit>
-    <toolkit>cudaGraphicsUnmapResources</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaMemcpyFromSymbol</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample illustrates how to use function pointers and implements the Sobel Edge Detection filter for 8-bit monochrome images.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -88,6 +88,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/README.md b/Samples/2_Concepts_and_Techniques/FunctionPointers/README.md
index a95ba59dc..83e16413b 100644
--- a/Samples/2_Concepts_and_Techniques/FunctionPointers/README.md
+++ b/Samples/2_Concepts_and_Techniques/FunctionPointers/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Image Processing
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemcpyToSymbol, cudaFree, cudaGraphicsMapResources, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy, cudaMemcpyFromSymbol
+cudaGraphicsUnmapResources, cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaMemcpyFromSymbol, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMemcpyToSymbol, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/findgllib.mk b/Samples/2_Concepts_and_Techniques/FunctionPointers/findgllib.mk
index f0a5c5512..998fcf0f1 100644
--- a/Samples/2_Concepts_and_Techniques/FunctionPointers/findgllib.mk
+++ b/Samples/2_Concepts_and_Techniques/FunctionPointers/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2017.vcxproj
index 7826468d6..9b6616f67 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/MC_EstimatePiInlineP.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -111,6 +111,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2019.vcxproj
index accf4529d..fd17c3e2a 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/MC_EstimatePiInlineP.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2022.vcxproj
index 6ba155315..9d5110d54 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/MC_EstimatePiInlineP.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/Makefile b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/Makefile
index 6bc2ee938..c4a3fa536 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/Makefile
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/NsightEclipse.xml
index 5c63395b9..bf9c24d2d 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/NsightEclipse.xml
@@ -3,13 +3,13 @@
 <entry>
   <name>MC_EstimatePiInlineP</name>
   <cuda_api_list>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaGetErrorString</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaFuncGetAttributes</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaGetDeviceCount</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaFuncGetAttributes</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample uses Monte Carlo simulation for Estimation of Pi (using inline PRNG).  This sample also uses the NVIDIA CURAND library.]]></description>
@@ -60,6 +60,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/README.md b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/README.md
index 53b09b57d..a7d406785 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/README.md
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/README.md
@@ -10,7 +10,7 @@ Random Number Generator, Computational Finance, CURAND Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaFuncGetAttributes, cudaGetDeviceCount, cudaMalloc, cudaSetDevice, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties
+cudaMemcpy, cudaGetErrorString, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaMalloc, cudaFuncGetAttributes, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [CURAND](../../../README.md#curand)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2017.vcxproj
index d5ad649be..9ac218442 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/MC_EstimatePiInlineQ.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -111,6 +111,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2019.vcxproj
index bdf88dd5c..b8246a1a7 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/MC_EstimatePiInlineQ.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2022.vcxproj
index 4a8efc239..da748e1f7 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/MC_EstimatePiInlineQ.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/Makefile b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/Makefile
index 20a15720a..5d8b086b2 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/Makefile
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/NsightEclipse.xml
index ed2f8f7ac..f087e82b1 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/NsightEclipse.xml
@@ -3,13 +3,13 @@
 <entry>
   <name>MC_EstimatePiInlineQ</name>
   <cuda_api_list>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaGetErrorString</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaFuncGetAttributes</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaGetDeviceCount</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaFuncGetAttributes</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample uses Monte Carlo simulation for Estimation of Pi (using inline QRNG).  This sample also uses the NVIDIA CURAND library.]]></description>
@@ -60,6 +60,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/README.md b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/README.md
index 96e19b727..485c16aaa 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/README.md
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/README.md
@@ -10,7 +10,7 @@ Random Number Generator, Computational Finance, CURAND Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaFuncGetAttributes, cudaGetDeviceCount, cudaMalloc, cudaSetDevice, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties
+cudaMemcpy, cudaGetErrorString, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaMalloc, cudaFuncGetAttributes, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [CURAND](../../../README.md#curand)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2017.vcxproj
index a628995cb..c64188719 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/MC_EstimatePiP.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -111,6 +111,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2019.vcxproj
index fb20f5f1d..c188ae7dd 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/MC_EstimatePiP.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2022.vcxproj
index ec7b8f28c..c9893f39d 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/MC_EstimatePiP.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/Makefile b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/Makefile
index fba6735f2..0e5b42370 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/Makefile
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/NsightEclipse.xml
index 6578c3f4d..56a07dea5 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/NsightEclipse.xml
@@ -3,13 +3,13 @@
 <entry>
   <name>MC_EstimatePiP</name>
   <cuda_api_list>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaGetErrorString</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaFuncGetAttributes</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaGetDeviceCount</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaFuncGetAttributes</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample uses Monte Carlo simulation for Estimation of Pi (using batch PRNG).  This sample also uses the NVIDIA CURAND library.]]></description>
@@ -60,6 +60,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/README.md b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/README.md
index 098e77f86..4390385f0 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/README.md
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/README.md
@@ -10,7 +10,7 @@ Random Number Generator, Computational Finance, CURAND Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaFuncGetAttributes, cudaGetDeviceCount, cudaMalloc, cudaSetDevice, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties
+cudaMemcpy, cudaGetErrorString, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaMalloc, cudaFuncGetAttributes, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [CURAND](../../../README.md#curand)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2017.vcxproj
index 83aba18ad..95b72fe5c 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/MC_EstimatePiQ.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -111,6 +111,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2019.vcxproj
index ae4a67118..ff9316333 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/MC_EstimatePiQ.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2022.vcxproj
index 9e11377dc..5714f941b 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/MC_EstimatePiQ.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/Makefile b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/Makefile
index 98d071f93..61ae97d2e 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/Makefile
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/NsightEclipse.xml
index 45497eeaf..71f9c101d 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/NsightEclipse.xml
@@ -3,13 +3,13 @@
 <entry>
   <name>MC_EstimatePiQ</name>
   <cuda_api_list>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaGetErrorString</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaFuncGetAttributes</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaGetDeviceCount</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaFuncGetAttributes</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample uses Monte Carlo simulation for Estimation of Pi (using batch QRNG).  This sample also uses the NVIDIA CURAND library.]]></description>
@@ -60,6 +60,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/README.md b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/README.md
index dd73101e1..c6bac7b71 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/README.md
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/README.md
@@ -10,7 +10,7 @@ Random Number Generator, Computational Finance, CURAND Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaFuncGetAttributes, cudaGetDeviceCount, cudaMalloc, cudaSetDevice, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties
+cudaMemcpy, cudaGetErrorString, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaMalloc, cudaFuncGetAttributes, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [CURAND](../../../README.md#curand)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2017.vcxproj
index df94f0e1c..c9d46e459 100644
--- a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/MC_SingleAsianOptionP.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -112,6 +112,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2019.vcxproj
index c6830be4b..1a31f284a 100644
--- a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/MC_SingleAsianOptionP.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2022.vcxproj
index ab727ab07..a97d1d42e 100644
--- a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/MC_SingleAsianOptionP.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/Makefile b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/Makefile
index c2e3b080d..97baec61f 100644
--- a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/Makefile
+++ b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/NsightEclipse.xml
index 52a3844dc..e11b104e4 100644
--- a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/NsightEclipse.xml
@@ -3,13 +3,13 @@
 <entry>
   <name>MC_SingleAsianOptionP</name>
   <cuda_api_list>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaGetErrorString</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaFuncGetAttributes</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaGetDeviceCount</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaFuncGetAttributes</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample uses Monte Carlo to simulate Single Asian Options using the NVIDIA CURAND library.]]></description>
@@ -61,6 +61,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/README.md b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/README.md
index f2a986d5e..f6f6cd1ac 100644
--- a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/README.md
+++ b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/README.md
@@ -10,7 +10,7 @@ Random Number Generator, Computational Finance, CURAND Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaFuncGetAttributes, cudaGetDeviceCount, cudaMalloc, cudaSetDevice, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties
+cudaMemcpy, cudaGetErrorString, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaMalloc, cudaFuncGetAttributes, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [CURAND](../../../README.md#curand)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/2_Concepts_and_Techniques/README.md b/Samples/2_Concepts_and_Techniques/README.md
index 00265194e..b407d80b2 100644
--- a/Samples/2_Concepts_and_Techniques/README.md
+++ b/Samples/2_Concepts_and_Techniques/README.md
@@ -19,7 +19,7 @@ This sample demonstrates how Discrete Cosine Transform (DCT) for blocks of 8 by
 ### [EGLStream_CUDA_CrossGPU](./EGLStream_CUDA_CrossGPU)
 Demonstrates CUDA and EGL Streams interop, where consumer's EGL Stream is on one GPU and producer's on other and both consumer-producer are different processes.
 
-### [EGLStreams_CUDA_Interop](./EGLStreams_CUDA_Interop)
+### [EGLStream_CUDA_Interop](./EGLStream_CUDA_Interop)
 Demonstrates data exchange between CUDA and EGL Streams.
 
 ### [EGLSync_CUDAEvent_Interop](./EGLSync_CUDAEvent_Interop)
diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/Makefile b/Samples/2_Concepts_and_Techniques/boxFilter/Makefile
index baec273e1..ef50006af 100644
--- a/Samples/2_Concepts_and_Techniques/boxFilter/Makefile
+++ b/Samples/2_Concepts_and_Techniques/boxFilter/Makefile
@@ -299,9 +299,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/boxFilter/NsightEclipse.xml
index 60ada986e..5dc277901 100644
--- a/Samples/2_Concepts_and_Techniques/boxFilter/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/boxFilter/NsightEclipse.xml
@@ -3,21 +3,21 @@
 <entry>
   <name>boxFilter</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
+    <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaCreateChannelDesc</toolkit>
+    <toolkit>cudaMallocArray</toolkit>
     <toolkit>cudaFreeArray</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
     <toolkit>cudaDestroyTextureObject</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaCreateTextureObject</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMallocArray</toolkit>
-    <toolkit>cudaCreateChannelDesc</toolkit>
     <toolkit>cudaGraphicsUnregisterResource</toolkit>
-    <toolkit>cudaGraphicsUnmapResources</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
   </cuda_api_list>
   <description><![CDATA[Fast image box filter using CUDA with OpenGL rendering.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -84,6 +84,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/README.md b/Samples/2_Concepts_and_Techniques/boxFilter/README.md
index f4d1299df..5be86cee5 100644
--- a/Samples/2_Concepts_and_Techniques/boxFilter/README.md
+++ b/Samples/2_Concepts_and_Techniques/boxFilter/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Image Processing
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaCreateChannelDesc, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetErrorString
+cudaGraphicsUnmapResources, cudaCreateChannelDesc, cudaMallocArray, cudaFreeArray, cudaFree, cudaGetErrorString, cudaMemcpy, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2017.vcxproj
index f5cef6565..e37e53620 100644
--- a/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/boxFilter.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -119,6 +119,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2019.vcxproj
index 5e2b348e9..08573611b 100644
--- a/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/boxFilter.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -115,6 +115,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2022.vcxproj
index 635ab0f1b..113b623b8 100644
--- a/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/boxFilter.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -115,6 +115,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/findgllib.mk b/Samples/2_Concepts_and_Techniques/boxFilter/findgllib.mk
index f0a5c5512..998fcf0f1 100644
--- a/Samples/2_Concepts_and_Techniques/boxFilter/findgllib.mk
+++ b/Samples/2_Concepts_and_Techniques/boxFilter/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/2_Concepts_and_Techniques/convolutionSeparable/Makefile b/Samples/2_Concepts_and_Techniques/convolutionSeparable/Makefile
index c189aba90..dd13e54b5 100644
--- a/Samples/2_Concepts_and_Techniques/convolutionSeparable/Makefile
+++ b/Samples/2_Concepts_and_Techniques/convolutionSeparable/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/convolutionSeparable/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/convolutionSeparable/NsightEclipse.xml
index a76eb5690..6471a4450 100644
--- a/Samples/2_Concepts_and_Techniques/convolutionSeparable/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/convolutionSeparable/NsightEclipse.xml
@@ -3,11 +3,11 @@
 <entry>
   <name>convolutionSeparable</name>
   <cuda_api_list>
-    <toolkit>cudaMemcpyToSymbol</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaMemcpyToSymbol</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample implements a separable convolution filter of a 2D signal with a gaussian kernel.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -53,6 +53,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/convolutionSeparable/README.md b/Samples/2_Concepts_and_Techniques/convolutionSeparable/README.md
index defbd7ea4..8afcf1770 100644
--- a/Samples/2_Concepts_and_Techniques/convolutionSeparable/README.md
+++ b/Samples/2_Concepts_and_Techniques/convolutionSeparable/README.md
@@ -10,7 +10,7 @@ Image Processing, Data Parallel Algorithms
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemcpyToSymbol, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy
+cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaMemcpyToSymbol, cudaMalloc
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2017.vcxproj
index 4cbaabb27..12140d617 100644
--- a/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/convolutionSeparable.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2019.vcxproj
index c231867ed..882b180ca 100644
--- a/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/convolutionSeparable.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2022.vcxproj
index e1ae239aa..0bfe92ac0 100644
--- a/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/convolutionSeparable.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/convolutionTexture/Makefile b/Samples/2_Concepts_and_Techniques/convolutionTexture/Makefile
index 5eaca29fd..e06312117 100644
--- a/Samples/2_Concepts_and_Techniques/convolutionTexture/Makefile
+++ b/Samples/2_Concepts_and_Techniques/convolutionTexture/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/convolutionTexture/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/convolutionTexture/NsightEclipse.xml
index 622b0ebc2..c2a9e145b 100644
--- a/Samples/2_Concepts_and_Techniques/convolutionTexture/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/convolutionTexture/NsightEclipse.xml
@@ -3,15 +3,15 @@
 <entry>
   <name>convolutionTexture</name>
   <cuda_api_list>
-    <toolkit>cudaMemcpyToSymbol</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMallocArray</toolkit>
+    <toolkit>cudaFreeArray</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaMemcpyToArray</toolkit>
-    <toolkit>cudaFreeArray</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaCreateTextureObject</toolkit>
+    <toolkit>cudaMemcpyToSymbol</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMallocArray</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
   </cuda_api_list>
   <description><![CDATA[Texture-based implementation of a separable 2D convolution with a gaussian kernel. Used for performance comparison against convolutionSeparable.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -59,6 +59,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/convolutionTexture/README.md b/Samples/2_Concepts_and_Techniques/convolutionTexture/README.md
index 11ef179a7..b54a396c6 100644
--- a/Samples/2_Concepts_and_Techniques/convolutionTexture/README.md
+++ b/Samples/2_Concepts_and_Techniques/convolutionTexture/README.md
@@ -10,7 +10,7 @@ Image Processing, Texture, Data Parallel Algorithms
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemcpyToSymbol, cudaFree, cudaMemcpyToArray, cudaFreeArray, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaMemcpy
+cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaMemcpyToArray, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMemcpyToSymbol, cudaMalloc
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2017.vcxproj
index 442fa53b3..1769a595b 100644
--- a/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/convolutionTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2019.vcxproj
index 936b6fe5b..1a367ce1e 100644
--- a/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/convolutionTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2022.vcxproj
index 24511a34e..d9ff12c8b 100644
--- a/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/convolutionTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/cuHook/Makefile b/Samples/2_Concepts_and_Techniques/cuHook/Makefile
index 81ea73bed..5986c7b7a 100644
--- a/Samples/2_Concepts_and_Techniques/cuHook/Makefile
+++ b/Samples/2_Concepts_and_Techniques/cuHook/Makefile
@@ -329,9 +329,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/cuHook/README.md b/Samples/2_Concepts_and_Techniques/cuHook/README.md
index 05833d89c..39ec1cbe4 100644
--- a/Samples/2_Concepts_and_Techniques/cuHook/README.md
+++ b/Samples/2_Concepts_and_Techniques/cuHook/README.md
@@ -12,7 +12,7 @@ Debugging
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -25,14 +25,14 @@ x86_64, ppc64le
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuDeviceGetCount, cuCtxCreate, cuMemAlloc, cuHookInfo, cuHookRegisterCallback, cuHook, cuMemFree, cuInit, cuCtxDestroy
+cuHook, cuMemAlloc, cuHookInfo, cuHookRegisterCallback, cuCtxDestroy, cuMemFree, cuDeviceGetCount, cuCtxCreate, cuInit
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaDeviceReset
+cudaDeviceReset, cudaFree
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/Makefile b/Samples/2_Concepts_and_Techniques/dct8x8/Makefile
index b00e4d359..0540bc005 100644
--- a/Samples/2_Concepts_and_Techniques/dct8x8/Makefile
+++ b/Samples/2_Concepts_and_Techniques/dct8x8/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/dct8x8/NsightEclipse.xml
index 3bbe52f96..adb067568 100644
--- a/Samples/2_Concepts_and_Techniques/dct8x8/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/dct8x8/NsightEclipse.xml
@@ -3,13 +3,13 @@
 <entry>
   <name>dct8x8</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaMallocArray</toolkit>
     <toolkit>cudaFreeArray</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaDestroyTextureObject</toolkit>
+    <toolkit>cudaFree</toolkit>
     <toolkit>cudaMallocPitch</toolkit>
+    <toolkit>cudaDestroyTextureObject</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaCreateTextureObject</toolkit>
-    <toolkit>cudaMallocArray</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates how Discrete Cosine Transform (DCT) for blocks of 8 by 8 pixels can be performed using CUDA: a naive implementation by definition and a more traditional approach used in many libraries. As opposed to implementing DCT in a fragment shader, CUDA allows for an easier and more efficient implementation.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -57,6 +57,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/README.md b/Samples/2_Concepts_and_Techniques/dct8x8/README.md
index 337b2ba29..7e0e24f35 100644
--- a/Samples/2_Concepts_and_Techniques/dct8x8/README.md
+++ b/Samples/2_Concepts_and_Techniques/dct8x8/README.md
@@ -10,7 +10,7 @@ Image Processing, Video Compression
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaFreeArray, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaMallocPitch, cudaCreateTextureObject, cudaMallocArray
+cudaMallocArray, cudaFreeArray, cudaFree, cudaMallocPitch, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2017.vcxproj
index 11ef09af3..5044890e9 100644
--- a/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/dct8x8.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -115,6 +115,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2019.vcxproj
index 7eace209d..8452c9db6 100644
--- a/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/dct8x8.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -111,6 +111,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2022.vcxproj
index f71108411..b0fd73786 100644
--- a/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/dct8x8.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -111,6 +111,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/Makefile b/Samples/2_Concepts_and_Techniques/eigenvalues/Makefile
index 93e54441e..54c371eae 100644
--- a/Samples/2_Concepts_and_Techniques/eigenvalues/Makefile
+++ b/Samples/2_Concepts_and_Techniques/eigenvalues/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/eigenvalues/NsightEclipse.xml
index 0fce11299..74a135157 100644
--- a/Samples/2_Concepts_and_Techniques/eigenvalues/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/eigenvalues/NsightEclipse.xml
@@ -4,9 +4,9 @@
   <name>eigenvalues</name>
   <cuda_api_list>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
   </cuda_api_list>
   <description><![CDATA[The computation of all or a subset of all eigenvalues is an important problem in Linear Algebra, statistics, physics, and many other fields. This sample demonstrates a parallel implementation of a bisection algorithm for the computation of all eigenvalues of a tridiagonal symmetric matrix of arbitrary size with CUDA.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -52,6 +52,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/README.md b/Samples/2_Concepts_and_Techniques/eigenvalues/README.md
index 7d217766f..bef2e951c 100644
--- a/Samples/2_Concepts_and_Techniques/eigenvalues/README.md
+++ b/Samples/2_Concepts_and_Techniques/eigenvalues/README.md
@@ -10,7 +10,7 @@ Linear Algebra
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaFree, cudaDeviceSynchronize, cudaMemcpy
+cudaMalloc, cudaDeviceSynchronize, cudaMemcpy, cudaFree
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2017.vcxproj
index 3f2ba5edd..e5a31279b 100644
--- a/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/eigenvalues.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -122,6 +122,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2019.vcxproj
index 4f6a81526..8f846835d 100644
--- a/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/eigenvalues.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -118,6 +118,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2022.vcxproj
index 0fd7a89ab..32faad5ed 100644
--- a/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/eigenvalues.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -118,6 +118,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/histogram/Makefile b/Samples/2_Concepts_and_Techniques/histogram/Makefile
index d35c575cf..c73f8a988 100644
--- a/Samples/2_Concepts_and_Techniques/histogram/Makefile
+++ b/Samples/2_Concepts_and_Techniques/histogram/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/histogram/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/histogram/NsightEclipse.xml
index 6147ce832..72e7c05ba 100644
--- a/Samples/2_Concepts_and_Techniques/histogram/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/histogram/NsightEclipse.xml
@@ -3,10 +3,10 @@
 <entry>
   <name>histogram</name>
   <cuda_api_list>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates efficient implementation of 64-bin and 256-bin histogram.]]></description>
@@ -52,6 +52,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/histogram/README.md b/Samples/2_Concepts_and_Techniques/histogram/README.md
index 4ec0ce474..8ddf8e58f 100644
--- a/Samples/2_Concepts_and_Techniques/histogram/README.md
+++ b/Samples/2_Concepts_and_Techniques/histogram/README.md
@@ -10,7 +10,7 @@ Image Processing, Data Parallel Algorithms
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaGetDeviceProperties
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2017.vcxproj
index f4cff9b55..39dd83783 100644
--- a/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/histogram.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -110,6 +110,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2019.vcxproj
index d2f26cb23..5bde10fc2 100644
--- a/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/histogram.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -106,6 +106,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2022.vcxproj
index 22496c8f9..a3e05c1a3 100644
--- a/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/histogram.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -106,6 +106,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/Makefile b/Samples/2_Concepts_and_Techniques/imageDenoising/Makefile
index d452de665..37895080e 100644
--- a/Samples/2_Concepts_and_Techniques/imageDenoising/Makefile
+++ b/Samples/2_Concepts_and_Techniques/imageDenoising/Makefile
@@ -299,9 +299,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/imageDenoising/NsightEclipse.xml
index 0fbb86314..bef88766a 100644
--- a/Samples/2_Concepts_and_Techniques/imageDenoising/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/imageDenoising/NsightEclipse.xml
@@ -3,19 +3,19 @@
 <entry>
   <name>imageDenoising</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaGraphicsUnregisterResource</toolkit>
+    <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMallocArray</toolkit>
     <toolkit>cudaFreeArray</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
+    <toolkit>cudaFree</toolkit>
     <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaCreateTextureObject</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMallocArray</toolkit>
     <toolkit>cudaGLRegisterBufferObject</toolkit>
-    <toolkit>cudaGraphicsUnmapResources</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaGraphicsUnregisterResource</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates two adaptive image denoising techniques: KNN and NLM, based on computation of both geometric and color distance between texels. While both techniques are implemented in the DirectX SDK using shaders, massively speeded up variation of the latter technique, taking advantage of shared memory, is implemented in addition to DirectX counterparts.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -83,6 +83,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/README.md b/Samples/2_Concepts_and_Techniques/imageDenoising/README.md
index e0ef23db7..d5741a773 100644
--- a/Samples/2_Concepts_and_Techniques/imageDenoising/README.md
+++ b/Samples/2_Concepts_and_Techniques/imageDenoising/README.md
@@ -10,7 +10,7 @@ Image Processing
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaGLRegisterBufferObject, cudaGraphicsUnmapResources, cudaMemcpy
+cudaGraphicsUnmapResources, cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaCreateTextureObject, cudaGLRegisterBufferObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/findgllib.mk b/Samples/2_Concepts_and_Techniques/imageDenoising/findgllib.mk
index f0a5c5512..998fcf0f1 100644
--- a/Samples/2_Concepts_and_Techniques/imageDenoising/findgllib.mk
+++ b/Samples/2_Concepts_and_Techniques/imageDenoising/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
+    else
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-  
+
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2017.vcxproj
index 866d8b333..bf03ff5bb 100644
--- a/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/imageDenoising.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -123,6 +123,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2019.vcxproj
index c924e1a5b..facb985e5 100644
--- a/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/imageDenoising.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -119,6 +119,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2022.vcxproj
index 4af74bf37..ec0b7c631 100644
--- a/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/imageDenoising.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -119,6 +119,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX/Makefile b/Samples/2_Concepts_and_Techniques/inlinePTX/Makefile
index 481f8a366..6b06a65e2 100644
--- a/Samples/2_Concepts_and_Techniques/inlinePTX/Makefile
+++ b/Samples/2_Concepts_and_Techniques/inlinePTX/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/inlinePTX/NsightEclipse.xml
index dd8e554c2..b32981020 100644
--- a/Samples/2_Concepts_and_Techniques/inlinePTX/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/inlinePTX/NsightEclipse.xml
@@ -3,15 +3,15 @@
 <entry>
   <name>inlinePTX</name>
   <cuda_api_list>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaMallocHost</toolkit>
+    <toolkit>cudaGetLastError</toolkit>
     <toolkit>cudaGridSize</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaBlockSize</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaFreeHost</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaGetLastError</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
   </cuda_api_list>
   <description><![CDATA[A simple test application that demonstrates a new CUDA 4.0 ability to embed PTX in a CUDA kernel.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -51,6 +51,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX/README.md b/Samples/2_Concepts_and_Techniques/inlinePTX/README.md
index 61009f1d8..60d100d84 100644
--- a/Samples/2_Concepts_and_Techniques/inlinePTX/README.md
+++ b/Samples/2_Concepts_and_Techniques/inlinePTX/README.md
@@ -10,7 +10,7 @@ Performance Strategies, PTX Assembly, CUDA Driver API
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaMallocHost, cudaGridSize, cudaDeviceSynchronize, cudaBlockSize, cudaFreeHost, cudaMalloc, cudaGetLastError, cudaMemcpy
+cudaMemcpy, cudaFree, cudaMallocHost, cudaGetLastError, cudaGridSize, cudaBlockSize, cudaDeviceSynchronize, cudaFreeHost, cudaMalloc
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2017.vcxproj
index ef5647697..b54f0ecac 100644
--- a/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/inlinePTX.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2019.vcxproj
index c843d4c87..59b6d3357 100644
--- a/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/inlinePTX.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2022.vcxproj
index 6e0f9bbd3..955dd6cad 100644
--- a/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/inlinePTX.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/README.md b/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/README.md
index 8b06db79f..3d99e87e1 100644
--- a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/README.md
+++ b/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/README.md
@@ -10,7 +10,7 @@ Performance Strategies, PTX Assembly, CUDA Driver API, Runtime Compilation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,7 +23,7 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuModuleGetFunction
+cuMemcpyDtoH, cuLaunchKernel, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
 cudaBlockSize, cudaGridSize
@@ -33,7 +33,7 @@ cudaBlockSize, cudaGridSize
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2017.vcxproj
index 7b167dbec..3d8dcc9f1 100644
--- a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2019.vcxproj
index 4e9539a9d..be53ad45d 100644
--- a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2022.vcxproj
index 3eb36cb75..87de14f3c 100644
--- a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/interval/Makefile b/Samples/2_Concepts_and_Techniques/interval/Makefile
index bb0e46f95..69cf0f083 100644
--- a/Samples/2_Concepts_and_Techniques/interval/Makefile
+++ b/Samples/2_Concepts_and_Techniques/interval/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/interval/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/interval/NsightEclipse.xml
index 5b7f3004f..33d957a09 100644
--- a/Samples/2_Concepts_and_Techniques/interval/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/interval/NsightEclipse.xml
@@ -3,19 +3,19 @@
 <entry>
   <name>interval</name>
   <cuda_api_list>
-    <toolkit>cudaDeviceSetLimit</toolkit>
-    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFuncSetCacheConfig</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaFree</toolkit>
     <toolkit>cudaGetLastError</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaDeviceSetLimit</toolkit>
+    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[Interval arithmetic operators example.  Uses various C++ features (templates and recursion).  The recursive mode requires Compute SM 2.0 capabilities.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -55,6 +55,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/interval/README.md b/Samples/2_Concepts_and_Techniques/interval/README.md
index 2d336b2e3..d13b6e9aa 100644
--- a/Samples/2_Concepts_and_Techniques/interval/README.md
+++ b/Samples/2_Concepts_and_Techniques/interval/README.md
@@ -10,7 +10,7 @@ Recursion, Templates
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDeviceSetLimit, cudaFree, cudaFuncSetCacheConfig, cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaDeviceSynchronize, cudaSetDevice, cudaMalloc, cudaEventDestroy, cudaGetLastError, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaFuncSetCacheConfig, cudaMalloc, cudaFree, cudaGetLastError, cudaSetDevice, cudaDeviceSynchronize, cudaEventRecord, cudaDeviceSetLimit, cudaEventDestroy, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/2_Concepts_and_Techniques/interval/interval_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/interval/interval_vs2017.vcxproj
index 937345f70..2c71346d3 100644
--- a/Samples/2_Concepts_and_Techniques/interval/interval_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/interval/interval_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/interval.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -213,6 +213,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/interval/interval_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/interval/interval_vs2019.vcxproj
index 0b54fff8b..43bea2307 100644
--- a/Samples/2_Concepts_and_Techniques/interval/interval_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/interval/interval_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/interval.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -209,6 +209,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/interval/interval_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/interval/interval_vs2022.vcxproj
index 4f5e0b4c3..ff04d5b61 100644
--- a/Samples/2_Concepts_and_Techniques/interval/interval_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/interval/interval_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/interval.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -209,6 +209,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/particles/Makefile b/Samples/2_Concepts_and_Techniques/particles/Makefile
index f322ac641..62b19bfb3 100644
--- a/Samples/2_Concepts_and_Techniques/particles/Makefile
+++ b/Samples/2_Concepts_and_Techniques/particles/Makefile
@@ -324,9 +324,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/particles/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/particles/NsightEclipse.xml
index 0b16f690f..b3fcd5add 100644
--- a/Samples/2_Concepts_and_Techniques/particles/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/particles/NsightEclipse.xml
@@ -6,19 +6,19 @@
     <flag>--std=c++14</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaMemcpyToSymbol</toolkit>
-    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaGLInit</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
     <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaInit</toolkit>
-    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaMemcpyToSymbol</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
     <toolkit>cudaGraphicsUnregisterResource</toolkit>
-    <toolkit>cudaGraphicsUnmapResources</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaInit</toolkit>
+    <toolkit>cudaGLInit</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample uses CUDA to simulate and visualize a large set of particles and their physical interaction.  Adding "-particles=<N>" to the command line will allow users to set # of particles for simulation.  This example implements a uniform grid data structure using either atomic operations or a fast radix sort from the Thrust library]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -83,6 +83,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <sources>
     <extraheader>..\..\..\Common\param.h</extraheader>
     <extraheader>..\..\..\Common\paramgl.h</extraheader>
diff --git a/Samples/2_Concepts_and_Techniques/particles/README.md b/Samples/2_Concepts_and_Techniques/particles/README.md
index 0e1b6134c..3b1a6974a 100644
--- a/Samples/2_Concepts_and_Techniques/particles/README.md
+++ b/Samples/2_Concepts_and_Techniques/particles/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Data Parallel Algorithms, Physically-Based Simulation, Perform
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemcpyToSymbol, cudaMemset, cudaFree, cudaGraphicsMapResources, cudaGLInit, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaInit, cudaMalloc, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy
+cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaMemset, cudaMemcpyToSymbol, cudaGraphicsGLRegisterBuffer, cudaGraphicsUnregisterResource, cudaMalloc, cudaInit, cudaGLInit
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/2_Concepts_and_Techniques/particles/findgllib.mk b/Samples/2_Concepts_and_Techniques/particles/findgllib.mk
index f0a5c5512..998fcf0f1 100644
--- a/Samples/2_Concepts_and_Techniques/particles/findgllib.mk
+++ b/Samples/2_Concepts_and_Techniques/particles/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/2_Concepts_and_Techniques/particles/particles_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/particles/particles_vs2017.vcxproj
index 8f28c7642..d2c2a6f80 100644
--- a/Samples/2_Concepts_and_Techniques/particles/particles_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/particles/particles_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/particles.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -129,6 +129,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/particles/particles_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/particles/particles_vs2019.vcxproj
index fba7fdcc9..c739cc74a 100644
--- a/Samples/2_Concepts_and_Techniques/particles/particles_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/particles/particles_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/particles.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -125,6 +125,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/particles/particles_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/particles/particles_vs2022.vcxproj
index a915f4c0d..77b78a23b 100644
--- a/Samples/2_Concepts_and_Techniques/particles/particles_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/particles/particles_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/particles.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -125,6 +125,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/radixSortThrust/Makefile b/Samples/2_Concepts_and_Techniques/radixSortThrust/Makefile
index df5ee47fb..459e9a21a 100644
--- a/Samples/2_Concepts_and_Techniques/radixSortThrust/Makefile
+++ b/Samples/2_Concepts_and_Techniques/radixSortThrust/Makefile
@@ -304,9 +304,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/radixSortThrust/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/radixSortThrust/NsightEclipse.xml
index c29e9f175..62fd4d555 100644
--- a/Samples/2_Concepts_and_Techniques/radixSortThrust/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/radixSortThrust/NsightEclipse.xml
@@ -6,13 +6,13 @@
     <flag>--std=c++14</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaGetDevice</toolkit>
     <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
-    <toolkit>cudaGetDevice</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates a very fast and efficient parallel radix sort uses Thrust library. The included RadixSort class can sort either key-value pairs (with float or unsigned integer keys) or keys only.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -58,6 +58,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/radixSortThrust/README.md b/Samples/2_Concepts_and_Techniques/radixSortThrust/README.md
index 835bfd17b..4914b5d20 100644
--- a/Samples/2_Concepts_and_Techniques/radixSortThrust/README.md
+++ b/Samples/2_Concepts_and_Techniques/radixSortThrust/README.md
@@ -10,7 +10,7 @@ Data-Parallel Algorithms, Performance Strategies
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaEventDestroy, cudaGetDeviceProperties, cudaGetDevice
+cudaEventSynchronize, cudaEventRecord, cudaGetDevice, cudaEventDestroy, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2017.vcxproj
index 159d441eb..a9c39d2e3 100644
--- a/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/radixSortThrust.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2019.vcxproj
index 244e1889d..9ac46af9d 100644
--- a/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/radixSortThrust.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2022.vcxproj
index edef2e2dd..30b2d9bb3 100644
--- a/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/radixSortThrust.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/reduction/Makefile b/Samples/2_Concepts_and_Techniques/reduction/Makefile
index a46a5bb38..eed9f8012 100644
--- a/Samples/2_Concepts_and_Techniques/reduction/Makefile
+++ b/Samples/2_Concepts_and_Techniques/reduction/Makefile
@@ -310,9 +310,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/reduction/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/reduction/NsightEclipse.xml
index e7fa89ac2..629ec3ff4 100644
--- a/Samples/2_Concepts_and_Techniques/reduction/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/reduction/NsightEclipse.xml
@@ -6,13 +6,13 @@
     <flag>--std=c++11</flag>
   </cflags>
   <cuda_api_list>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaGetDevice</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
-    <toolkit>cudaGetDevice</toolkit>
   </cuda_api_list>
   <description><![CDATA[A parallel sum reduction that computes the sum of a large arrays of values. This sample demonstrates several important optimization strategies for Data-Parallel Algorithms like reduction using shared memory, __shfl_down_sync, __reduce_add_sync and cooperative_groups reduce.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -67,6 +67,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/reduction/README.md b/Samples/2_Concepts_and_Techniques/reduction/README.md
index 1fde9b55d..65024c63a 100644
--- a/Samples/2_Concepts_and_Techniques/reduction/README.md
+++ b/Samples/2_Concepts_and_Techniques/reduction/README.md
@@ -10,7 +10,7 @@ Data-Parallel Algorithms, Performance Strategies
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaSetDevice, cudaMemcpy, cudaGetDeviceProperties, cudaGetDevice
+cudaMemcpy, cudaFree, cudaSetDevice, cudaDeviceSynchronize, cudaGetDevice, cudaMalloc, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [CPP11](../../../README.md#cpp11)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2017.vcxproj
index 65d31a5d4..f80b1766e 100644
--- a/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/reduction.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2019.vcxproj
index b5b673a37..dea43eabb 100644
--- a/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/reduction.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2022.vcxproj
index a3f39519f..bbc6826a8 100644
--- a/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/reduction.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/Makefile b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/Makefile
index 32bdaeb71..2f62c73ee 100644
--- a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/Makefile
+++ b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/Makefile
@@ -322,9 +322,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 61 70 72 75 80 86 87
+SMS ?= 61 70 72 75 80 86 87 90
 else
-SMS ?= 60 61 70 75 80 86
+SMS ?= 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/NsightEclipse.xml
index bc231be2e..09decc91e 100644
--- a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/NsightEclipse.xml
@@ -6,15 +6,15 @@
     <flag>--std=c++11</flag>
   </cflags>
   <cuda_api_list>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
-    <toolkit>cudaOccupancyMaxPotentialBlockSize</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaLaunchCooperativeKernel</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaOccupancyMaxPotentialBlockSize</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates single pass reduction using Multi Block Cooperative Groups.  This sample requires devices with compute capability 6.0 or higher having compute preemption.]]></description>
   <includepaths>
@@ -51,6 +51,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/README.md b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/README.md
index b0d5e58f6..7473bae9a 100644
--- a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/README.md
+++ b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/README.md
@@ -10,7 +10,7 @@ Cooperative Groups, MultiBlock Cooperative Groups
 
 ## Supported SM Architectures
 
-[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaOccupancyMaxPotentialBlockSize, cudaDeviceSynchronize, cudaSetDevice, cudaMalloc, cudaLaunchCooperativeKernel, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaFree, cudaSetDevice, cudaDeviceSynchronize, cudaLaunchCooperativeKernel, cudaMalloc, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaGetDeviceProperties, cudaOccupancyMaxPotentialBlockSize
 
 ## Dependencies needed to build/run
 [MBCG](../../../README.md#mbcg), [CPP11](../../../README.md#cpp11)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2017.vcxproj
index 4a505b432..c467625a7 100644
--- a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/reductionMultiBlockCG.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2019.vcxproj
index 1e0a9cd1e..306b8c6d2 100644
--- a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/reductionMultiBlockCG.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2022.vcxproj
index e5b2eb9f8..a7261ba7a 100644
--- a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/reductionMultiBlockCG.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/scalarProd/Makefile b/Samples/2_Concepts_and_Techniques/scalarProd/Makefile
index e20a38107..9cacc538f 100644
--- a/Samples/2_Concepts_and_Techniques/scalarProd/Makefile
+++ b/Samples/2_Concepts_and_Techniques/scalarProd/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/scalarProd/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/scalarProd/NsightEclipse.xml
index 6d36f7644..79a458f33 100644
--- a/Samples/2_Concepts_and_Techniques/scalarProd/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/scalarProd/NsightEclipse.xml
@@ -4,9 +4,9 @@
   <name>scalarProd</name>
   <cuda_api_list>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample calculates scalar products of a given set of input vector pairs.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -46,6 +46,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/scalarProd/README.md b/Samples/2_Concepts_and_Techniques/scalarProd/README.md
index 3b54a9662..47ff8e57b 100644
--- a/Samples/2_Concepts_and_Techniques/scalarProd/README.md
+++ b/Samples/2_Concepts_and_Techniques/scalarProd/README.md
@@ -10,7 +10,7 @@ Linear Algebra
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaFree, cudaDeviceSynchronize, cudaMemcpy
+cudaMalloc, cudaDeviceSynchronize, cudaMemcpy, cudaFree
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2017.vcxproj
index 087bc7dfc..d404cd613 100644
--- a/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/scalarProd.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2019.vcxproj
index bfb89fa12..72e9579a5 100644
--- a/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/scalarProd.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2022.vcxproj
index 0aabdbf46..cb130eaf4 100644
--- a/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/scalarProd.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/scan/Makefile b/Samples/2_Concepts_and_Techniques/scan/Makefile
index 5d371f2f1..8ce4ab790 100644
--- a/Samples/2_Concepts_and_Techniques/scan/Makefile
+++ b/Samples/2_Concepts_and_Techniques/scan/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/scan/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/scan/NsightEclipse.xml
index 239b68b14..11e6c2f39 100644
--- a/Samples/2_Concepts_and_Techniques/scan/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/scan/NsightEclipse.xml
@@ -4,9 +4,9 @@
   <name>scan</name>
   <cuda_api_list>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
   </cuda_api_list>
   <description><![CDATA[This example demonstrates an efficient CUDA implementation of parallel prefix sum, also known as "scan".  Given an array of numbers, scan computes a new array in which each element is the sum of all the elements before it in the input array.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -50,6 +50,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/scan/README.md b/Samples/2_Concepts_and_Techniques/scan/README.md
index 8a0563f4d..e10d0a389 100644
--- a/Samples/2_Concepts_and_Techniques/scan/README.md
+++ b/Samples/2_Concepts_and_Techniques/scan/README.md
@@ -10,7 +10,7 @@ Data-Parallel Algorithms, Performance Strategies
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaFree, cudaDeviceSynchronize, cudaMemcpy
+cudaMalloc, cudaDeviceSynchronize, cudaMemcpy, cudaFree
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/2_Concepts_and_Techniques/scan/scan_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/scan/scan_vs2017.vcxproj
index 37db85698..ac2bd4bd7 100644
--- a/Samples/2_Concepts_and_Techniques/scan/scan_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/scan/scan_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/scan.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/scan/scan_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/scan/scan_vs2019.vcxproj
index b6da5f75d..fddb8498c 100644
--- a/Samples/2_Concepts_and_Techniques/scan/scan_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/scan/scan_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/scan.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/scan/scan_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/scan/scan_vs2022.vcxproj
index cb3e2bed2..4f238933c 100644
--- a/Samples/2_Concepts_and_Techniques/scan/scan_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/scan/scan_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/scan.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/Makefile b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/Makefile
index 9f15915f0..fe1a04ef2 100644
--- a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/Makefile
+++ b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/Makefile
@@ -304,9 +304,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/NsightEclipse.xml
index dd3363eaf..bfabb713b 100644
--- a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/NsightEclipse.xml
@@ -7,13 +7,13 @@
     <flag>--threads 1</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMemGetInfo</toolkit>
+    <toolkit>cudaEventSynchronize</toolkit>
     <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
+    <toolkit>cudaMemset</toolkit>
     <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaEventSynchronize</toolkit>
-    <toolkit>cudaMemGetInfo</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates an approach to the image segmentation trees construction.  This method is based on Boruvka's MST algorithm.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -60,6 +60,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/README.md b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/README.md
index cd6cbc5ae..b62923534 100644
--- a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/README.md
+++ b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/README.md
@@ -10,7 +10,7 @@ Data-Parallel Algorithms, Performance Strategies
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaMemGetInfo, cudaMemcpy
+cudaMemcpy, cudaMemGetInfo, cudaEventSynchronize, cudaEventRecord, cudaMemset, cudaEventElapsedTime, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2017.vcxproj
index 14b777a67..d72fc3b5e 100644
--- a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/segmentationTreeThrust.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 1 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2019.vcxproj
index 591302fec..96fd37606 100644
--- a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/segmentationTreeThrust.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 1 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2022.vcxproj
index e82eceb40..18e37f1fd 100644
--- a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/segmentationTreeThrust.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 1 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/shfl_scan/Makefile b/Samples/2_Concepts_and_Techniques/shfl_scan/Makefile
index 256e10fd9..4f210b3ef 100644
--- a/Samples/2_Concepts_and_Techniques/shfl_scan/Makefile
+++ b/Samples/2_Concepts_and_Techniques/shfl_scan/Makefile
@@ -310,9 +310,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/shfl_scan/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/shfl_scan/NsightEclipse.xml
index 8410c3253..d60600637 100644
--- a/Samples/2_Concepts_and_Techniques/shfl_scan/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/shfl_scan/NsightEclipse.xml
@@ -7,18 +7,18 @@
     <flag>-O3</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaMallocHost</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaFreeHost</toolkit>
+    <toolkit>cudaGetDevice</toolkit>
+    <toolkit>cudaMemset</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
-    <toolkit>cudaGetDevice</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This example demonstrates how to use the shuffle intrinsic __shfl_up_sync to perform a scan operation across a thread block. ]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -66,6 +66,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/shfl_scan/README.md b/Samples/2_Concepts_and_Techniques/shfl_scan/README.md
index f042bb793..5afaefead 100644
--- a/Samples/2_Concepts_and_Techniques/shfl_scan/README.md
+++ b/Samples/2_Concepts_and_Techniques/shfl_scan/README.md
@@ -10,7 +10,7 @@ Data-Parallel Algorithms, Performance Strategies
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaEventRecord, cudaMallocHost, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties, cudaGetDevice
+cudaMemcpy, cudaFree, cudaMallocHost, cudaEventSynchronize, cudaEventRecord, cudaFreeHost, cudaGetDevice, cudaMemset, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
 
 ## Dependencies needed to build/run
 [CPP11](../../../README.md#cpp11)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2017.vcxproj
index ee4c6348f..9e8017119 100644
--- a/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/shfl_scan.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2019.vcxproj
index c99b3fe4f..72076c473 100644
--- a/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/shfl_scan.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2022.vcxproj
index ec328ba67..3ad9db48c 100644
--- a/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/shfl_scan.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/sortingNetworks/Makefile b/Samples/2_Concepts_and_Techniques/sortingNetworks/Makefile
index a2de7cb98..7d715f267 100644
--- a/Samples/2_Concepts_and_Techniques/sortingNetworks/Makefile
+++ b/Samples/2_Concepts_and_Techniques/sortingNetworks/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/sortingNetworks/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/sortingNetworks/NsightEclipse.xml
index bec0501ad..3ef627213 100644
--- a/Samples/2_Concepts_and_Techniques/sortingNetworks/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/sortingNetworks/NsightEclipse.xml
@@ -4,9 +4,9 @@
   <name>sortingNetworks</name>
   <cuda_api_list>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample implements bitonic sort and odd-even merge sort (also known as Batcher's sort), algorithms belonging to the class of sorting networks. While generally subefficient, for large sequences compared to algorithms with better asymptotic algorithmic complexity (i.e. merge sort or radix sort), this may be the preferred algorithms of choice for sorting batches of short-sized to mid-sized (key, value) array pairs. Refer to an excellent tutorial by H. W. Lang http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/networks/indexen.htm]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -46,6 +46,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/sortingNetworks/README.md b/Samples/2_Concepts_and_Techniques/sortingNetworks/README.md
index 905e698ff..707ed30dc 100644
--- a/Samples/2_Concepts_and_Techniques/sortingNetworks/README.md
+++ b/Samples/2_Concepts_and_Techniques/sortingNetworks/README.md
@@ -10,7 +10,7 @@ Data-Parallel Algorithms
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaFree, cudaDeviceSynchronize, cudaMemcpy
+cudaMalloc, cudaDeviceSynchronize, cudaMemcpy, cudaFree
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2017.vcxproj
index 0012d67a6..7c018efb1 100644
--- a/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/sortingNetworks.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -111,6 +111,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2019.vcxproj
index 90052760a..3181de728 100644
--- a/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/sortingNetworks.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2022.vcxproj
index 626d389a3..dc83a7b72 100644
--- a/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/sortingNetworks.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/Makefile b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/Makefile
index c2e55c399..c672f7074 100644
--- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/Makefile
+++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/Makefile
@@ -285,9 +285,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/NsightEclipse.xml
index 63511e063..8ca780e91 100644
--- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/NsightEclipse.xml
@@ -3,20 +3,20 @@
 <entry>
   <name>streamOrderedAllocation</name>
   <cuda_api_list>
+    <toolkit>cudaDeviceGetDefaultMemPool</toolkit>
+    <toolkit>cudaFreeAsync</toolkit>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
     <toolkit>cudaStreamDestroy</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaEventSynchronize</toolkit>
     <toolkit>cudaDeviceGetAttribute</toolkit>
     <toolkit>cudaMallocAsync</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
-    <toolkit>cudaDeviceGetDefaultMemPool</toolkit>
     <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaMemPoolSetAttribute</toolkit>
+    <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaMemPoolSetAttribute</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaMemcpyAsync</toolkit>
-    <toolkit>cudaFreeAsync</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates stream ordered memory allocation on a GPU using cudaMallocAsync and cudaMemPool family of APIs.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -48,6 +48,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/README.md b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/README.md
index 2eaa6d1a2..4af372ec2 100644
--- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/README.md
+++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/README.md
@@ -10,7 +10,7 @@ Performance Strategies
 
 ## Supported SM Architectures
 
-[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamDestroy, cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaDeviceGetAttribute, cudaMallocAsync, cudaStreamCreateWithFlags, cudaDeviceGetDefaultMemPool, cudaSetDevice, cudaMemPoolSetAttribute, cudaStreamSynchronize, cudaMemcpyAsync, cudaFreeAsync
+cudaDeviceGetDefaultMemPool, cudaFreeAsync, cudaStreamCreateWithFlags, cudaStreamDestroy, cudaDeviceGetAttribute, cudaMallocAsync, cudaSetDevice, cudaEventSynchronize, cudaEventRecord, cudaStreamSynchronize, cudaMemPoolSetAttribute, cudaEventElapsedTime, cudaMemcpyAsync, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2017.vcxproj
index 376ea1fa0..8f4dc7c00 100644
--- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/streamOrderedAllocation.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2019.vcxproj
index f81e896cb..9cd3baae7 100644
--- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/streamOrderedAllocation.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2022.vcxproj
index b61e42f3f..6f2d5040c 100644
--- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/streamOrderedAllocation.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/Makefile b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/Makefile
index b71befbe7..418451617 100644
--- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/Makefile
+++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/Makefile
@@ -305,9 +305,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/NsightEclipse.xml
index d583eaaad..b357595e0 100644
--- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/NsightEclipse.xml
@@ -8,28 +8,28 @@
   <cuda_api_list>
     <driver>cuDeviceGetAttribute</driver>
     <driver>cuDeviceGet</driver>
-    <toolkit>cudaDeviceEnablePeerAccess</toolkit>
-    <toolkit>cudaMemPoolImportPointer</toolkit>
-    <toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
+    <toolkit>cudaDeviceGetAttribute</toolkit>
+    <toolkit>cudaMemPoolImportFromShareableHandle</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaMemPoolExportPointer</toolkit>
+    <toolkit>cudaMemPoolGetAccess</toolkit>
     <toolkit>cudaMemPoolDestroy</toolkit>
+    <toolkit>cudaMemPoolSetAccess</toolkit>
     <toolkit>cudaMallocAsync</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
-    <toolkit>cudaDeviceCanAccessPeer</toolkit>
+    <toolkit>cudaMemPoolImportPointer</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
     <toolkit>cudaMemcpyAsync</toolkit>
+    <toolkit>cudaDeviceCanAccessPeer</toolkit>
+    <toolkit>cudaFreeAsync</toolkit>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
     <toolkit>cudaStreamDestroy</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaGetDeviceProperties</toolkit>
-    <toolkit>cudaMemPoolSetAccess</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaDeviceGetAttribute</toolkit>
-    <toolkit>cudaMemPoolExportPointer</toolkit>
-    <toolkit>cudaMemPoolImportFromShareableHandle</toolkit>
-    <toolkit>cudaMemPoolCreate</toolkit>
     <toolkit>cudaGetLastError</toolkit>
-    <toolkit>cudaStreamSynchronize</toolkit>
-    <toolkit>cudaMemPoolGetAccess</toolkit>
+    <toolkit>cudaMemPoolCreate</toolkit>
     <toolkit>cudaMemPoolExportToShareableHandle</toolkit>
-    <toolkit>cudaFreeAsync</toolkit>
+    <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaDeviceEnablePeerAccess</toolkit>
+    <toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
+    <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates IPC pools of stream ordered memory allocated using cudaMallocAsync and cudaMemPool family of APIs.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -63,6 +63,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <sources>
     <extracompilation>../../../Common/helper_multiprocess.cpp</extracompilation>
     <extraheader>../../../Common/helper_multiprocess.h</extraheader>
diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/README.md b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/README.md
index 73a90c5e6..b3eff96a3 100644
--- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/README.md
+++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/README.md
@@ -10,7 +10,7 @@ Performance Strategies
 
 ## Supported SM Architectures
 
-[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -26,11 +26,11 @@ x86_64
 cuDeviceGetAttribute, cuDeviceGet
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDeviceEnablePeerAccess, cudaMemPoolImportPointer, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaMemPoolDestroy, cudaMallocAsync, cudaStreamCreateWithFlags, cudaDeviceCanAccessPeer, cudaMemcpyAsync, cudaStreamDestroy, cudaSetDevice, cudaGetDeviceProperties, cudaMemPoolSetAccess, cudaGetDeviceCount, cudaDeviceGetAttribute, cudaMemPoolExportPointer, cudaMemPoolImportFromShareableHandle, cudaMemPoolCreate, cudaGetLastError, cudaStreamSynchronize, cudaMemPoolGetAccess, cudaMemPoolExportToShareableHandle, cudaFreeAsync
+cudaDeviceGetAttribute, cudaMemPoolImportFromShareableHandle, cudaSetDevice, cudaMemPoolExportPointer, cudaMemPoolGetAccess, cudaMemPoolDestroy, cudaMemPoolSetAccess, cudaMallocAsync, cudaMemPoolImportPointer, cudaGetDeviceCount, cudaMemcpyAsync, cudaDeviceCanAccessPeer, cudaFreeAsync, cudaStreamCreateWithFlags, cudaStreamDestroy, cudaGetLastError, cudaMemPoolCreate, cudaMemPoolExportToShareableHandle, cudaStreamSynchronize, cudaDeviceEnablePeerAccess, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaGetDeviceProperties
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/Makefile b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/Makefile
index 792db21e7..75bf6386c 100644
--- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/Makefile
+++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/Makefile
@@ -285,9 +285,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/NsightEclipse.xml
index 3e51c0ff0..f64444425 100644
--- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/NsightEclipse.xml
@@ -6,21 +6,21 @@
     <flag>--std=c++11</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaStreamWaitEvent</toolkit>
-    <toolkit>cudaStreamDestroy</toolkit>
+    <toolkit>cudaDeviceGetDefaultMemPool</toolkit>
+    <toolkit>cudaFreeAsync</toolkit>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
     <toolkit>cudaMemPoolSetAccess</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaMallocAsync</toolkit>
+    <toolkit>cudaStreamDestroy</toolkit>
     <toolkit>cudaDeviceGetAttribute</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
-    <toolkit>cudaDeviceCanAccessPeer</toolkit>
-    <toolkit>cudaDeviceGetDefaultMemPool</toolkit>
+    <toolkit>cudaMallocAsync</toolkit>
     <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaStreamWaitEvent</toolkit>
     <toolkit>cudaMemcpyAsync</toolkit>
-    <toolkit>cudaFreeAsync</toolkit>
+    <toolkit>cudaDeviceCanAccessPeer</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates peer-to-peer access of stream ordered memory allocated using cudaMallocAsync and cudaMemPool family of APIs.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -52,6 +52,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/README.md b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/README.md
index 9ef3fa179..0b2a83d95 100644
--- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/README.md
+++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/README.md
@@ -10,7 +10,7 @@ Performance Strategies
 
 ## Supported SM Architectures
 
-[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamWaitEvent, cudaStreamDestroy, cudaMemPoolSetAccess, cudaEventRecord, cudaEventCreate, cudaGetDeviceCount, cudaMallocAsync, cudaDeviceGetAttribute, cudaStreamCreateWithFlags, cudaDeviceCanAccessPeer, cudaDeviceGetDefaultMemPool, cudaSetDevice, cudaStreamSynchronize, cudaMemcpyAsync, cudaFreeAsync
+cudaDeviceGetDefaultMemPool, cudaFreeAsync, cudaStreamCreateWithFlags, cudaMemPoolSetAccess, cudaStreamDestroy, cudaDeviceGetAttribute, cudaMallocAsync, cudaSetDevice, cudaGetDeviceCount, cudaEventRecord, cudaStreamSynchronize, cudaStreamWaitEvent, cudaMemcpyAsync, cudaDeviceCanAccessPeer, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2017.vcxproj
index 5b9ec3209..5ad486fcf 100644
--- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/streamOrderedAllocationP2P.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2019.vcxproj
index 79b1ce851..a7248a6e3 100644
--- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/streamOrderedAllocationP2P.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2022.vcxproj
index 6b468f18d..0ea721452 100644
--- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/streamOrderedAllocationP2P.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/threadFenceReduction/Makefile b/Samples/2_Concepts_and_Techniques/threadFenceReduction/Makefile
index e3f4586be..37b8a9b46 100644
--- a/Samples/2_Concepts_and_Techniques/threadFenceReduction/Makefile
+++ b/Samples/2_Concepts_and_Techniques/threadFenceReduction/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/threadFenceReduction/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/threadFenceReduction/NsightEclipse.xml
index e997b187f..8436fef3e 100644
--- a/Samples/2_Concepts_and_Techniques/threadFenceReduction/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/threadFenceReduction/NsightEclipse.xml
@@ -3,10 +3,10 @@
 <entry>
   <name>threadFenceReduction</name>
   <cuda_api_list>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample shows how to perform a reduction operation on an array of values using the thread Fence intrinsic to produce a single value in a single kernel (as opposed to two or more kernel calls as shown in the "reduction" CUDA Sample).  Single-pass reduction requires global atomic instructions (Compute Capability 2.0 or later) and the _threadfence() intrinsic (CUDA 2.2 or later).]]></description>
@@ -48,6 +48,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/threadFenceReduction/README.md b/Samples/2_Concepts_and_Techniques/threadFenceReduction/README.md
index 4ce80f5c4..0156a5f12 100644
--- a/Samples/2_Concepts_and_Techniques/threadFenceReduction/README.md
+++ b/Samples/2_Concepts_and_Techniques/threadFenceReduction/README.md
@@ -10,7 +10,7 @@ Cooperative Groups, Data-Parallel Algorithms, Performance Strategies
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaGetDeviceProperties
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2017.vcxproj
index 4b0cf87e5..0d547054e 100644
--- a/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/threadFenceReduction.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2019.vcxproj
index b15f148f5..ca4df79f1 100644
--- a/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/threadFenceReduction.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2022.vcxproj
index 157d11dcc..2e5fa2f9d 100644
--- a/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/threadFenceReduction.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/threadMigration/Makefile b/Samples/2_Concepts_and_Techniques/threadMigration/Makefile
index 3acd84547..6c4d542dd 100644
--- a/Samples/2_Concepts_and_Techniques/threadMigration/Makefile
+++ b/Samples/2_Concepts_and_Techniques/threadMigration/Makefile
@@ -283,9 +283,9 @@ FATBIN_FILE := threadMigration_kernel${TARGET_SIZE}.fatbin
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(GENCODE_FLAGS),)
diff --git a/Samples/2_Concepts_and_Techniques/threadMigration/README.md b/Samples/2_Concepts_and_Techniques/threadMigration/README.md
index 22a095da6..801305ce0 100644
--- a/Samples/2_Concepts_and_Techniques/threadMigration/README.md
+++ b/Samples/2_Concepts_and_Techniques/threadMigration/README.md
@@ -10,7 +10,7 @@ CUDA Driver API
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuModuleGetFunction, cuModuleLoadData, cuDeviceGetCount, cuCtxCreate, cuCtxPopCurrent, cuDeviceGetName, cuLaunchKernel, cuMemAlloc, cuMemcpyDtoH, cuModuleUnload, cuCtxPushCurrent, cuDeviceGet, cuMemFree, cuInit, cuCtxDestroy, cuDeviceGetAttribute
+cuMemcpyDtoH, cuLaunchKernel, cuModuleLoadData, cuDeviceGetName, cuDeviceGet, cuDeviceGetAttribute, cuMemAlloc, cuMemFree, cuCtxDestroy, cuCtxPopCurrent, cuModuleUnload, cuDeviceGetCount, cuModuleGetFunction, cuCtxCreate, cuCtxPushCurrent, cuInit
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2017.vcxproj
index 8a38efdd2..ba53a46ac 100644
--- a/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/threadMigration.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -111,6 +111,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2019.vcxproj
index 05df36d85..6f0f8a904 100644
--- a/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/threadMigration.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2022.vcxproj
index cb4ebcc86..b81a2b07e 100644
--- a/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/threadMigration.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/StreamPriorities/Makefile b/Samples/3_CUDA_Features/StreamPriorities/Makefile
index 9b1734f5f..6faeb198a 100644
--- a/Samples/3_CUDA_Features/StreamPriorities/Makefile
+++ b/Samples/3_CUDA_Features/StreamPriorities/Makefile
@@ -297,9 +297,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/StreamPriorities/NsightEclipse.xml b/Samples/3_CUDA_Features/StreamPriorities/NsightEclipse.xml
index b3e3aab7e..0cb59d58a 100644
--- a/Samples/3_CUDA_Features/StreamPriorities/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/StreamPriorities/NsightEclipse.xml
@@ -3,15 +3,15 @@
 <entry>
   <name>StreamPriorities</name>
   <cuda_api_list>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaStreamCreateWithPriority</toolkit>
     <toolkit>cudaDeviceGetStreamPriorityRange</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaStreamCreateWithPriority</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates basic use of stream priorities.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -54,6 +54,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/StreamPriorities/README.md b/Samples/3_CUDA_Features/StreamPriorities/README.md
index 52150773a..0b616b490 100644
--- a/Samples/3_CUDA_Features/StreamPriorities/README.md
+++ b/Samples/3_CUDA_Features/StreamPriorities/README.md
@@ -10,7 +10,7 @@ CUDA Streams and Events
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDeviceGetStreamPriorityRange, cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaMalloc, cudaStreamCreateWithPriority, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaStreamCreateWithPriority, cudaDeviceGetStreamPriorityRange, cudaEventSynchronize, cudaEventRecord, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
 
 ## Dependencies needed to build/run
 [Stream-Priorities](../../../README.md#stream-priorities)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/3_CUDA_Features/bf16TensorCoreGemm/Makefile b/Samples/3_CUDA_Features/bf16TensorCoreGemm/Makefile
index 3d8b8dec9..29fdcbaec 100644
--- a/Samples/3_CUDA_Features/bf16TensorCoreGemm/Makefile
+++ b/Samples/3_CUDA_Features/bf16TensorCoreGemm/Makefile
@@ -322,9 +322,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 80 86 87
+SMS ?= 80 86 87 90
 else
-SMS ?= 80 86
+SMS ?= 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/bf16TensorCoreGemm/NsightEclipse.xml b/Samples/3_CUDA_Features/bf16TensorCoreGemm/NsightEclipse.xml
index a93136960..1eac8e5c0 100644
--- a/Samples/3_CUDA_Features/bf16TensorCoreGemm/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/bf16TensorCoreGemm/NsightEclipse.xml
@@ -6,18 +6,18 @@
     <flag>--std=c++11</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaFuncSetAttribute</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaGetLastError</toolkit>
     <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaFuncSetAttribute</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaMemset</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaGetLastError</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[A CUDA sample demonstrating __nv_bfloat16 (e8m7) GEMM computation using the Warp Matrix Multiply and Accumulate (WMMA) API introduced with CUDA 11 in Ampere chip family tensor cores for faster matrix operations. This sample also uses async copy provided by cuda pipeline interface for gmem to shmem async loads which improves kernel performance and reduces register presssure.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -52,6 +52,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/bf16TensorCoreGemm/README.md b/Samples/3_CUDA_Features/bf16TensorCoreGemm/README.md
index 7a8b729b7..30f4eece5 100644
--- a/Samples/3_CUDA_Features/bf16TensorCoreGemm/README.md
+++ b/Samples/3_CUDA_Features/bf16TensorCoreGemm/README.md
@@ -10,7 +10,7 @@ Matrix Multiply, WMMA, Tensor Cores
 
 ## Supported SM Architectures
 
-[SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaEventRecord, cudaEventCreate, cudaFuncSetAttribute, cudaEventElapsedTime, cudaEventSynchronize, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties
+cudaMemcpy, cudaFree, cudaGetErrorString, cudaGetLastError, cudaEventSynchronize, cudaFuncSetAttribute, cudaEventRecord, cudaMemset, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
 
 ## Dependencies needed to build/run
 [CPP11](../../../README.md#cpp11)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2017.vcxproj b/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2017.vcxproj
index bfab31dcf..b8cb9fb27 100644
--- a/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/bf16TensorCoreGemm.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2019.vcxproj b/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2019.vcxproj
index 4240024c6..c2c1f9202 100644
--- a/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/bf16TensorCoreGemm.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2022.vcxproj b/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2022.vcxproj
index ecbfc6d54..774b45bf7 100644
--- a/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/bf16TensorCoreGemm.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/binaryPartitionCG/Makefile b/Samples/3_CUDA_Features/binaryPartitionCG/Makefile
index d1bdbc6d3..ace0b3ae0 100644
--- a/Samples/3_CUDA_Features/binaryPartitionCG/Makefile
+++ b/Samples/3_CUDA_Features/binaryPartitionCG/Makefile
@@ -310,9 +310,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/binaryPartitionCG/NsightEclipse.xml b/Samples/3_CUDA_Features/binaryPartitionCG/NsightEclipse.xml
index b611d7c1c..96982217e 100644
--- a/Samples/3_CUDA_Features/binaryPartitionCG/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/binaryPartitionCG/NsightEclipse.xml
@@ -6,15 +6,15 @@
     <flag>--std=c++11</flag>
   </cflags>
   <cuda_api_list>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaMallocHost</toolkit>
-    <toolkit>cudaOccupancyMaxPotentialBlockSize</toolkit>
-    <toolkit>cudaMemsetAsync</toolkit>
     <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
     <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaMemsetAsync</toolkit>
     <toolkit>cudaMemcpyAsync</toolkit>
+    <toolkit>cudaOccupancyMaxPotentialBlockSize</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample is a simple code that illustrates binary partition cooperative groups and reduce within the thread block.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -54,6 +54,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/binaryPartitionCG/README.md b/Samples/3_CUDA_Features/binaryPartitionCG/README.md
index c40ff6bdc..d2c296826 100644
--- a/Samples/3_CUDA_Features/binaryPartitionCG/README.md
+++ b/Samples/3_CUDA_Features/binaryPartitionCG/README.md
@@ -10,7 +10,7 @@ Cooperative Groups
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaMallocHost, cudaOccupancyMaxPotentialBlockSize, cudaMemsetAsync, cudaFreeHost, cudaMalloc, cudaStreamCreateWithFlags, cudaStreamSynchronize, cudaMemcpyAsync
+cudaStreamCreateWithFlags, cudaFree, cudaMallocHost, cudaFreeHost, cudaStreamSynchronize, cudaMalloc, cudaMemsetAsync, cudaMemcpyAsync, cudaOccupancyMaxPotentialBlockSize
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2017.vcxproj b/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2017.vcxproj
index cd3fccf0a..be9569bba 100644
--- a/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/binaryPartitionCG.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2019.vcxproj b/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2019.vcxproj
index 5b7320cec..c3c283623 100644
--- a/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/binaryPartitionCG.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2022.vcxproj b/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2022.vcxproj
index df9ea16fd..594d4bd3c 100644
--- a/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/binaryPartitionCG.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/bindlessTexture/Makefile b/Samples/3_CUDA_Features/bindlessTexture/Makefile
index 4310ee3ee..9e9c3369d 100644
--- a/Samples/3_CUDA_Features/bindlessTexture/Makefile
+++ b/Samples/3_CUDA_Features/bindlessTexture/Makefile
@@ -299,9 +299,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/bindlessTexture/NsightEclipse.xml b/Samples/3_CUDA_Features/bindlessTexture/NsightEclipse.xml
index 247f12559..25b63ad86 100644
--- a/Samples/3_CUDA_Features/bindlessTexture/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/bindlessTexture/NsightEclipse.xml
@@ -3,28 +3,28 @@
 <entry>
   <name>bindlessTexture</name>
   <cuda_api_list>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaGetMipmappedArrayLevel</toolkit>
-    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
-    <toolkit>cudaArrayGetInfo</toolkit>
     <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaFreeMipmappedArray</toolkit>
+    <toolkit>cudaGetMipmappedArrayLevel</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
     <toolkit>cudaDestroySurfaceObject</toolkit>
-    <toolkit>cudaPitchedPtr</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaGraphicsUnregisterResource</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaDestroyTextureObject</toolkit>
-    <toolkit>cudaGetLastError</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaFreeArray</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
     <toolkit>cudaExtent</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaCreateSurfaceObject</toolkit>
     <toolkit>cudaMallocMipmappedArray</toolkit>
+    <toolkit>cudaPitchedPtr</toolkit>
+    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
     <toolkit>cudaCreateTextureObject</toolkit>
-    <toolkit>cudaMallocArray</toolkit>
     <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaMallocArray</toolkit>
+    <toolkit>cudaFreeArray</toolkit>
+    <toolkit>cudaArrayGetInfo</toolkit>
+    <toolkit>cudaGetLastError</toolkit>
+    <toolkit>cudaDestroyTextureObject</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
+    <toolkit>cudaFreeMipmappedArray</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGraphicsUnregisterResource</toolkit>
+    <toolkit>cudaMalloc</toolkit>
   </cuda_api_list>
   <description><![CDATA[This example demonstrates use of cudaSurfaceObject, cudaTextureObject, and MipMap support in CUDA.  A GPU with Compute Capability SM 3.0 is required to run the sample.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -85,6 +85,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/bindlessTexture/README.md b/Samples/3_CUDA_Features/bindlessTexture/README.md
index 4047f08aa..cf14ba1ae 100644
--- a/Samples/3_CUDA_Features/bindlessTexture/README.md
+++ b/Samples/3_CUDA_Features/bindlessTexture/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Texture
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaGraphicsMapResources, cudaGetMipmappedArrayLevel, cudaGraphicsResourceGetMappedPointer, cudaArrayGetInfo, cudaMemcpy, cudaFreeMipmappedArray, cudaDestroySurfaceObject, cudaPitchedPtr, cudaMalloc, cudaGraphicsUnregisterResource, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaGetLastError, cudaFree, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaExtent, cudaCreateSurfaceObject, cudaMallocMipmappedArray, cudaCreateTextureObject, cudaMallocArray, cudaGraphicsUnmapResources
+cudaMemcpy, cudaGetMipmappedArrayLevel, cudaGraphicsMapResources, cudaDestroySurfaceObject, cudaExtent, cudaDeviceSynchronize, cudaCreateSurfaceObject, cudaMallocMipmappedArray, cudaPitchedPtr, cudaGraphicsResourceGetMappedPointer, cudaCreateTextureObject, cudaGraphicsUnmapResources, cudaMallocArray, cudaFreeArray, cudaArrayGetInfo, cudaGetLastError, cudaDestroyTextureObject, cudaGraphicsGLRegisterBuffer, cudaFreeMipmappedArray, cudaFree, cudaGraphicsUnregisterResource, cudaMalloc
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2017.vcxproj b/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2017.vcxproj
index c74343151..bcc1990a3 100644
--- a/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/bindlessTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -118,6 +118,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2019.vcxproj b/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2019.vcxproj
index d18b1ac48..110d990fb 100644
--- a/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/bindlessTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2022.vcxproj b/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2022.vcxproj
index e08edee01..f9bcc8ae7 100644
--- a/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/bindlessTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/bindlessTexture/findgllib.mk b/Samples/3_CUDA_Features/bindlessTexture/findgllib.mk
index f0a5c5512..998fcf0f1 100644
--- a/Samples/3_CUDA_Features/bindlessTexture/findgllib.mk
+++ b/Samples/3_CUDA_Features/bindlessTexture/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
+    else
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-  
+
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/Makefile b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/Makefile
index 85498be38..5eab1414e 100644
--- a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/Makefile
+++ b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/Makefile
@@ -310,9 +310,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 61 70 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/NsightEclipse.xml b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/NsightEclipse.xml
index 714aab28a..383410d36 100644
--- a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/NsightEclipse.xml
@@ -8,20 +8,20 @@
     <flag>--std=c++14</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaMemset</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaPeekAtLastError</toolkit>
-    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaStreamCreateWithFlags</toolkit>
-    <toolkit>cudaGetLastError</toolkit>
-    <toolkit>cudaMemcpyAsync</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMemcpyAsync</toolkit>
+    <toolkit>cudaFree</toolkit>
     <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaGetLastError</toolkit>
+    <toolkit>cudaPeekAtLastError</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates an advanced quicksort implemented using CUDA Dynamic Parallelism.  This sample requires devices with compute capability 3.5 or higher.]]></description>
   <includepaths>
@@ -63,6 +63,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/README.md b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/README.md
index b3f334194..1314b89b4 100644
--- a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/README.md
+++ b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/README.md
@@ -10,7 +10,7 @@ Cooperative Groups, CUDA Dynamic Parallelism
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaDeviceSynchronize, cudaPeekAtLastError, cudaMalloc, cudaStreamCreateWithFlags, cudaGetLastError, cudaMemcpyAsync, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties
+cudaStreamCreateWithFlags, cudaMemcpy, cudaMemcpyAsync, cudaFree, cudaGetErrorString, cudaGetLastError, cudaPeekAtLastError, cudaDeviceSynchronize, cudaEventRecord, cudaMemset, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
 
 ## Dependencies needed to build/run
 [CDP](../../../README.md#cdp)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2017.vcxproj b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2017.vcxproj
index 6244cf4fd..5c76d1c7d 100644
--- a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/cdpAdvancedQuicksort.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2019.vcxproj b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2019.vcxproj
index 68071f8a3..4779bb514 100644
--- a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cdpAdvancedQuicksort.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2022.vcxproj b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2022.vcxproj
index 462ed63c6..39da54075 100644
--- a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cdpAdvancedQuicksort.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cdpBezierTessellation/Makefile b/Samples/3_CUDA_Features/cdpBezierTessellation/Makefile
index 4f89e84df..23ef29e87 100644
--- a/Samples/3_CUDA_Features/cdpBezierTessellation/Makefile
+++ b/Samples/3_CUDA_Features/cdpBezierTessellation/Makefile
@@ -285,9 +285,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 61 70 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/cdpBezierTessellation/NsightEclipse.xml b/Samples/3_CUDA_Features/cdpBezierTessellation/NsightEclipse.xml
index 5566265e5..26392f7b9 100644
--- a/Samples/3_CUDA_Features/cdpBezierTessellation/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/cdpBezierTessellation/NsightEclipse.xml
@@ -6,10 +6,10 @@
     <flag>-dc</flag>
   </cflags>
   <cuda_api_list>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaGetDeviceCount</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates bezier tessellation of lines implemented using CUDA Dynamic Parallelism.  This sample requires devices with compute capability 3.5 or higher.]]></description>
@@ -50,6 +50,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/cdpBezierTessellation/README.md b/Samples/3_CUDA_Features/cdpBezierTessellation/README.md
index 30ba33753..bb2d6e636 100644
--- a/Samples/3_CUDA_Features/cdpBezierTessellation/README.md
+++ b/Samples/3_CUDA_Features/cdpBezierTessellation/README.md
@@ -10,7 +10,7 @@ CUDA Dynamic Parallelism
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGetDeviceCount, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaFree, cudaGetDeviceCount, cudaMalloc, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [CDP](../../../README.md#cdp)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2017.vcxproj b/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2017.vcxproj
index 6436a8ad2..e7733bf88 100644
--- a/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/cdpBezierTessellation.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2019.vcxproj b/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2019.vcxproj
index 1452f3312..aa2fefe0a 100644
--- a/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cdpBezierTessellation.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2022.vcxproj b/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2022.vcxproj
index b1eb0d98a..577c5e196 100644
--- a/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cdpBezierTessellation.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cdpQuadtree/Makefile b/Samples/3_CUDA_Features/cdpQuadtree/Makefile
index fc39f0e0b..9b4e08f87 100644
--- a/Samples/3_CUDA_Features/cdpQuadtree/Makefile
+++ b/Samples/3_CUDA_Features/cdpQuadtree/Makefile
@@ -310,9 +310,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 61 70 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/cdpQuadtree/NsightEclipse.xml b/Samples/3_CUDA_Features/cdpQuadtree/NsightEclipse.xml
index 63b022306..6d867f7f1 100644
--- a/Samples/3_CUDA_Features/cdpQuadtree/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/cdpQuadtree/NsightEclipse.xml
@@ -7,11 +7,11 @@
     <flag>--std=c++14</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaDeviceSetLimit</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaGetLastError</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaDeviceSetLimit</toolkit>
+    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates Quad Trees implemented using CUDA Dynamic Parallelism. This sample requires devices with compute capability 3.5 or higher.]]></description>
@@ -54,6 +54,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/cdpQuadtree/README.md b/Samples/3_CUDA_Features/cdpQuadtree/README.md
index a170e47e4..dc96c3c5a 100644
--- a/Samples/3_CUDA_Features/cdpQuadtree/README.md
+++ b/Samples/3_CUDA_Features/cdpQuadtree/README.md
@@ -10,7 +10,7 @@ Cooperative Groups, CUDA Dynamic Parallelism
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDeviceSetLimit, cudaFree, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaFree, cudaGetLastError, cudaDeviceSetLimit, cudaMalloc, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [CDP](../../../README.md#cdp)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2017.vcxproj b/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2017.vcxproj
index b0cd83761..15110cd3a 100644
--- a/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/cdpQuadtree.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2019.vcxproj b/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2019.vcxproj
index 99bfdc1d0..3ec1b1364 100644
--- a/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cdpQuadtree.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2022.vcxproj b/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2022.vcxproj
index d2e082999..181408c39 100644
--- a/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cdpQuadtree.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cdpSimplePrint/Makefile b/Samples/3_CUDA_Features/cdpSimplePrint/Makefile
index d64d69da8..1ea644281 100644
--- a/Samples/3_CUDA_Features/cdpSimplePrint/Makefile
+++ b/Samples/3_CUDA_Features/cdpSimplePrint/Makefile
@@ -285,9 +285,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 61 70 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/cdpSimplePrint/NsightEclipse.xml b/Samples/3_CUDA_Features/cdpSimplePrint/NsightEclipse.xml
index d1d5fd2fb..cfe32d190 100644
--- a/Samples/3_CUDA_Features/cdpSimplePrint/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/cdpSimplePrint/NsightEclipse.xml
@@ -6,10 +6,10 @@
     <flag>-dc</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaDeviceSetLimit</toolkit>
-    <toolkit>cudaGetLastError</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaGetLastError</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaDeviceSetLimit</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates simple printf implemented using CUDA Dynamic Parallelism.  This sample requires devices with compute capability 3.5 or higher.]]></description>
   <includepaths>
@@ -51,6 +51,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/cdpSimplePrint/README.md b/Samples/3_CUDA_Features/cdpSimplePrint/README.md
index ab8682711..c872f4bbb 100644
--- a/Samples/3_CUDA_Features/cdpSimplePrint/README.md
+++ b/Samples/3_CUDA_Features/cdpSimplePrint/README.md
@@ -10,7 +10,7 @@ CUDA Dynamic Parallelism
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDeviceSetLimit, cudaGetLastError, cudaDeviceSynchronize, cudaGetDeviceProperties
+cudaDeviceSynchronize, cudaGetLastError, cudaGetDeviceProperties, cudaDeviceSetLimit
 
 ## Dependencies needed to build/run
 [CDP](../../../README.md#cdp)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2017.vcxproj b/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2017.vcxproj
index 63502de30..b26134875 100644
--- a/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/cdpSimplePrint.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2019.vcxproj b/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2019.vcxproj
index d87192480..ae105c0af 100644
--- a/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cdpSimplePrint.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2022.vcxproj b/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2022.vcxproj
index bc59636b0..7f1b73f1b 100644
--- a/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cdpSimplePrint.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cdpSimpleQuicksort/Makefile b/Samples/3_CUDA_Features/cdpSimpleQuicksort/Makefile
index 5771acad3..1bf519d50 100644
--- a/Samples/3_CUDA_Features/cdpSimpleQuicksort/Makefile
+++ b/Samples/3_CUDA_Features/cdpSimpleQuicksort/Makefile
@@ -285,9 +285,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 61 70 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/cdpSimpleQuicksort/NsightEclipse.xml b/Samples/3_CUDA_Features/cdpSimpleQuicksort/NsightEclipse.xml
index 827146192..9c1960819 100644
--- a/Samples/3_CUDA_Features/cdpSimpleQuicksort/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/cdpSimpleQuicksort/NsightEclipse.xml
@@ -6,13 +6,13 @@
     <flag>-dc</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaDeviceSetLimit</toolkit>
-    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaStreamDestroy</toolkit>
+    <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaDeviceSetLimit</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates simple quicksort implemented using CUDA Dynamic Parallelism.  This sample requires devices with compute capability 3.5 or higher.]]></description>
@@ -55,6 +55,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/cdpSimpleQuicksort/README.md b/Samples/3_CUDA_Features/cdpSimpleQuicksort/README.md
index 0805d3800..5a765c0a8 100644
--- a/Samples/3_CUDA_Features/cdpSimpleQuicksort/README.md
+++ b/Samples/3_CUDA_Features/cdpSimpleQuicksort/README.md
@@ -10,7 +10,7 @@ CUDA Dynamic Parallelism
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDeviceSetLimit, cudaFree, cudaStreamDestroy, cudaDeviceSynchronize, cudaMalloc, cudaStreamCreateWithFlags, cudaMemcpy, cudaGetDeviceProperties
+cudaStreamCreateWithFlags, cudaMemcpy, cudaStreamDestroy, cudaFree, cudaDeviceSynchronize, cudaDeviceSetLimit, cudaMalloc, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [CDP](../../../README.md#cdp)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2017.vcxproj b/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2017.vcxproj
index 1f396df63..701ef7dfd 100644
--- a/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/cdpSimpleQuicksort.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2019.vcxproj b/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2019.vcxproj
index e926d0d1a..be79af0a7 100644
--- a/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cdpSimpleQuicksort.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2022.vcxproj b/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2022.vcxproj
index bf24f8f3e..601d665a4 100644
--- a/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cdpSimpleQuicksort.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cudaCompressibleMemory/Makefile b/Samples/3_CUDA_Features/cudaCompressibleMemory/Makefile
index 7118ad8c4..9d7f9adfc 100644
--- a/Samples/3_CUDA_Features/cudaCompressibleMemory/Makefile
+++ b/Samples/3_CUDA_Features/cudaCompressibleMemory/Makefile
@@ -293,9 +293,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/cudaCompressibleMemory/README.md b/Samples/3_CUDA_Features/cudaCompressibleMemory/README.md
index e95e49045..cd28b6f24 100644
--- a/Samples/3_CUDA_Features/cudaCompressibleMemory/README.md
+++ b/Samples/3_CUDA_Features/cudaCompressibleMemory/README.md
@@ -10,7 +10,7 @@ CUDA Driver API, Compressible Memory, MMAP
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuMemRelease, cuCtxGetDevice, cuMemGetAllocationPropertiesFromHandle, cuMemSetAccess, cuMemMap, cuMemCreate, cuMemAddressFree, cuMemGetAllocationGranularity, cuMemUnmap, cuMemAddressReserve, cuDeviceGetAttribute
+cuMemGetAllocationPropertiesFromHandle, cuMemCreate, cuDeviceGetAttribute, cuCtxGetDevice, cuMemGetAllocationGranularity, cuMemAddressFree, cuMemUnmap, cuMemMap, cuMemRelease, cuMemAddressReserve, cuMemSetAccess
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaEventRecord, cudaEventCreate, cudaOccupancyMaxPotentialBlockSize, cudaEventElapsedTime, cudaEventSynchronize, cudaMemcpy
+cudaMemcpy, cudaEventSynchronize, cudaEventRecord, cudaEventElapsedTime, cudaOccupancyMaxPotentialBlockSize, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2017.vcxproj b/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2017.vcxproj
index cb21616cd..5fd82a2d2 100644
--- a/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/cudaCompressibleMemory.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2019.vcxproj b/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2019.vcxproj
index f4763a3e8..f01382784 100644
--- a/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cudaCompressibleMemory.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2022.vcxproj b/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2022.vcxproj
index 1a116a853..5c5b989cb 100644
--- a/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cudaCompressibleMemory.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cudaTensorCoreGemm/Makefile b/Samples/3_CUDA_Features/cudaTensorCoreGemm/Makefile
index cd5ed03c6..a949034f5 100644
--- a/Samples/3_CUDA_Features/cudaTensorCoreGemm/Makefile
+++ b/Samples/3_CUDA_Features/cudaTensorCoreGemm/Makefile
@@ -291,9 +291,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 70 72 75 80 86 87
+SMS ?= 70 72 75 80 86 87 90
 else
-SMS ?= 70 75 80 86
+SMS ?= 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/cudaTensorCoreGemm/NsightEclipse.xml b/Samples/3_CUDA_Features/cudaTensorCoreGemm/NsightEclipse.xml
index 25dc757d5..e4306062b 100644
--- a/Samples/3_CUDA_Features/cudaTensorCoreGemm/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/cudaTensorCoreGemm/NsightEclipse.xml
@@ -6,18 +6,18 @@
     <flag>-maxrregcount=255</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaFuncSetAttribute</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaGetLastError</toolkit>
     <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaFuncSetAttribute</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaMemset</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaGetLastError</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[CUDA sample demonstrating a GEMM computation using the Warp Matrix Multiply and Accumulate (WMMA) API introduced in CUDA 9.
 
@@ -52,6 +52,7 @@ In addition to that, it demonstrates the use of the new CUDA function attribute
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/cudaTensorCoreGemm/README.md b/Samples/3_CUDA_Features/cudaTensorCoreGemm/README.md
index 5c7a02ca7..ed9ca03e2 100644
--- a/Samples/3_CUDA_Features/cudaTensorCoreGemm/README.md
+++ b/Samples/3_CUDA_Features/cudaTensorCoreGemm/README.md
@@ -14,7 +14,7 @@ Matrix Multiply, WMMA, Tensor Cores
 
 ## Supported SM Architectures
 
-[SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -27,11 +27,11 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaEventRecord, cudaEventCreate, cudaFuncSetAttribute, cudaEventElapsedTime, cudaEventSynchronize, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties
+cudaMemcpy, cudaFree, cudaGetErrorString, cudaGetLastError, cudaEventSynchronize, cudaFuncSetAttribute, cudaEventRecord, cudaMemset, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2017.vcxproj b/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2017.vcxproj
index 6f38472db..a5854708c 100644
--- a/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/cudaTensorCoreGemm.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2019.vcxproj b/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2019.vcxproj
index 32c37bb49..5a7700ba1 100644
--- a/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cudaTensorCoreGemm.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2022.vcxproj b/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2022.vcxproj
index 24801f678..3a4f102a2 100644
--- a/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cudaTensorCoreGemm.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/Makefile b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/Makefile
index e8ed96d9b..a8731ad23 100644
--- a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/Makefile
+++ b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/Makefile
@@ -322,9 +322,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 80 86 87
+SMS ?= 80 86 87 90
 else
-SMS ?= 80 86
+SMS ?= 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/NsightEclipse.xml b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/NsightEclipse.xml
index 2cd658145..f28f86b30 100644
--- a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/NsightEclipse.xml
@@ -6,18 +6,18 @@
     <flag>--std=c++11</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaFuncSetAttribute</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaGetLastError</toolkit>
     <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaFuncSetAttribute</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaMemset</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaGetLastError</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[CUDA sample demonstrates double precision GEMM computation using the Double precision Warp Matrix Multiply and Accumulate (WMMA) API introduced with CUDA 11 in Ampere chip family tensor cores for faster matrix operations. This sample also uses async copy provided by cuda pipeline interface for gmem to shmem async loads which improves kernel performance and reduces register presssure. Further, this sample also demonstrates how to use cooperative groups async copy interface over a group for performing gmem to shmem async loads.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -52,6 +52,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/README.md b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/README.md
index 13b8e8e40..8699aa21f 100644
--- a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/README.md
+++ b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/README.md
@@ -10,7 +10,7 @@ Matrix Multiply, WMMA, Tensor Cores
 
 ## Supported SM Architectures
 
-[SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaEventRecord, cudaEventCreate, cudaFuncSetAttribute, cudaEventElapsedTime, cudaEventSynchronize, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties
+cudaMemcpy, cudaFree, cudaGetErrorString, cudaGetLastError, cudaEventSynchronize, cudaFuncSetAttribute, cudaEventRecord, cudaMemset, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
 
 ## Dependencies needed to build/run
 [CPP11](../../../README.md#cpp11)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2017.vcxproj b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2017.vcxproj
index 8e82e63ea..1dbcff5ee 100644
--- a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/dmmaTensorCoreGemm.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2019.vcxproj b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2019.vcxproj
index 7c6849bde..0f024a266 100644
--- a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/dmmaTensorCoreGemm.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2022.vcxproj b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2022.vcxproj
index 480cc0b81..6dcc0232a 100644
--- a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/dmmaTensorCoreGemm.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/Makefile b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/Makefile
index 7a7fd2fa1..6fdd9aab7 100644
--- a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/Makefile
+++ b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/Makefile
@@ -310,9 +310,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 70 72 75 80 86 87
+SMS ?= 70 72 75 80 86 87 90
 else
-SMS ?= 70 75 80 86
+SMS ?= 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/NsightEclipse.xml b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/NsightEclipse.xml
index 60eb92c8a..83bc6d5c5 100644
--- a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/NsightEclipse.xml
@@ -6,20 +6,20 @@
     <flag>--std=c++11</flag>
   </cflags>
   <cuda_api_list>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaDeviceGetAttribute</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaMallocHost</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaMemsetAsync</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaEventSynchronize</toolkit>
-    <toolkit>cudaDeviceGetAttribute</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
-    <toolkit>cudaEventDestroy</toolkit>
     <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaMemsetAsync</toolkit>
     <toolkit>cudaMemcpyAsync</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample implements matrix multiplication which uses asynchronous copy of data from global to shared memory when on compute capability 8.0 or higher. Also demonstrates arrive-wait barrier for synchronization.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -59,6 +59,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/README.md b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/README.md
index b5adb76f4..a2d7d6a34 100644
--- a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/README.md
+++ b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/README.md
@@ -10,7 +10,7 @@ CUDA Runtime API, Linear Algebra, CPP11 CUDA
 
 ## Supported SM Architectures
 
-[SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaEventRecord, cudaMallocHost, cudaEventCreate, cudaMemsetAsync, cudaEventElapsedTime, cudaEventSynchronize, cudaDeviceGetAttribute, cudaFreeHost, cudaMalloc, cudaStreamCreateWithFlags, cudaEventDestroy, cudaStreamSynchronize, cudaMemcpyAsync
+cudaStreamCreateWithFlags, cudaMalloc, cudaDeviceGetAttribute, cudaFree, cudaMallocHost, cudaEventSynchronize, cudaEventRecord, cudaFreeHost, cudaStreamSynchronize, cudaEventDestroy, cudaEventElapsedTime, cudaMemsetAsync, cudaMemcpyAsync, cudaEventCreate
 
 ## Dependencies needed to build/run
 [CPP11](../../../README.md#cpp11)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2017.vcxproj b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2017.vcxproj
index 100834d71..41bedad12 100644
--- a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/globalToShmemAsyncCopy.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2019.vcxproj b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2019.vcxproj
index 5f1c0721b..7db3f2315 100644
--- a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/globalToShmemAsyncCopy.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2022.vcxproj b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2022.vcxproj
index 2b82306f1..5351130d8 100644
--- a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/globalToShmemAsyncCopy.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/graphMemoryFootprint/Makefile b/Samples/3_CUDA_Features/graphMemoryFootprint/Makefile
index f4647da1f..0233718bf 100644
--- a/Samples/3_CUDA_Features/graphMemoryFootprint/Makefile
+++ b/Samples/3_CUDA_Features/graphMemoryFootprint/Makefile
@@ -285,9 +285,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/graphMemoryFootprint/NsightEclipse.xml b/Samples/3_CUDA_Features/graphMemoryFootprint/NsightEclipse.xml
index 72be4775f..7f7d842b7 100644
--- a/Samples/3_CUDA_Features/graphMemoryFootprint/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/graphMemoryFootprint/NsightEclipse.xml
@@ -3,23 +3,23 @@
 <entry>
   <name>graphMemoryFootprint</name>
   <cuda_api_list>
-    <toolkit>cudaGraphAddMemFreeNode</toolkit>
+    <toolkit>cudaGraphAddMemAllocNode</toolkit>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
+    <toolkit>cudaGraphInstantiate</toolkit>
     <toolkit>cudaStreamDestroy</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphExecDestroy</toolkit>
-    <toolkit>cudaGraphInstantiate</toolkit>
     <toolkit>cudaDeviceGetAttribute</toolkit>
-    <toolkit>cudaDriverGetVersion</toolkit>
-    <toolkit>cudaGraphCreate</toolkit>
     <toolkit>cudaGraphAddKernelNode</toolkit>
-    <toolkit>cudaGraphAddMemAllocNode</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
-    <toolkit>cudaDeviceGraphMemTrim</toolkit>
-    <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaGraphAddMemFreeNode</toolkit>
     <toolkit>cudaDeviceGetGraphMemAttribute</toolkit>
+    <toolkit>cudaGraphCreate</toolkit>
     <toolkit>cudaGraphDestroy</toolkit>
-    <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaDriverGetVersion</toolkit>
     <toolkit>cudaGraphLaunch</toolkit>
+    <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaDeviceGraphMemTrim</toolkit>
+    <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaGraphExecDestroy</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates how graph memory nodes re-use virtual addresses and physical memory.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -56,6 +56,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/graphMemoryFootprint/README.md b/Samples/3_CUDA_Features/graphMemoryFootprint/README.md
index 76c04cf75..6286fa0df 100644
--- a/Samples/3_CUDA_Features/graphMemoryFootprint/README.md
+++ b/Samples/3_CUDA_Features/graphMemoryFootprint/README.md
@@ -10,7 +10,7 @@ CUDA Runtime API, Performance Strategies, CUDA Graphs
 
 ## Supported SM Architectures
 
-[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaGraphAddMemFreeNode, cudaStreamDestroy, cudaFree, cudaGraphExecDestroy, cudaGraphInstantiate, cudaDeviceGetAttribute, cudaDriverGetVersion, cudaGraphCreate, cudaGraphAddKernelNode, cudaGraphAddMemAllocNode, cudaStreamCreateWithFlags, cudaDeviceGraphMemTrim, cudaStreamSynchronize, cudaDeviceGetGraphMemAttribute, cudaGraphDestroy, cudaGetDeviceProperties, cudaGraphLaunch
+cudaGraphAddMemAllocNode, cudaStreamCreateWithFlags, cudaGraphInstantiate, cudaStreamDestroy, cudaFree, cudaDeviceGetAttribute, cudaGraphAddKernelNode, cudaGraphAddMemFreeNode, cudaDeviceGetGraphMemAttribute, cudaGraphCreate, cudaGraphDestroy, cudaDriverGetVersion, cudaGraphLaunch, cudaStreamSynchronize, cudaDeviceGraphMemTrim, cudaGetDeviceProperties, cudaGraphExecDestroy
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2017.vcxproj b/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2017.vcxproj
index 412492cbd..e65d0b667 100644
--- a/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/graphMemoryFootprint.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2019.vcxproj b/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2019.vcxproj
index 28c073169..82b981424 100644
--- a/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/graphMemoryFootprint.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2022.vcxproj b/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2022.vcxproj
index 5eaef34bb..cbf3dd312 100644
--- a/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/graphMemoryFootprint.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/graphMemoryNodes/Makefile b/Samples/3_CUDA_Features/graphMemoryNodes/Makefile
index a233d14bb..b760fc44b 100644
--- a/Samples/3_CUDA_Features/graphMemoryNodes/Makefile
+++ b/Samples/3_CUDA_Features/graphMemoryNodes/Makefile
@@ -285,9 +285,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/graphMemoryNodes/NsightEclipse.xml b/Samples/3_CUDA_Features/graphMemoryNodes/NsightEclipse.xml
index 5fd9f688d..a2cc3608d 100644
--- a/Samples/3_CUDA_Features/graphMemoryNodes/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/graphMemoryNodes/NsightEclipse.xml
@@ -3,32 +3,32 @@
 <entry>
   <name>graphMemoryNodes</name>
   <cuda_api_list>
-    <toolkit>cudaMallocAsync</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
     <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaMemcpyAsync</toolkit>
-    <toolkit>cudaStreamDestroy</toolkit>
-    <toolkit>cudaMallocManaged</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
+    <toolkit>cudaDeviceGetAttribute</toolkit>
     <toolkit>cudaDriverGetVersion</toolkit>
-    <toolkit>cudaGraphCreate</toolkit>
-    <toolkit>cudaGraphAddMemAllocNode</toolkit>
-    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphLaunch</toolkit>
     <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaMallocAsync</toolkit>
     <toolkit>cudaStreamEndCapture</toolkit>
-    <toolkit>cudaGraphExecDestroy</toolkit>
+    <toolkit>cudaMallocManaged</toolkit>
+    <toolkit>cudaGraphCreate</toolkit>
+    <toolkit>cudaMemcpyAsync</toolkit>
+    <toolkit>cudaFreeAsync</toolkit>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
+    <toolkit>cudaGraphInstantiate</toolkit>
+    <toolkit>cudaStreamDestroy</toolkit>
     <toolkit>cudaStreamBeginCapture</toolkit>
-    <toolkit>cudaDeviceGetAttribute</toolkit>
-    <toolkit>cudaStreamSynchronize</toolkit>
-    <toolkit>cudaGraphDestroy</toolkit>
-    <toolkit>cudaGraphLaunch</toolkit>
-    <toolkit>cudaGraphAddMemFreeNode</toolkit>
     <toolkit>cudaStreamWaitEvent</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
+    <toolkit>cudaGraphAddMemAllocNode</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaGraphInstantiate</toolkit>
     <toolkit>cudaGraphAddKernelNode</toolkit>
-    <toolkit>cudaFreeAsync</toolkit>
+    <toolkit>cudaGraphAddMemFreeNode</toolkit>
+    <toolkit>cudaGraphDestroy</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphExecDestroy</toolkit>
   </cuda_api_list>
   <description><![CDATA[A demonstration of memory allocations and frees within CUDA graphs using Graph APIs and Stream Capture APIs.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -67,6 +67,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/graphMemoryNodes/README.md b/Samples/3_CUDA_Features/graphMemoryNodes/README.md
index f3e934e10..7bf467a4d 100644
--- a/Samples/3_CUDA_Features/graphMemoryNodes/README.md
+++ b/Samples/3_CUDA_Features/graphMemoryNodes/README.md
@@ -10,7 +10,7 @@ CUDA Graphs, Stream Capture
 
 ## Supported SM Architectures
 
-[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMallocAsync, cudaStreamCreateWithFlags, cudaMemcpy, cudaMemcpyAsync, cudaStreamDestroy, cudaMallocManaged, cudaEventCreate, cudaDriverGetVersion, cudaGraphCreate, cudaGraphAddMemAllocNode, cudaMalloc, cudaEventDestroy, cudaStreamEndCapture, cudaGraphExecDestroy, cudaStreamBeginCapture, cudaDeviceGetAttribute, cudaStreamSynchronize, cudaGraphDestroy, cudaGraphLaunch, cudaGraphAddMemFreeNode, cudaStreamWaitEvent, cudaFree, cudaEventRecord, cudaGraphInstantiate, cudaGraphAddKernelNode, cudaFreeAsync
+cudaMemcpy, cudaDeviceGetAttribute, cudaDriverGetVersion, cudaGraphLaunch, cudaEventDestroy, cudaMallocAsync, cudaStreamEndCapture, cudaMallocManaged, cudaGraphCreate, cudaMemcpyAsync, cudaFreeAsync, cudaStreamCreateWithFlags, cudaGraphInstantiate, cudaStreamDestroy, cudaStreamBeginCapture, cudaStreamWaitEvent, cudaEventCreate, cudaGraphAddMemAllocNode, cudaFree, cudaGraphAddKernelNode, cudaGraphAddMemFreeNode, cudaGraphDestroy, cudaEventRecord, cudaStreamSynchronize, cudaMalloc, cudaGraphExecDestroy
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2017.vcxproj b/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2017.vcxproj
index 54629b898..f025d778b 100644
--- a/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/graphMemoryNodes.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2019.vcxproj b/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2019.vcxproj
index ce007363f..df298580f 100644
--- a/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/graphMemoryNodes.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2022.vcxproj b/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2022.vcxproj
index 70d59c50e..5f123dc96 100644
--- a/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/graphMemoryNodes.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/immaTensorCoreGemm/Makefile b/Samples/3_CUDA_Features/immaTensorCoreGemm/Makefile
index 0018823dd..840e9399c 100644
--- a/Samples/3_CUDA_Features/immaTensorCoreGemm/Makefile
+++ b/Samples/3_CUDA_Features/immaTensorCoreGemm/Makefile
@@ -291,9 +291,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 72 75 80 86 87
+SMS ?= 72 75 80 86 87 90
 else
-SMS ?= 75 80 86
+SMS ?= 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/immaTensorCoreGemm/NsightEclipse.xml b/Samples/3_CUDA_Features/immaTensorCoreGemm/NsightEclipse.xml
index ce92f2df5..35a48fe6a 100644
--- a/Samples/3_CUDA_Features/immaTensorCoreGemm/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/immaTensorCoreGemm/NsightEclipse.xml
@@ -6,18 +6,18 @@
     <flag>-maxrregcount=255</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaFuncSetAttribute</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaGetLastError</toolkit>
     <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaFuncSetAttribute</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaMemset</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaGetLastError</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[CUDA sample demonstrating a integer GEMM computation using the Warp Matrix Multiply and Accumulate (WMMA) API for integer introduced in CUDA 10. This sample demonstrates the use of the CUDA WMMA API employing the Tensor Cores introduced in the Volta chip family for faster matrix operations. In addition to that, it demonstrates the use of the new CUDA function attribute cudaFuncAttributeMaxDynamicSharedMemorySize that allows the application to reserve an extended amount of shared memory than it is available by default.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -47,6 +47,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/immaTensorCoreGemm/README.md b/Samples/3_CUDA_Features/immaTensorCoreGemm/README.md
index 61b407100..db9d48029 100644
--- a/Samples/3_CUDA_Features/immaTensorCoreGemm/README.md
+++ b/Samples/3_CUDA_Features/immaTensorCoreGemm/README.md
@@ -10,7 +10,7 @@ Matrix Multiply, WMMA, Tensor Cores
 
 ## Supported SM Architectures
 
-[SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaEventRecord, cudaEventCreate, cudaFuncSetAttribute, cudaEventElapsedTime, cudaEventSynchronize, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties
+cudaMemcpy, cudaFree, cudaGetErrorString, cudaGetLastError, cudaEventSynchronize, cudaFuncSetAttribute, cudaEventRecord, cudaMemset, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2017.vcxproj b/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2017.vcxproj
index 79b1a3195..b48ad38fa 100644
--- a/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/immaTensorCoreGemm.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2019.vcxproj b/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2019.vcxproj
index 9351844dc..b5931f576 100644
--- a/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/immaTensorCoreGemm.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2022.vcxproj b/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2022.vcxproj
index 17495837d..90ae33904 100644
--- a/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/immaTensorCoreGemm.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/jacobiCudaGraphs/Makefile b/Samples/3_CUDA_Features/jacobiCudaGraphs/Makefile
index 35fadaa55..fa42ed87f 100644
--- a/Samples/3_CUDA_Features/jacobiCudaGraphs/Makefile
+++ b/Samples/3_CUDA_Features/jacobiCudaGraphs/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/jacobiCudaGraphs/NsightEclipse.xml b/Samples/3_CUDA_Features/jacobiCudaGraphs/NsightEclipse.xml
index a752218c7..e1572b437 100644
--- a/Samples/3_CUDA_Features/jacobiCudaGraphs/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/jacobiCudaGraphs/NsightEclipse.xml
@@ -3,28 +3,28 @@
 <entry>
   <name>jacobiCudaGraphs</name>
   <cuda_api_list>
-    <toolkit>cudaGraphAddMemsetNode</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
-    <toolkit>cudaMemcpyAsync</toolkit>
+    <toolkit>cudaExtent</toolkit>
+    <toolkit>cudaGraphLaunch</toolkit>
+    <toolkit>cudaGraphAddMemcpyNode</toolkit>
     <toolkit>cudaMallocHost</toolkit>
     <toolkit>cudaPitchedPtr</toolkit>
-    <toolkit>cudaGraphCreate</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaPos</toolkit>
-    <toolkit>cudaGraphAddMemcpyNode</toolkit>
     <toolkit>cudaStreamEndCapture</toolkit>
-    <toolkit>cudaGraphExecDestroy</toolkit>
-    <toolkit>cudaStreamBeginCapture</toolkit>
+    <toolkit>cudaGraphCreate</toolkit>
+    <toolkit>cudaFreeHost</toolkit>
+    <toolkit>cudaMemsetAsync</toolkit>
+    <toolkit>cudaMemcpyAsync</toolkit>
     <toolkit>cudaGraphExecKernelNodeSetParams</toolkit>
-    <toolkit>cudaStreamSynchronize</toolkit>
-    <toolkit>cudaGraphLaunch</toolkit>
-    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
     <toolkit>cudaGraphInstantiate</toolkit>
-    <toolkit>cudaExtent</toolkit>
-    <toolkit>cudaMemsetAsync</toolkit>
-    <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaGraphAddKernelNode</toolkit>
+    <toolkit>cudaStreamBeginCapture</toolkit>
+    <toolkit>cudaFree</toolkit>
     <toolkit>cudaGraphExecUpdate</toolkit>
+    <toolkit>cudaGraphAddKernelNode</toolkit>
+    <toolkit>cudaPos</toolkit>
+    <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaGraphAddMemsetNode</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphExecDestroy</toolkit>
   </cuda_api_list>
   <description><![CDATA[Demonstrates Instantiated CUDA Graph Update with Jacobi Iterative Method using cudaGraphExecKernelNodeSetParams() and cudaGraphExecUpdate() approach.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -66,6 +66,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/jacobiCudaGraphs/README.md b/Samples/3_CUDA_Features/jacobiCudaGraphs/README.md
index b9d76e69e..687221875 100644
--- a/Samples/3_CUDA_Features/jacobiCudaGraphs/README.md
+++ b/Samples/3_CUDA_Features/jacobiCudaGraphs/README.md
@@ -10,7 +10,7 @@ CUDA Graphs, Stream Capture, Instantiated CUDA Graph Update, Cooperative Groups
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaGraphAddMemsetNode, cudaStreamCreateWithFlags, cudaMemcpyAsync, cudaMallocHost, cudaPitchedPtr, cudaGraphCreate, cudaMalloc, cudaPos, cudaGraphAddMemcpyNode, cudaStreamEndCapture, cudaGraphExecDestroy, cudaStreamBeginCapture, cudaGraphExecKernelNodeSetParams, cudaStreamSynchronize, cudaGraphLaunch, cudaFree, cudaGraphInstantiate, cudaExtent, cudaMemsetAsync, cudaFreeHost, cudaGraphAddKernelNode, cudaGraphExecUpdate
+cudaExtent, cudaGraphLaunch, cudaGraphAddMemcpyNode, cudaMallocHost, cudaPitchedPtr, cudaStreamEndCapture, cudaGraphCreate, cudaFreeHost, cudaMemsetAsync, cudaMemcpyAsync, cudaGraphExecKernelNodeSetParams, cudaStreamCreateWithFlags, cudaGraphInstantiate, cudaStreamBeginCapture, cudaFree, cudaGraphExecUpdate, cudaGraphAddKernelNode, cudaPos, cudaStreamSynchronize, cudaGraphAddMemsetNode, cudaMalloc, cudaGraphExecDestroy
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2017.vcxproj b/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2017.vcxproj
index e8a5153bc..489735bcc 100644
--- a/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/jacobiCudaGraphs.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2019.vcxproj b/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2019.vcxproj
index be8464540..b6440eb15 100644
--- a/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/jacobiCudaGraphs.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2022.vcxproj b/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2022.vcxproj
index e7b1ea437..2d37b0879 100644
--- a/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/jacobiCudaGraphs.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/Makefile b/Samples/3_CUDA_Features/memMapIPCDrv/Makefile
index fd7d6ba76..ae7b17d1e 100644
--- a/Samples/3_CUDA_Features/memMapIPCDrv/Makefile
+++ b/Samples/3_CUDA_Features/memMapIPCDrv/Makefile
@@ -274,24 +274,6 @@ ifeq ($(TARGET_OS),darwin)
   SAMPLE_ENABLED := 0
 endif
 
-# This sample is not supported on ARMv7
-ifeq ($(TARGET_ARCH),armv7l)
-  $(info >>> WARNING - memMapIPCDrv is not supported on ARMv7 - waiving sample <<<)
-  SAMPLE_ENABLED := 0
-endif
-
-# This sample is not supported on aarch64
-ifeq ($(TARGET_ARCH),aarch64)
-  $(info >>> WARNING - memMapIPCDrv is not supported on aarch64 - waiving sample <<<)
-  SAMPLE_ENABLED := 0
-endif
-
-# This sample is not supported on sbsa
-ifeq ($(TARGET_ARCH),sbsa)
-  $(info >>> WARNING - memMapIPCDrv is not supported on sbsa - waiving sample <<<)
-  SAMPLE_ENABLED := 0
-endif
-
 ALL_LDFLAGS :=
 ALL_LDFLAGS += $(ALL_CCFLAGS)
 ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/README.md b/Samples/3_CUDA_Features/memMapIPCDrv/README.md
index 435af2edd..bace5c4f1 100644
--- a/Samples/3_CUDA_Features/memMapIPCDrv/README.md
+++ b/Samples/3_CUDA_Features/memMapIPCDrv/README.md
@@ -10,27 +10,27 @@ CUDA Driver API, cuMemMap IPC, MMAP
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
-Linux, Windows
+Linux, Windows, QNX
 
 ## Supported CPU Architecture
 
-x86_64, ppc64le
+x86_64, ppc64le, armv7l, aarch64
 
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuCtxSetCurrent, cuMemSetAccess, cuMemcpyDtoHAsync, cuStreamDestroy, cuInit, cuMemAddressReserve, cuCtxDestroy, cuModuleGetFunction, cuModuleLoad, cuStreamCreate, cuCtxCreate, cuMemExportToShareableHandle, cuMemAddressFree, cuMemGetAllocationGranularity, cuModuleLoadDataEx, cuDeviceGet, cuMemUnmap, cuDeviceGetAttribute, cuMemRelease, cuCtxEnablePeerAccess, cuMemMap, cuMemImportFromShareableHandle, cuMemCreate, cuStreamSynchronize, cuDeviceCanAccessPeer, cuDeviceGetCount, cuLaunchKernel, cuOccupancyMaxActiveBlocksPerMultiprocessor
+cuDeviceCanAccessPeer, cuMemImportFromShareableHandle, cuModuleLoadDataEx, cuModuleGetFunction, cuMemSetAccess, cuModuleLoad, cuStreamCreate, cuMemRelease, cuInit, cuLaunchKernel, cuMemcpyDtoHAsync, cuMemCreate, cuDeviceGet, cuCtxDestroy, cuDeviceGetCount, cuMemMap, cuMemExportToShareableHandle, cuStreamSynchronize, cuCtxEnablePeerAccess, cuDeviceGetAttribute, cuOccupancyMaxActiveBlocksPerMultiprocessor, cuCtxSetCurrent, cuMemGetAllocationGranularity, cuMemAddressFree, cuMemUnmap, cuCtxCreate, cuStreamDestroy, cuMemAddressReserve
 
 ## Dependencies needed to build/run
 [IPC](../../../README.md#ipc)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
@@ -52,9 +52,9 @@ $ cd <sample_dir>
 $ make
 ```
 The samples makefiles can take advantage of certain options:
-*  **TARGET_ARCH=<arch>** - cross-compile targeting a specific architecture. Allowed architectures are x86_64, ppc64le.
+*  **TARGET_ARCH=<arch>** - cross-compile targeting a specific architecture. Allowed architectures are x86_64, ppc64le, armv7l, aarch64.
     By default, TARGET_ARCH is set to HOST_ARCH. On a x86_64 machine, not setting TARGET_ARCH is the equivalent of setting TARGET_ARCH=x86_64.<br/>
-`$ make TARGET_ARCH=x86_64` <br/> `$ make TARGET_ARCH=ppc64le` <br/>
+`$ make TARGET_ARCH=x86_64` <br/> `$ make TARGET_ARCH=ppc64le` <br/> `$ make TARGET_ARCH=armv7l` <br/> `$ make TARGET_ARCH=aarch64` <br/>
     See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details.
 *   **dbg=1** - build with debug symbols
     ```
diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2017.vcxproj b/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2017.vcxproj
index 02b0d7ead..4d7d058a2 100644
--- a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -112,6 +112,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2019.vcxproj b/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2019.vcxproj
index 3abc66c66..287fbc927 100644
--- a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2022.vcxproj b/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2022.vcxproj
index 8da6a6cdc..d6bc39c60 100644
--- a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIpc.cpp b/Samples/3_CUDA_Features/memMapIPCDrv/memMapIpc.cpp
index ba275b64d..19d6aa608 100644
--- a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIpc.cpp
+++ b/Samples/3_CUDA_Features/memMapIPCDrv/memMapIpc.cpp
@@ -595,10 +595,6 @@ static void parentProcess(char *app) {
 
 // Host code
 int main(int argc, char **argv) {
-#if defined(__arm__) || defined(__aarch64__)
-  printf("Not supported on ARM\n");
-  return EXIT_WAIVED;
-#else
   // Initialize
   checkCudaErrors(cuInit(0));
 
@@ -608,7 +604,6 @@ int main(int argc, char **argv) {
     childProcess(atoi(argv[1]), atoi(argv[2]), argv);
   }
   return EXIT_SUCCESS;
-#endif
 }
 
 bool inline findModulePath(const char *module_file, string &module_path,
@@ -643,4 +638,4 @@ bool inline findModulePath(const char *module_file, string &module_path,
 
     return true;
   }
-}
\ No newline at end of file
+}
diff --git a/Samples/3_CUDA_Features/newdelete/Makefile b/Samples/3_CUDA_Features/newdelete/Makefile
index 9e4ba3b66..48c352f31 100644
--- a/Samples/3_CUDA_Features/newdelete/Makefile
+++ b/Samples/3_CUDA_Features/newdelete/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/newdelete/NsightEclipse.xml b/Samples/3_CUDA_Features/newdelete/NsightEclipse.xml
index edcd52704..ae7639e44 100644
--- a/Samples/3_CUDA_Features/newdelete/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/newdelete/NsightEclipse.xml
@@ -3,11 +3,11 @@
 <entry>
   <name>newdelete</name>
   <cuda_api_list>
-    <toolkit>cudaDeviceSetLimit</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaDeviceSetLimit</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates dynamic global memory allocation through device C++ new and delete operators and virtual function declarations available with CUDA 4.0.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -46,6 +46,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/newdelete/README.md b/Samples/3_CUDA_Features/newdelete/README.md
index a976004d4..1df54ae29 100644
--- a/Samples/3_CUDA_Features/newdelete/README.md
+++ b/Samples/3_CUDA_Features/newdelete/README.md
@@ -10,7 +10,7 @@ Device Memory Allocation, C++ Templates
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDeviceSetLimit, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy
+cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaDeviceSetLimit, cudaMalloc
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/3_CUDA_Features/newdelete/newdelete_vs2017.vcxproj b/Samples/3_CUDA_Features/newdelete/newdelete_vs2017.vcxproj
index 04f1a3b28..f5546f277 100644
--- a/Samples/3_CUDA_Features/newdelete/newdelete_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/newdelete/newdelete_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/newdelete.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/newdelete/newdelete_vs2019.vcxproj b/Samples/3_CUDA_Features/newdelete/newdelete_vs2019.vcxproj
index 75e28c88f..4f6a09f44 100644
--- a/Samples/3_CUDA_Features/newdelete/newdelete_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/newdelete/newdelete_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/newdelete.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/newdelete/newdelete_vs2022.vcxproj b/Samples/3_CUDA_Features/newdelete/newdelete_vs2022.vcxproj
index 20c428e5f..cec331d86 100644
--- a/Samples/3_CUDA_Features/newdelete/newdelete_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/newdelete/newdelete_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/newdelete.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/ptxjit/NsightEclipse.xml b/Samples/3_CUDA_Features/ptxjit/NsightEclipse.xml
index a5dc2e54f..2beac119a 100644
--- a/Samples/3_CUDA_Features/ptxjit/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/ptxjit/NsightEclipse.xml
@@ -3,18 +3,18 @@
 <entry>
   <name>ptxjit</name>
   <cuda_api_list>
+    <driver>cuLaunchKernel</driver>
+    <driver>cuModuleLoadData</driver>
+    <driver>cuLinkCreate</driver>
     <driver>cuModuleGetFunction</driver>
     <driver>cuLinkAddData</driver>
-    <driver>cuModuleLoadData</driver>
-    <driver>cuLaunchKernel</driver>
     <driver>cuModuleUnload</driver>
-    <driver>cuLinkComplete</driver>
-    <driver>cuLinkCreate</driver>
     <driver>cuLinkDestroy</driver>
-    <toolkit>cudaDriverGetVersion</toolkit>
-    <toolkit>cudaFree</toolkit>
+    <driver>cuLinkComplete</driver>
     <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaDriverGetVersion</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample uses the Driver API to just-in-time compile (JIT) a Kernel from PTX code. Additionally, this sample demonstrates the seamless interoperability capability of the CUDA Runtime and CUDA Driver API calls.  For CUDA 5.5, this sample shows how to use cuLink* functions to link PTX assembly using the CUDA driver at runtime.]]></description>
   <devicecompilation>separate</devicecompilation>
@@ -57,6 +57,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/ptxjit/README.md b/Samples/3_CUDA_Features/ptxjit/README.md
index 021b4b400..314c62844 100644
--- a/Samples/3_CUDA_Features/ptxjit/README.md
+++ b/Samples/3_CUDA_Features/ptxjit/README.md
@@ -10,7 +10,7 @@ CUDA Driver API
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuModuleGetFunction, cuLinkAddData, cuModuleLoadData, cuLaunchKernel, cuModuleUnload, cuLinkComplete, cuLinkCreate, cuLinkDestroy
+cuLaunchKernel, cuModuleLoadData, cuLinkCreate, cuModuleGetFunction, cuLinkAddData, cuModuleUnload, cuLinkDestroy, cuLinkComplete
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDriverGetVersion, cudaFree, cudaMalloc, cudaMemcpy
+cudaMalloc, cudaDriverGetVersion, cudaMemcpy, cudaFree
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2017.vcxproj b/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2017.vcxproj
index 6c7fa953c..8544a38c4 100644
--- a/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -111,6 +111,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2019.vcxproj b/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2019.vcxproj
index 9d3b8c111..d0c152c80 100644
--- a/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2022.vcxproj b/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2022.vcxproj
index f8eecb9f2..c4dbf9127 100644
--- a/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/simpleCudaGraphs/Makefile b/Samples/3_CUDA_Features/simpleCudaGraphs/Makefile
index a64fbbf6d..d956e9b4e 100644
--- a/Samples/3_CUDA_Features/simpleCudaGraphs/Makefile
+++ b/Samples/3_CUDA_Features/simpleCudaGraphs/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/simpleCudaGraphs/NsightEclipse.xml b/Samples/3_CUDA_Features/simpleCudaGraphs/NsightEclipse.xml
index 0b60949bb..56db08fb3 100644
--- a/Samples/3_CUDA_Features/simpleCudaGraphs/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/simpleCudaGraphs/NsightEclipse.xml
@@ -3,37 +3,37 @@
 <entry>
   <name>simpleCudaGraphs</name>
   <cuda_api_list>
-    <toolkit>cudaGraphAddMemsetNode</toolkit>
-    <toolkit>cudaGraphsUsingStreamCapture</toolkit>
-    <toolkit>cudaMemcpyAsync</toolkit>
-    <toolkit>cudaGraphGetNodes</toolkit>
-    <toolkit>cudaStreamDestroy</toolkit>
-    <toolkit>cudaMallocHost</toolkit>
     <toolkit>cudaGraphClone</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaPitchedPtr</toolkit>
-    <toolkit>cudaGraphCreate</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaPos</toolkit>
+    <toolkit>cudaExtent</toolkit>
+    <toolkit>cudaGraphLaunch</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
+    <toolkit>cudaLaunchHostFunc</toolkit>
     <toolkit>cudaGraphAddMemcpyNode</toolkit>
+    <toolkit>cudaMallocHost</toolkit>
+    <toolkit>cudaPitchedPtr</toolkit>
     <toolkit>cudaStreamEndCapture</toolkit>
-    <toolkit>cudaGraphExecDestroy</toolkit>
-    <toolkit>cudaStreamBeginCapture</toolkit>
+    <toolkit>cudaGraphCreate</toolkit>
+    <toolkit>cudaFreeHost</toolkit>
+    <toolkit>cudaGraphGetNodes</toolkit>
+    <toolkit>cudaMemsetAsync</toolkit>
+    <toolkit>cudaMemcpyAsync</toolkit>
     <toolkit>cudaGraphAddHostNode</toolkit>
-    <toolkit>cudaGraphsManual</toolkit>
-    <toolkit>cudaStreamSynchronize</toolkit>
-    <toolkit>cudaGraphDestroy</toolkit>
-    <toolkit>cudaGraphLaunch</toolkit>
+    <toolkit>cudaGraphInstantiate</toolkit>
+    <toolkit>cudaStreamDestroy</toolkit>
+    <toolkit>cudaStreamBeginCapture</toolkit>
     <toolkit>cudaStreamWaitEvent</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
+    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
-    <toolkit>cudaGraphInstantiate</toolkit>
-    <toolkit>cudaLaunchHostFunc</toolkit>
-    <toolkit>cudaExtent</toolkit>
-    <toolkit>cudaMemsetAsync</toolkit>
-    <toolkit>cudaFreeHost</toolkit>
+    <toolkit>cudaPos</toolkit>
     <toolkit>cudaGraphAddKernelNode</toolkit>
+    <toolkit>cudaGraphDestroy</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaGraphsManual</toolkit>
+    <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaGraphAddMemsetNode</toolkit>
+    <toolkit>cudaGraphsUsingStreamCapture</toolkit>
+    <toolkit>cudaGraphExecDestroy</toolkit>
   </cuda_api_list>
   <description><![CDATA[A demonstration of CUDA Graphs creation, instantiation and launch using Graphs APIs and Stream Capture APIs.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -73,6 +73,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/simpleCudaGraphs/README.md b/Samples/3_CUDA_Features/simpleCudaGraphs/README.md
index 9a2c9249a..b421b2fb4 100644
--- a/Samples/3_CUDA_Features/simpleCudaGraphs/README.md
+++ b/Samples/3_CUDA_Features/simpleCudaGraphs/README.md
@@ -10,7 +10,7 @@ CUDA Graphs, Stream Capture
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaGraphAddMemsetNode, cudaGraphsUsingStreamCapture, cudaMemcpyAsync, cudaGraphGetNodes, cudaStreamDestroy, cudaMallocHost, cudaGraphClone, cudaEventCreate, cudaPitchedPtr, cudaGraphCreate, cudaMalloc, cudaPos, cudaGraphAddMemcpyNode, cudaStreamEndCapture, cudaGraphExecDestroy, cudaStreamBeginCapture, cudaGraphAddHostNode, cudaGraphsManual, cudaStreamSynchronize, cudaGraphDestroy, cudaGraphLaunch, cudaStreamWaitEvent, cudaFree, cudaEventRecord, cudaStreamCreate, cudaGraphInstantiate, cudaLaunchHostFunc, cudaExtent, cudaMemsetAsync, cudaFreeHost, cudaGraphAddKernelNode
+cudaGraphClone, cudaExtent, cudaGraphLaunch, cudaStreamCreate, cudaLaunchHostFunc, cudaGraphAddMemcpyNode, cudaMallocHost, cudaPitchedPtr, cudaStreamEndCapture, cudaGraphCreate, cudaFreeHost, cudaGraphGetNodes, cudaMemsetAsync, cudaMemcpyAsync, cudaGraphAddHostNode, cudaGraphInstantiate, cudaStreamDestroy, cudaStreamBeginCapture, cudaStreamWaitEvent, cudaEventCreate, cudaMalloc, cudaFree, cudaPos, cudaGraphAddKernelNode, cudaGraphDestroy, cudaEventRecord, cudaGraphsManual, cudaStreamSynchronize, cudaGraphAddMemsetNode, cudaGraphsUsingStreamCapture, cudaGraphExecDestroy
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2017.vcxproj b/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2017.vcxproj
index d9e1f37e1..a9525b01f 100644
--- a/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleCudaGraphs.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2019.vcxproj b/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2019.vcxproj
index 7d3dfb84a..168b88abc 100644
--- a/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleCudaGraphs.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2022.vcxproj b/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2022.vcxproj
index b41246b05..58840c801 100644
--- a/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleCudaGraphs.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/tf32TensorCoreGemm/Makefile b/Samples/3_CUDA_Features/tf32TensorCoreGemm/Makefile
index eee8e8437..a5c514249 100644
--- a/Samples/3_CUDA_Features/tf32TensorCoreGemm/Makefile
+++ b/Samples/3_CUDA_Features/tf32TensorCoreGemm/Makefile
@@ -322,9 +322,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 80 86 87
+SMS ?= 80 86 87 90
 else
-SMS ?= 80 86
+SMS ?= 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/tf32TensorCoreGemm/NsightEclipse.xml b/Samples/3_CUDA_Features/tf32TensorCoreGemm/NsightEclipse.xml
index 8213c761f..f21f38958 100644
--- a/Samples/3_CUDA_Features/tf32TensorCoreGemm/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/tf32TensorCoreGemm/NsightEclipse.xml
@@ -7,18 +7,18 @@
     <flag>--maxrregcount=128</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaFuncSetAttribute</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaGetLastError</toolkit>
     <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaFuncSetAttribute</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaMemset</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaGetLastError</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[A CUDA sample demonstrating tf32 (e8m10) GEMM computation using the Warp Matrix Multiply and Accumulate (WMMA) API introduced with CUDA 11 in Ampere chip family tensor cores for faster matrix operations. This sample also uses async copy provided by cuda pipeline interface for gmem to shmem async loads which improves kernel performance and reduces register presssure.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -53,6 +53,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/tf32TensorCoreGemm/README.md b/Samples/3_CUDA_Features/tf32TensorCoreGemm/README.md
index 343eb6de0..d7f41f68e 100644
--- a/Samples/3_CUDA_Features/tf32TensorCoreGemm/README.md
+++ b/Samples/3_CUDA_Features/tf32TensorCoreGemm/README.md
@@ -10,7 +10,7 @@ Matrix Multiply, WMMA, Tensor Cores
 
 ## Supported SM Architectures
 
-[SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaEventRecord, cudaEventCreate, cudaFuncSetAttribute, cudaEventElapsedTime, cudaEventSynchronize, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties
+cudaMemcpy, cudaFree, cudaGetErrorString, cudaGetLastError, cudaEventSynchronize, cudaFuncSetAttribute, cudaEventRecord, cudaMemset, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
 
 ## Dependencies needed to build/run
 [CPP11](../../../README.md#cpp11)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2017.vcxproj b/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2017.vcxproj
index 6b002d886..2948b39d0 100644
--- a/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/tf32TensorCoreGemm.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2019.vcxproj b/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2019.vcxproj
index 4166d39e9..7a06218bc 100644
--- a/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/tf32TensorCoreGemm.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2022.vcxproj b/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2022.vcxproj
index c4534b653..f7b1d1a04 100644
--- a/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/tf32TensorCoreGemm.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/Makefile b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/Makefile
index e743176ee..aa25f1512 100644
--- a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/Makefile
+++ b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/Makefile
@@ -304,9 +304,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/NsightEclipse.xml b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/NsightEclipse.xml
index 7f5952b94..f0457b127 100644
--- a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/NsightEclipse.xml
@@ -6,11 +6,11 @@
     <flag>--std=c++11</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceGetAttribute</toolkit>
+    <toolkit>cudaMemset</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates how using Cooperative Groups (CG) to perform warp aggregated atomics to single and multiple counters, a useful technique to improve performance when many threads atomically add to a single or multiple counters.]]></description>
   <includepaths>
@@ -50,6 +50,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/README.md b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/README.md
index 69c27bdd1..f40b05ef1 100644
--- a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/README.md
+++ b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/README.md
@@ -10,7 +10,7 @@ Cooperative Groups, Atomic Intrinsics
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaDeviceGetAttribute, cudaMalloc, cudaMemcpy
+cudaMemcpy, cudaFree, cudaDeviceGetAttribute, cudaMemset, cudaMalloc
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2017.vcxproj b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2017.vcxproj
index d7339145e..0e2d5973f 100644
--- a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/warpAggregatedAtomicsCG.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2019.vcxproj b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2019.vcxproj
index 36bd9bcac..a15057bb7 100644
--- a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/warpAggregatedAtomicsCG.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2022.vcxproj b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2022.vcxproj
index ff0902b4b..8567bb365 100644
--- a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/warpAggregatedAtomicsCG.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2017.vcxproj b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2017.vcxproj
index 695d8b1ad..61a190adc 100644
--- a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -117,6 +117,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2019.vcxproj b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2019.vcxproj
index 702ecd0ab..438dd31e2 100644
--- a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2022.vcxproj b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2022.vcxproj
index 0e5f29290..867808495 100644
--- a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/NsightEclipse.xml b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/NsightEclipse.xml
index cc161f14a..60bd7e9d2 100644
--- a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/NsightEclipse.xml
@@ -3,12 +3,12 @@
 <entry>
   <name>FilterBorderControlNPP</name>
   <cuda_api_list>
+    <toolkit>cudaRuntimeGetVersion</toolkit>
     <toolkit>cudaDeviceReset</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaDriverGetVersion</toolkit>
     <toolkit>cudaDeviceInit</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaRuntimeGetVersion</toolkit>
+    <toolkit>cudaDriverGetVersion</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates how any border version of an NPP filtering function can be used in the most common mode, with border control enabled. Mentioned functions can be used to duplicate the results of the equivalent non-border version of the NPP functions. They can be also used for enabling and disabling border control on various source image edges depending on what portion of the source image is being used as input.]]></description>
@@ -68,6 +68,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/README.md b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/README.md
index 23f2bfb89..54a652ade 100644
--- a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/README.md
+++ b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/README.md
@@ -10,7 +10,7 @@ Performance Strategies, Image Processing, NPP Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDeviceReset, cudaGetDeviceCount, cudaDriverGetVersion, cudaDeviceInit, cudaSetDevice, cudaRuntimeGetVersion, cudaGetDeviceProperties
+cudaRuntimeGetVersion, cudaDeviceReset, cudaSetDevice, cudaGetDeviceCount, cudaDeviceInit, cudaDriverGetVersion, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [FreeImage](../../../README.md#freeimage), [NPP](../../../README.md#npp)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2017.vcxproj b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2017.vcxproj
index dd8192260..17822bcb5 100644
--- a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2019.vcxproj b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2019.vcxproj
index d2ac6df21..753226082 100644
--- a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2022.vcxproj b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2022.vcxproj
index 1de5e0199..a40090f1c 100644
--- a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/NsightEclipse.xml b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/NsightEclipse.xml
index 40c95ec03..34ed799bc 100644
--- a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/NsightEclipse.xml
@@ -3,13 +3,13 @@
 <entry>
   <name>MersenneTwisterGP11213</name>
   <cuda_api_list>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
     <toolkit>cudaStreamDestroy</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaMallocHost</toolkit>
     <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
     <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaMemcpyAsync</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates the Mersenne Twister random number generator GP11213 in cuRAND.]]></description>
@@ -58,6 +58,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/README.md b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/README.md
index 9af50ae28..6244164c2 100644
--- a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/README.md
+++ b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/README.md
@@ -10,7 +10,7 @@ CURAND Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamDestroy, cudaFree, cudaMallocHost, cudaFreeHost, cudaMalloc, cudaStreamCreateWithFlags, cudaStreamSynchronize, cudaMemcpyAsync
+cudaStreamCreateWithFlags, cudaStreamDestroy, cudaFree, cudaMallocHost, cudaFreeHost, cudaStreamSynchronize, cudaMalloc, cudaMemcpyAsync
 
 ## Dependencies needed to build/run
 [CURAND](../../../README.md#curand)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/NsightEclipse.xml b/Samples/4_CUDA_Libraries/batchCUBLAS/NsightEclipse.xml
index e166cad3b..44f306e21 100644
--- a/Samples/4_CUDA_Libraries/batchCUBLAS/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/batchCUBLAS/NsightEclipse.xml
@@ -5,15 +5,15 @@
   <cuda_api_list>
     <driver>cuRand</driver>
     <driver>cuEqual</driver>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaGetErrorString</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
+    <toolkit>cudaGetLastError</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaGetDevice</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaGetLastError</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
-    <toolkit>cudaGetDevice</toolkit>
   </cuda_api_list>
   <description><![CDATA[A CUDA Sample that demonstrates how using batched CUBLAS API calls to improve overall performance.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -58,6 +58,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/README.md b/Samples/4_CUDA_Libraries/batchCUBLAS/README.md
index ade22850e..635750224 100644
--- a/Samples/4_CUDA_Libraries/batchCUBLAS/README.md
+++ b/Samples/4_CUDA_Libraries/batchCUBLAS/README.md
@@ -10,7 +10,7 @@ Linear Algebra, CUBLAS Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -26,14 +26,14 @@ x86_64, ppc64le, armv7l
 cuRand, cuEqual
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaStreamCreate, cudaDeviceSynchronize, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties, cudaGetDevice
+cudaMemcpy, cudaGetErrorString, cudaFree, cudaGetLastError, cudaDeviceSynchronize, cudaGetDevice, cudaMalloc, cudaStreamCreate, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [CUBLAS](../../../README.md#cublas)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2017.vcxproj b/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2017.vcxproj
index 8df8d4956..2c41bea31 100644
--- a/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2019.vcxproj b/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2019.vcxproj
index 3f42f964d..1bd17bac8 100644
--- a/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2022.vcxproj b/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2022.vcxproj
index 1823a30b0..dba07dfdd 100644
--- a/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/NsightEclipse.xml b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/NsightEclipse.xml
index 821f1eea8..9cd72dd80 100644
--- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/NsightEclipse.xml
@@ -3,19 +3,19 @@
 <entry>
   <name>batchedLabelMarkersAndLabelCompressionNPP</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaMallocHost</toolkit>
+    <toolkit>cudaRuntimeGetVersion</toolkit>
     <toolkit>cudaMallocPitch</toolkit>
+    <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceGetAttribute</toolkit>
-    <toolkit>cudaFreeHost</toolkit>
+    <toolkit>cudaMallocHost</toolkit>
     <toolkit>cudaDriverGetVersion</toolkit>
-    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaFreeHost</toolkit>
+    <toolkit>cudaGetDevice</toolkit>
     <toolkit>cudaStreamGetFlags</toolkit>
-    <toolkit>cudaRuntimeGetVersion</toolkit>
     <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaMemcpyAsync</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
-    <toolkit>cudaGetDevice</toolkit>
   </cuda_api_list>
   <description><![CDATA[An NPP CUDA Sample that demonstrates how to use the NPP label markers generation and label compression functions based on a Union Find (UF) algorithm including both single image and batched image versions.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -68,6 +68,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/README.md b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/README.md
index 97dbcd1dd..ddc106f35 100644
--- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/README.md
+++ b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/README.md
@@ -10,7 +10,7 @@ Performance Strategies, Image Processing, NPP Library, Using NPP Batch Functions
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaMallocHost, cudaMallocPitch, cudaDeviceGetAttribute, cudaFreeHost, cudaDriverGetVersion, cudaMalloc, cudaStreamGetFlags, cudaRuntimeGetVersion, cudaStreamSynchronize, cudaMemcpyAsync, cudaGetDeviceProperties, cudaGetDevice
+cudaRuntimeGetVersion, cudaMallocPitch, cudaFree, cudaDeviceGetAttribute, cudaMallocHost, cudaDriverGetVersion, cudaFreeHost, cudaGetDevice, cudaStreamGetFlags, cudaStreamSynchronize, cudaMalloc, cudaMemcpyAsync, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [NPP](../../../README.md#npp)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2017.vcxproj b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2017.vcxproj
index 59e207a8c..da25b507b 100644
--- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2019.vcxproj b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2019.vcxproj
index 148be7cb1..928dc419d 100644
--- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2022.vcxproj b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2022.vcxproj
index 1f5a0ff4b..5b9408aab 100644
--- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/NsightEclipse.xml b/Samples/4_CUDA_Libraries/boxFilterNPP/NsightEclipse.xml
index ae3b9b465..da9c0d55c 100644
--- a/Samples/4_CUDA_Libraries/boxFilterNPP/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/boxFilterNPP/NsightEclipse.xml
@@ -3,8 +3,8 @@
 <entry>
   <name>boxFilterNPP</name>
   <cuda_api_list>
-    <toolkit>cudaDriverGetVersion</toolkit>
     <toolkit>cudaRuntimeGetVersion</toolkit>
+    <toolkit>cudaDriverGetVersion</toolkit>
   </cuda_api_list>
   <description><![CDATA[A NPP CUDA Sample that demonstrates how to use NPP FilterBox function to perform a Box Filter.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -64,6 +64,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/README.md b/Samples/4_CUDA_Libraries/boxFilterNPP/README.md
index 3de69529b..f647397c2 100644
--- a/Samples/4_CUDA_Libraries/boxFilterNPP/README.md
+++ b/Samples/4_CUDA_Libraries/boxFilterNPP/README.md
@@ -10,7 +10,7 @@ Performance Strategies, Image Processing, NPP Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDriverGetVersion, cudaRuntimeGetVersion
+cudaRuntimeGetVersion, cudaDriverGetVersion
 
 ## Dependencies needed to build/run
 [FreeImage](../../../README.md#freeimage), [NPP](../../../README.md#npp)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2017.vcxproj b/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2017.vcxproj
index 693cda831..bf2a30abe 100644
--- a/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -117,6 +117,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2019.vcxproj b/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2019.vcxproj
index 0f7a3fa01..a7ab43bb7 100644
--- a/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2022.vcxproj b/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2022.vcxproj
index 1ef674c0d..5b3e3fb3c 100644
--- a/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/NsightEclipse.xml
index d4dca5bac..26b3f456c 100644
--- a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/NsightEclipse.xml
@@ -3,13 +3,13 @@
 <entry>
   <name>cannyEdgeDetectorNPP</name>
   <cuda_api_list>
+    <toolkit>cudaRuntimeGetVersion</toolkit>
     <toolkit>cudaFree</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaDeviceInit</toolkit>
     <toolkit>cudaDriverGetVersion</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaDeviceInit</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaRuntimeGetVersion</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[An NPP CUDA Sample that demonstrates the recommended parameters to use with the nppiFilterCannyBorder_8u_C1R Canny Edge Detection image filter function. This function expects a single channel 8-bit grayscale input image. You can generate a grayscale image from a color image by first calling nppiColorToGray() or nppiRGBToGray(). The Canny Edge Detection function combines and improves on the techniques required to produce an edge detection image using multiple steps.]]></description>
@@ -66,6 +66,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/README.md b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/README.md
index 41630e5da..01493d71b 100644
--- a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/README.md
+++ b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/README.md
@@ -10,7 +10,7 @@ Performance Strategies, Image Processing, NPP Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGetDeviceCount, cudaDriverGetVersion, cudaMalloc, cudaDeviceInit, cudaSetDevice, cudaRuntimeGetVersion, cudaGetDeviceProperties
+cudaRuntimeGetVersion, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaDeviceInit, cudaDriverGetVersion, cudaMalloc, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [FreeImage](../../../README.md#freeimage), [NPP](../../../README.md#npp)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2017.vcxproj b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2017.vcxproj
index a2fe1e4c8..338a498d4 100644
--- a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -117,6 +117,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2019.vcxproj b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2019.vcxproj
index 0510328c9..91164159a 100644
--- a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2022.vcxproj b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2022.vcxproj
index 29e20532b..31270caa3 100644
--- a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradient/NsightEclipse.xml b/Samples/4_CUDA_Libraries/conjugateGradient/NsightEclipse.xml
index 7fb145798..99a85b356 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradient/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/conjugateGradient/NsightEclipse.xml
@@ -3,10 +3,10 @@
 <entry>
   <name>conjugateGradient</name>
   <cuda_api_list>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample implements a conjugate gradient solver on GPU using CUBLAS and CUSPARSE library.]]></description>
@@ -59,6 +59,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradient/README.md b/Samples/4_CUDA_Libraries/conjugateGradient/README.md
index c85219875..9f6647821 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradient/README.md
+++ b/Samples/4_CUDA_Libraries/conjugateGradient/README.md
@@ -10,7 +10,7 @@ Linear Algebra, CUBLAS Library, CUSPARSE Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [CUBLAS](../../../README.md#cublas), [CUSPARSE](../../../README.md#cusparse)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2017.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2017.vcxproj
index 3f9f1fc84..42096e5ab 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2019.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2019.vcxproj
index 9755b7c00..44cdb92c2 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2022.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2022.vcxproj
index 2d9bb9181..afb56e1de 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/Makefile b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/Makefile
index af3ffc453..4c4c95d56 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/Makefile
+++ b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/NsightEclipse.xml b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/NsightEclipse.xml
index 3a4d5e980..0a766802f 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/NsightEclipse.xml
@@ -3,23 +3,23 @@
 <entry>
   <name>conjugateGradientCudaGraphs</name>
   <cuda_api_list>
+    <toolkit>cudaGraphInstantiate</toolkit>
     <toolkit>cudaStreamDestroy</toolkit>
+    <toolkit>cudaStreamBeginCapture</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphExecDestroy</toolkit>
     <toolkit>cudaMallocHost</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
-    <toolkit>cudaGraphInstantiate</toolkit>
-    <toolkit>cudaOccupancyMaxPotentialBlockSize</toolkit>
-    <toolkit>cudaStreamBeginCapture</toolkit>
-    <toolkit>cudaMemsetAsync</toolkit>
     <toolkit>cudaStreamEndCapture</toolkit>
+    <toolkit>cudaGraphDestroy</toolkit>
     <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphLaunch</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
     <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaOccupancyMaxPotentialBlockSize</toolkit>
+    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaMemcpyAsync</toolkit>
-    <toolkit>cudaGraphDestroy</toolkit>
+    <toolkit>cudaMemsetAsync</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
-    <toolkit>cudaGraphLaunch</toolkit>
+    <toolkit>cudaGraphExecDestroy</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample implements a conjugate gradient solver on GPU using CUBLAS and CUSPARSE library calls captured and called using CUDA Graph APIs.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -71,6 +71,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/README.md b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/README.md
index 8d634cdea..787c89a4f 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/README.md
+++ b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/README.md
@@ -10,7 +10,7 @@ Linear Algebra, CUBLAS Library, CUSPARSE Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamDestroy, cudaFree, cudaGraphExecDestroy, cudaMallocHost, cudaStreamCreate, cudaGraphInstantiate, cudaOccupancyMaxPotentialBlockSize, cudaStreamBeginCapture, cudaMemsetAsync, cudaStreamEndCapture, cudaFreeHost, cudaMalloc, cudaStreamSynchronize, cudaMemcpyAsync, cudaGraphDestroy, cudaGetDeviceProperties, cudaGraphLaunch
+cudaGraphInstantiate, cudaStreamDestroy, cudaStreamBeginCapture, cudaFree, cudaMallocHost, cudaStreamEndCapture, cudaGraphDestroy, cudaFreeHost, cudaGraphLaunch, cudaStreamCreate, cudaStreamSynchronize, cudaOccupancyMaxPotentialBlockSize, cudaMalloc, cudaMemcpyAsync, cudaMemsetAsync, cudaGetDeviceProperties, cudaGraphExecDestroy
 
 ## Dependencies needed to build/run
 [CUBLAS](../../../README.md#cublas), [CUSPARSE](../../../README.md#cusparse)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2017.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2017.vcxproj
index 8bd52098a..2a1e00ec7 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/conjugateGradientCudaGraphs.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2019.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2019.vcxproj
index 1838fc792..c1e36760b 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/conjugateGradientCudaGraphs.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2022.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2022.vcxproj
index b07dfc7b8..f641b210c 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/conjugateGradientCudaGraphs.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/Makefile b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/Makefile
index 2cd57b055..6e5d34352 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/Makefile
+++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/Makefile
@@ -322,9 +322,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 61 70 72 75 80 86 87
+SMS ?= 61 70 72 75 80 86 87 90
 else
-SMS ?= 60 61 70 75 80 86
+SMS ?= 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/NsightEclipse.xml b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/NsightEclipse.xml
index 55c72749e..0deaa733a 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/NsightEclipse.xml
@@ -7,15 +7,15 @@
   </cflags>
   <cuda_api_list>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaMallocManaged</toolkit>
-    <toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaLaunchCooperativeKernel</toolkit>
+    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample implements a conjugate gradient solver on GPU using Multi Block Cooperative Groups, also uses Unified Memory.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -60,6 +60,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/README.md b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/README.md
index 804955b69..e8c0643c1 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/README.md
+++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/README.md
@@ -10,7 +10,7 @@ Unified Memory, Linear Algebra, Cooperative Groups, MultiBlock Cooperative Group
 
 ## Supported SM Architectures
 
-[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaEventRecord, cudaMallocManaged, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaEventCreate, cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventDestroy, cudaLaunchCooperativeKernel, cudaGetDeviceProperties
+cudaFree, cudaMallocManaged, cudaDeviceSynchronize, cudaEventRecord, cudaLaunchCooperativeKernel, cudaEventDestroy, cudaEventElapsedTime, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaGetDeviceProperties, cudaEventCreate
 
 ## Dependencies needed to build/run
 [UVM](../../../README.md#uvm), [MBCG](../../../README.md#mbcg)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2017.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2017.vcxproj
index 0fa1a17ec..d3fc2fdf8 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/conjugateGradientMultiBlockCG.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2019.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2019.vcxproj
index 7301e0327..6c064a95a 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/conjugateGradientMultiBlockCG.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2022.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2022.vcxproj
index 57816f0be..0e4d81bed 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/conjugateGradientMultiBlockCG.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/Makefile b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/Makefile
index 06f2703ef..317131819 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/Makefile
+++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/Makefile
@@ -322,9 +322,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 61 70 72 75 80 86 87
+SMS ?= 61 70 72 75 80 86 87 90
 else
-SMS ?= 60 61 70 75 80 86
+SMS ?= 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/NsightEclipse.xml b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/NsightEclipse.xml
index 7852487ae..ef83507cf 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/NsightEclipse.xml
@@ -8,22 +8,22 @@
     <flag>--std=c++11</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaDeviceEnablePeerAccess</toolkit>
-    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaHostAlloc</toolkit>
+    <toolkit>cudaMemPrefetchAsync</toolkit>
     <toolkit>cudaFree</toolkit>
+    <toolkit>cudaLaunchCooperativeKernel</toolkit>
     <toolkit>cudaMallocManaged</toolkit>
-    <toolkit>cudaMemPrefetchAsync</toolkit>
-    <toolkit>cudaHostAlloc</toolkit>
-    <toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaGetDeviceProperties</toolkit>
     <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaDeviceCanAccessPeer</toolkit>
-    <toolkit>cudaLaunchCooperativeKernel</toolkit>
+    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
     <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaDeviceEnablePeerAccess</toolkit>
     <toolkit>cudaMemAdvise</toolkit>
-    <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
+    <toolkit>cudaDeviceCanAccessPeer</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample implements a conjugate gradient solver on multiple GPUs using Multi Device Cooperative Groups, also uses Unified Memory optimized using prefetching and usage hints.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -71,6 +71,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/README.md b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/README.md
index e21a35071..9d77bf384 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/README.md
+++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/README.md
@@ -10,7 +10,7 @@ Unified Memory, Linear Algebra, Cooperative Groups, MultiDevice Cooperative Grou
 
 ## Supported SM Architectures
 
-[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDeviceEnablePeerAccess, cudaMemset, cudaFree, cudaMallocManaged, cudaMemPrefetchAsync, cudaHostAlloc, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaStreamCreate, cudaGetDeviceCount, cudaFreeHost, cudaSetDevice, cudaDeviceCanAccessPeer, cudaLaunchCooperativeKernel, cudaStreamSynchronize, cudaMemAdvise, cudaGetDeviceProperties
+cudaHostAlloc, cudaMemPrefetchAsync, cudaFree, cudaLaunchCooperativeKernel, cudaMallocManaged, cudaSetDevice, cudaGetDeviceCount, cudaGetDeviceProperties, cudaFreeHost, cudaMemset, cudaStreamCreate, cudaStreamSynchronize, cudaDeviceEnablePeerAccess, cudaMemAdvise, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaDeviceCanAccessPeer
 
 ## Dependencies needed to build/run
 [UVM](../../../README.md#uvm), [MDCG](../../../README.md#mdcg), [CPP11](../../../README.md#cpp11)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2017.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2017.vcxproj
index 85930d364..b58051c34 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/conjugateGradientMultiDeviceCG.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2019.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2019.vcxproj
index 4a5df0b85..b422370dd 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/conjugateGradientMultiDeviceCG.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2022.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2022.vcxproj
index 91ab39cc7..41caff0d8 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/conjugateGradientMultiDeviceCG.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/NsightEclipse.xml b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/NsightEclipse.xml
index 62d843c01..760d5de3e 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/NsightEclipse.xml
@@ -3,10 +3,10 @@
 <entry>
   <name>conjugateGradientPrecond</name>
   <cuda_api_list>
-    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
+    <toolkit>cudaMemset</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample implements a preconditioned conjugate gradient solver on GPU using CUBLAS and CUSPARSE library.]]></description>
@@ -57,6 +57,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/README.md b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/README.md
index 6e1116d59..bded98176 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/README.md
+++ b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/README.md
@@ -10,7 +10,7 @@ Linear Algebra, CUBLAS Library, CUSPARSE Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaFree, cudaMemset, cudaMalloc, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [CUBLAS](../../../README.md#cublas), [CUSPARSE](../../../README.md#cusparse)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2017.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2017.vcxproj
index bd750e905..3af1df6d3 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2019.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2019.vcxproj
index 593817cda..0721d9eb5 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2022.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2022.vcxproj
index d368d236b..e601f5fd1 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientUM/Makefile b/Samples/4_CUDA_Libraries/conjugateGradientUM/Makefile
index b16c5e68c..8f72576d4 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientUM/Makefile
+++ b/Samples/4_CUDA_Libraries/conjugateGradientUM/Makefile
@@ -285,9 +285,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientUM/NsightEclipse.xml b/Samples/4_CUDA_Libraries/conjugateGradientUM/NsightEclipse.xml
index 3e59ba5d2..ca7258c98 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientUM/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/conjugateGradientUM/NsightEclipse.xml
@@ -59,6 +59,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientUM/README.md b/Samples/4_CUDA_Libraries/conjugateGradientUM/README.md
index bb3253f18..ac9fd2529 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientUM/README.md
+++ b/Samples/4_CUDA_Libraries/conjugateGradientUM/README.md
@@ -10,7 +10,7 @@ Unified Memory, Linear Algebra, CUBLAS Library, CUSPARSE Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -30,7 +30,7 @@ cudaFree, cudaMallocManaged, cudaDeviceSynchronize, cudaMalloc, cudaGetDevicePro
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2017.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2017.vcxproj
index 62f9d13da..4a8f1f5fd 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/conjugateGradientUM.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2019.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2019.vcxproj
index a6541e3d2..44fd5a526 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/conjugateGradientUM.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2022.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2022.vcxproj
index 045228e5c..05b5205cc 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/conjugateGradientUM.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cuDLAErrorReporting/Makefile b/Samples/4_CUDA_Libraries/cuDLAErrorReporting/Makefile
index 19b23678c..9c8f53fed 100644
--- a/Samples/4_CUDA_Libraries/cuDLAErrorReporting/Makefile
+++ b/Samples/4_CUDA_Libraries/cuDLAErrorReporting/Makefile
@@ -330,9 +330,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/cuDLAErrorReporting/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cuDLAErrorReporting/NsightEclipse.xml
index fdf187ae4..0d4cf217b 100644
--- a/Samples/4_CUDA_Libraries/cuDLAErrorReporting/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/cuDLAErrorReporting/NsightEclipse.xml
@@ -6,14 +6,14 @@
     <flag>--std=c++11</flag>
   </cflags>
   <cuda_api_list>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
     <toolkit>cudaStreamDestroy</toolkit>
     <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGetErrorName</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaStreamSynchronize</toolkit>
-    <toolkit>cudaMemsetAsync</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaGetErrorName</toolkit>
+    <toolkit>cudaMemsetAsync</toolkit>
     <toolkit>cudaMemcpyAsync</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates how DLA errors can be detected via CUDA.]]></description>
@@ -51,6 +51,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>aarch64</arch>
diff --git a/Samples/4_CUDA_Libraries/cuDLAErrorReporting/README.md b/Samples/4_CUDA_Libraries/cuDLAErrorReporting/README.md
index e4b50ac9e..e18dc7d8a 100644
--- a/Samples/4_CUDA_Libraries/cuDLAErrorReporting/README.md
+++ b/Samples/4_CUDA_Libraries/cuDLAErrorReporting/README.md
@@ -10,7 +10,7 @@ cuDLA, Data Parallel Algorithms, Image Processing
 
 ## Supported SM Architectures
 
-[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamDestroy, cudaFree, cudaStreamSynchronize, cudaMemsetAsync, cudaMalloc, cudaStreamCreateWithFlags, cudaSetDevice, cudaGetErrorName, cudaMemcpyAsync
+cudaStreamCreateWithFlags, cudaStreamDestroy, cudaFree, cudaGetErrorName, cudaSetDevice, cudaStreamSynchronize, cudaMalloc, cudaMemsetAsync, cudaMemcpyAsync
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/4_CUDA_Libraries/cuDLAHybridMode/Makefile b/Samples/4_CUDA_Libraries/cuDLAHybridMode/Makefile
index 70ba53d68..5cf7413aa 100644
--- a/Samples/4_CUDA_Libraries/cuDLAHybridMode/Makefile
+++ b/Samples/4_CUDA_Libraries/cuDLAHybridMode/Makefile
@@ -330,9 +330,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/cuDLAHybridMode/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cuDLAHybridMode/NsightEclipse.xml
index 07ca19c56..d59cdec6d 100644
--- a/Samples/4_CUDA_Libraries/cuDLAHybridMode/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/cuDLAHybridMode/NsightEclipse.xml
@@ -6,14 +6,14 @@
     <flag>--std=c++11</flag>
   </cflags>
   <cuda_api_list>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
     <toolkit>cudaStreamDestroy</toolkit>
     <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGetErrorName</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaStreamSynchronize</toolkit>
-    <toolkit>cudaMemsetAsync</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaGetErrorName</toolkit>
+    <toolkit>cudaMemsetAsync</toolkit>
     <toolkit>cudaMemcpyAsync</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates cuDLA hybrid mode wherein DLA can be programmed using CUDA.]]></description>
@@ -51,6 +51,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>aarch64</arch>
diff --git a/Samples/4_CUDA_Libraries/cuDLAHybridMode/README.md b/Samples/4_CUDA_Libraries/cuDLAHybridMode/README.md
index 2f4559bb9..12799c18a 100644
--- a/Samples/4_CUDA_Libraries/cuDLAHybridMode/README.md
+++ b/Samples/4_CUDA_Libraries/cuDLAHybridMode/README.md
@@ -10,7 +10,7 @@ cuDLA, Data Parallel Algorithms, Image Processing
 
 ## Supported SM Architectures
 
-[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamDestroy, cudaFree, cudaStreamSynchronize, cudaMemsetAsync, cudaMalloc, cudaStreamCreateWithFlags, cudaSetDevice, cudaGetErrorName, cudaMemcpyAsync
+cudaStreamCreateWithFlags, cudaStreamDestroy, cudaFree, cudaGetErrorName, cudaSetDevice, cudaStreamSynchronize, cudaMalloc, cudaMemsetAsync, cudaMemcpyAsync
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/Makefile b/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/Makefile
index 4893fee94..76bfe834a 100644
--- a/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/Makefile
+++ b/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/Makefile
@@ -333,9 +333,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/NsightEclipse.xml
index b9db9e0b2..7f3ef809f 100644
--- a/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/NsightEclipse.xml
@@ -45,6 +45,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>aarch64</arch>
diff --git a/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/README.md b/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/README.md
index 6ca250bab..21cdfb8ff 100644
--- a/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/README.md
+++ b/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/README.md
@@ -10,7 +10,7 @@ cuDLA, Data Parallel Algorithms, Image Processing
 
 ## Supported SM Architectures
 
-[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -27,7 +27,7 @@ aarch64
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/findnvsci.mk b/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/findnvsci.mk
index 1934e1380..0d6d157c4 100644
--- a/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/findnvsci.mk
+++ b/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/findnvsci.mk
@@ -56,6 +56,7 @@ ifeq ("$(TARGET_OS)","linux")
     RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
     CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -101,6 +102,11 @@ ifeq ("$(TARGET_OS)","linux")
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
+    ifeq ("$(KYLIN)","0")
+      GLPATH    ?= /usr/lib64/nvidia
+      GLLINK    ?= -L/usr/lib64/nvidia
+      DFLT_PATH ?= /usr/lib64
+    endif
 
   NVSCIBUFLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libnvscibuf.so    -print 2>/dev/null)
   NVSCISYNCLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libnvscisync.so    -print 2>/dev/null)
diff --git a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/NsightEclipse.xml
index ba8ad2016..9405b3940 100644
--- a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/NsightEclipse.xml
@@ -5,13 +5,13 @@
   <cuda_api_list>
     <driver>cuDoubleComplex</driver>
     <driver>cuComplex</driver>
-    <toolkit>cudaMemset</toolkit>
-    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaStreamDestroy</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
+    <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaMemset</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[A CUDA Sample that demonstrates cuSolverDN's LU, QR and Cholesky factorization.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -68,6 +68,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/README.md b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/README.md
index 92210b8fd..0b311943a 100644
--- a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/README.md
+++ b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/README.md
@@ -10,7 +10,7 @@ Linear Algebra, CUSOLVER Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -26,14 +26,14 @@ x86_64, ppc64le, aarch64
 cuDoubleComplex, cuComplex
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaStreamDestroy, cudaStreamCreate, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy
+cudaMemcpy, cudaStreamDestroy, cudaFree, cudaDeviceSynchronize, cudaMemset, cudaMalloc, cudaStreamCreate
 
 ## Dependencies needed to build/run
 [CUSOLVER](../../../README.md#cusolver), [CUBLAS](../../../README.md#cublas), [CUSPARSE](../../../README.md#cusparse)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2017.vcxproj b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2017.vcxproj
index 90d7b869e..cd859b9b1 100644
--- a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2019.vcxproj b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2019.vcxproj
index 2b63d7d11..6155be1ab 100644
--- a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2022.vcxproj b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2022.vcxproj
index 1f123af80..98448924f 100644
--- a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/Makefile b/Samples/4_CUDA_Libraries/cuSolverRf/Makefile
index 27824bc2d..92fdc9849 100644
--- a/Samples/4_CUDA_Libraries/cuSolverRf/Makefile
+++ b/Samples/4_CUDA_Libraries/cuSolverRf/Makefile
@@ -283,9 +283,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cuSolverRf/NsightEclipse.xml
index 2cf3f041a..84c56c891 100644
--- a/Samples/4_CUDA_Libraries/cuSolverRf/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/cuSolverRf/NsightEclipse.xml
@@ -3,15 +3,15 @@
 <entry>
   <name>cuSolverRf</name>
   <cuda_api_list>
+    <driver>cuGet</driver>
     <driver>cuDoubleComplex</driver>
     <driver>cuComplex</driver>
-    <driver>cuGet</driver>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaStreamDestroy</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[A CUDA Sample that demonstrates cuSolver's refactorization library - CUSOLVERRF.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -67,6 +67,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/README.md b/Samples/4_CUDA_Libraries/cuSolverRf/README.md
index d0a99cf47..c268cf070 100644
--- a/Samples/4_CUDA_Libraries/cuSolverRf/README.md
+++ b/Samples/4_CUDA_Libraries/cuSolverRf/README.md
@@ -10,7 +10,7 @@ Linear Algebra, CUSOLVER Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,17 +23,17 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuDoubleComplex, cuComplex, cuGet
+cuGet, cuDoubleComplex, cuComplex
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamDestroy, cudaFree, cudaStreamCreate, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy
+cudaMemcpy, cudaStreamDestroy, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaStreamCreate
 
 ## Dependencies needed to build/run
 [CUSOLVER](../../../README.md#cusolver), [CUBLAS](../../../README.md#cublas), [CUSPARSE](../../../README.md#cusparse)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2017.vcxproj b/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2017.vcxproj
index fc5a4d2f7..bcc7a6a4b 100644
--- a/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/cuSolverRf.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2019.vcxproj b/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2019.vcxproj
index 02802989e..16948fcd1 100644
--- a/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cuSolverRf.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2022.vcxproj b/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2022.vcxproj
index 9cd7669f5..6de3db9bf 100644
--- a/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cuSolverRf.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/Makefile b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/Makefile
index 59b0c3d09..04cdc6ea9 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/Makefile
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/Makefile
@@ -283,9 +283,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/NsightEclipse.xml
index cc3a57adc..f6b92a544 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/NsightEclipse.xml
@@ -7,9 +7,9 @@
     <driver>cuComplex</driver>
     <toolkit>cudaStreamDestroy</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
     <toolkit>cudaMemcpyAsync</toolkit>
   </cuda_api_list>
   <description><![CDATA[A CUDA Sample that demonstrates cuSolverSP's LU, QR and Cholesky factorization.]]></description>
@@ -68,6 +68,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/README.md b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/README.md
index 2ac87f314..25ce28654 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/README.md
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/README.md
@@ -10,7 +10,7 @@ Linear Algebra, CUSOLVER Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -26,14 +26,14 @@ x86_64, ppc64le, armv7l
 cuDoubleComplex, cuComplex
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamDestroy, cudaFree, cudaStreamCreate, cudaDeviceSynchronize, cudaMalloc, cudaMemcpyAsync
+cudaStreamDestroy, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaStreamCreate, cudaMemcpyAsync
 
 ## Dependencies needed to build/run
 [CUSOLVER](../../../README.md#cusolver), [CUSPARSE](../../../README.md#cusparse)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2017.vcxproj b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2017.vcxproj
index 78512bbdc..9978ec3e7 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/cuSolverSp_LinearSolver.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2019.vcxproj b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2019.vcxproj
index fe6195019..a7aef7dd5 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cuSolverSp_LinearSolver.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2022.vcxproj b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2022.vcxproj
index 0d22534df..d6a6ef404 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cuSolverSp_LinearSolver.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/Makefile b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/Makefile
index e80902e4c..649da6f8f 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/Makefile
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/Makefile
@@ -283,9 +283,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/NsightEclipse.xml
index 32bfa4d34..65fa85566 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/NsightEclipse.xml
@@ -5,11 +5,11 @@
   <cuda_api_list>
     <driver>cuDoubleComplex</driver>
     <driver>cuComplex</driver>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaStreamDestroy</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[A CUDA Sample that demonstrates Cholesky factorization using cuSolverSP's low level APIs.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -59,6 +59,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/README.md b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/README.md
index a992f9ef1..3cf4112f0 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/README.md
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/README.md
@@ -10,7 +10,7 @@ Linear Algebra, CUSOLVER Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -26,14 +26,14 @@ x86_64, ppc64le, armv7l
 cuDoubleComplex, cuComplex
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamDestroy, cudaFree, cudaStreamCreate, cudaMalloc, cudaMemcpy
+cudaMemcpy, cudaStreamDestroy, cudaFree, cudaMalloc, cudaStreamCreate
 
 ## Dependencies needed to build/run
 [CUSOLVER](../../../README.md#cusolver), [CUSPARSE](../../../README.md#cusparse)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2017.vcxproj b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2017.vcxproj
index d04553b80..d2c8031e1 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/cuSolverSp_LowlevelCholesky.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2019.vcxproj b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2019.vcxproj
index 3b686a782..2703da3ad 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cuSolverSp_LowlevelCholesky.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2022.vcxproj b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2022.vcxproj
index 94e8d4035..b85749bf5 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cuSolverSp_LowlevelCholesky.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/Makefile b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/Makefile
index 5e3384bc8..2e7d1c9f8 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/Makefile
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/Makefile
@@ -283,9 +283,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/NsightEclipse.xml
index 3e3dc5c40..51bab24a0 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/NsightEclipse.xml
@@ -3,14 +3,14 @@
 <entry>
   <name>cuSolverSp_LowlevelQR</name>
   <cuda_api_list>
+    <driver>cuGet</driver>
     <driver>cuDoubleComplex</driver>
     <driver>cuComplex</driver>
-    <driver>cuGet</driver>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaStreamDestroy</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[A CUDA Sample that demonstrates QR factorization using cuSolverSP's low level APIs.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -60,6 +60,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/README.md b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/README.md
index eae84e4dd..df5f2a84d 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/README.md
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/README.md
@@ -10,7 +10,7 @@ Linear Algebra, CUSOLVER Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,17 +23,17 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuDoubleComplex, cuComplex, cuGet
+cuGet, cuDoubleComplex, cuComplex
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamDestroy, cudaFree, cudaStreamCreate, cudaMalloc, cudaMemcpy
+cudaMemcpy, cudaStreamDestroy, cudaFree, cudaMalloc, cudaStreamCreate
 
 ## Dependencies needed to build/run
 [CUSOLVER](../../../README.md#cusolver), [CUSPARSE](../../../README.md#cusparse)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2017.vcxproj b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2017.vcxproj
index b62c633e8..b25aa37d9 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/cuSolverSp_LowlevelQR.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2019.vcxproj b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2019.vcxproj
index 5ab45d7df..af967c450 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cuSolverSp_LowlevelQR.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2022.vcxproj b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2022.vcxproj
index d8ee669f5..bb98790f2 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cuSolverSp_LowlevelQR.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cudaNvSci/Makefile b/Samples/4_CUDA_Libraries/cudaNvSci/Makefile
index dd8e7bce1..f6561b1b2 100644
--- a/Samples/4_CUDA_Libraries/cudaNvSci/Makefile
+++ b/Samples/4_CUDA_Libraries/cudaNvSci/Makefile
@@ -327,9 +327,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/cudaNvSci/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cudaNvSci/NsightEclipse.xml
index a8659d179..0995196ed 100644
--- a/Samples/4_CUDA_Libraries/cudaNvSci/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/cudaNvSci/NsightEclipse.xml
@@ -7,39 +7,39 @@
   </cflags>
   <cuda_api_list>
     <driver>cuDeviceGetUuid</driver>
+    <toolkit>cudaExternalMemoryGetMappedBuffer</toolkit>
+    <toolkit>cudaImportExternalSemaphore</toolkit>
+    <toolkit>cudaDeviceGetAttribute</toolkit>
+    <toolkit>cudaNvSciSignal</toolkit>
     <toolkit>cudaGetMipmappedArrayLevel</toolkit>
+    <toolkit>cudaImportNvSciRawBuf</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaImportNvSciImage</toolkit>
-    <toolkit>cudaImportExternalSemaphore</toolkit>
     <toolkit>cudaNvSciApp</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
-    <toolkit>cudaExternalMemoryGetMappedMipmappedArray</toolkit>
+    <toolkit>cudaDeviceId</toolkit>
+    <toolkit>cudaMallocHost</toolkit>
+    <toolkit>cudaSignalExternalSemaphoresAsync</toolkit>
+    <toolkit>cudaCreateTextureObject</toolkit>
+    <toolkit>cudaFreeHost</toolkit>
+    <toolkit>cudaNvSci</toolkit>
     <toolkit>cudaNvSciWait</toolkit>
-    <toolkit>cudaDestroyExternalMemory</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
     <toolkit>cudaMemcpyAsync</toolkit>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
+    <toolkit>cudaExternalMemoryGetMappedMipmappedArray</toolkit>
     <toolkit>cudaStreamDestroy</toolkit>
-    <toolkit>cudaSignalExternalSemaphoresAsync</toolkit>
     <toolkit>cudaDeviceGetNvSciSyncAttributes</toolkit>
-    <toolkit>cudaFreeMipmappedArray</toolkit>
-    <toolkit>cudaMallocHost</toolkit>
-    <toolkit>cudaNvSci</toolkit>
-    <toolkit>cudaImportExternalMemory</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaImportNvSciRawBuf</toolkit>
-    <toolkit>cudaImportNvSciSemaphore</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
     <toolkit>cudaDestroyTextureObject</toolkit>
-    <toolkit>cudaDeviceGetAttribute</toolkit>
+    <toolkit>cudaDestroyExternalMemory</toolkit>
+    <toolkit>cudaImportExternalMemory</toolkit>
     <toolkit>cudaDestroyExternalSemaphore</toolkit>
-    <toolkit>cudaStreamSynchronize</toolkit>
-    <toolkit>cudaNvSciSignal</toolkit>
+    <toolkit>cudaFreeMipmappedArray</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaDeviceId</toolkit>
-    <toolkit>cudaExternalMemoryGetMappedBuffer</toolkit>
-    <toolkit>cudaCreateTextureObject</toolkit>
-    <toolkit>cudaFreeHost</toolkit>
+    <toolkit>cudaStreamSynchronize</toolkit>
     <toolkit>cudaWaitExternalSemaphoresAsync</toolkit>
+    <toolkit>cudaImportNvSciSemaphore</toolkit>
   </cuda_api_list>
-  <description><![CDATA[This sample demonstrates CUDA-NvSciBuf/NvSciSync Interop. Two CPU threads import the NvSciBuf and NvSciSync into CUDA to perform two image processing algorithms on a ppm image - image rotation in 1st thread &amp;amp;amp;amp;amp;amp;amp;amp;amp;amp; rgba to grayscale conversion of rotated image in 2nd thread. Currently only supported on Ubuntu 18.04]]></description>
+  <description><![CDATA[This sample demonstrates CUDA-NvSciBuf/NvSciSync Interop. Two CPU threads import the NvSciBuf and NvSciSync into CUDA to perform two image processing algorithms on a ppm image - image rotation in 1st thread & rgba to grayscale conversion of rotated image in 2nd thread. Currently only supported on Ubuntu 18.04]]></description>
   <devicecompilation>whole</devicecompilation>
   <includepaths>
     <path>./</path>
@@ -80,6 +80,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <platform>aarch64</platform>
diff --git a/Samples/4_CUDA_Libraries/cudaNvSci/README.md b/Samples/4_CUDA_Libraries/cudaNvSci/README.md
index baac35c69..2e12e227b 100644
--- a/Samples/4_CUDA_Libraries/cudaNvSci/README.md
+++ b/Samples/4_CUDA_Libraries/cudaNvSci/README.md
@@ -2,7 +2,7 @@
 
 ## Description
 
-This sample demonstrates CUDA-NvSciBuf/NvSciSync Interop. Two CPU threads import the NvSciBuf and NvSciSync into CUDA to perform two image processing algorithms on a ppm image - image rotation in 1st thread &amp;amp;amp;amp;amp;amp;amp;amp;amp;amp; rgba to grayscale conversion of rotated image in 2nd thread. Currently only supported on Ubuntu 18.04
+This sample demonstrates CUDA-NvSciBuf/NvSciSync Interop. Two CPU threads import the NvSciBuf and NvSciSync into CUDA to perform two image processing algorithms on a ppm image - image rotation in 1st thread & rgba to grayscale conversion of rotated image in 2nd thread. Currently only supported on Ubuntu 18.04
 
 ## Key Concepts
 
@@ -10,7 +10,7 @@ CUDA NvSci Interop, Data Parallel Algorithms, Image Processing
 
 ## Supported SM Architectures
 
-[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -26,14 +26,14 @@ x86_64, aarch64
 cuDeviceGetUuid
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaGetMipmappedArrayLevel, cudaImportNvSciImage, cudaImportExternalSemaphore, cudaNvSciApp, cudaStreamCreateWithFlags, cudaExternalMemoryGetMappedMipmappedArray, cudaNvSciWait, cudaDestroyExternalMemory, cudaMemcpyAsync, cudaStreamDestroy, cudaSignalExternalSemaphoresAsync, cudaDeviceGetNvSciSyncAttributes, cudaFreeMipmappedArray, cudaMallocHost, cudaNvSci, cudaImportExternalMemory, cudaSetDevice, cudaImportNvSciRawBuf, cudaImportNvSciSemaphore, cudaGetDeviceCount, cudaDestroyTextureObject, cudaDeviceGetAttribute, cudaDestroyExternalSemaphore, cudaStreamSynchronize, cudaNvSciSignal, cudaFree, cudaDeviceId, cudaExternalMemoryGetMappedBuffer, cudaCreateTextureObject, cudaFreeHost, cudaWaitExternalSemaphoresAsync
+cudaExternalMemoryGetMappedBuffer, cudaImportExternalSemaphore, cudaDeviceGetAttribute, cudaNvSciSignal, cudaGetMipmappedArrayLevel, cudaImportNvSciRawBuf, cudaSetDevice, cudaImportNvSciImage, cudaNvSciApp, cudaDeviceId, cudaMallocHost, cudaSignalExternalSemaphoresAsync, cudaCreateTextureObject, cudaFreeHost, cudaNvSci, cudaNvSciWait, cudaGetDeviceCount, cudaMemcpyAsync, cudaStreamCreateWithFlags, cudaExternalMemoryGetMappedMipmappedArray, cudaStreamDestroy, cudaDeviceGetNvSciSyncAttributes, cudaDestroyTextureObject, cudaDestroyExternalMemory, cudaImportExternalMemory, cudaDestroyExternalSemaphore, cudaFreeMipmappedArray, cudaFree, cudaStreamSynchronize, cudaWaitExternalSemaphoresAsync, cudaImportNvSciSemaphore
 
 ## Dependencies needed to build/run
 [NVSCI](../../../README.md#nvsci)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/cudaNvSci/findnvsci.mk b/Samples/4_CUDA_Libraries/cudaNvSci/findnvsci.mk
index 1934e1380..0d6d157c4 100644
--- a/Samples/4_CUDA_Libraries/cudaNvSci/findnvsci.mk
+++ b/Samples/4_CUDA_Libraries/cudaNvSci/findnvsci.mk
@@ -56,6 +56,7 @@ ifeq ("$(TARGET_OS)","linux")
     RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
     CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -101,6 +102,11 @@ ifeq ("$(TARGET_OS)","linux")
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
+    ifeq ("$(KYLIN)","0")
+      GLPATH    ?= /usr/lib64/nvidia
+      GLLINK    ?= -L/usr/lib64/nvidia
+      DFLT_PATH ?= /usr/lib64
+    endif
 
   NVSCIBUFLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libnvscibuf.so    -print 2>/dev/null)
   NVSCISYNCLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libnvscisync.so    -print 2>/dev/null)
diff --git a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/Makefile b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/Makefile
index 390fb6f24..f9a79cde8 100644
--- a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/Makefile
+++ b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/Makefile
@@ -338,9 +338,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/NsightEclipse.xml
index 31ebd06cd..0b3b44cb3 100644
--- a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/NsightEclipse.xml
@@ -7,32 +7,32 @@
   </cflags>
   <cuda_api_list>
     <driver>cuDeviceGetUuid</driver>
+    <toolkit>cudaImportExternalSemaphore</toolkit>
     <toolkit>cudaGetMipmappedArrayLevel</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaDestroySurfaceObject</toolkit>
+    <toolkit>cudaCreateSurfaceObject</toolkit>
     <toolkit>cudaImportNvSciImage</toolkit>
-    <toolkit>cudaImportExternalSemaphore</toolkit>
+    <toolkit>cudaCreateChannelDesc</toolkit>
+    <toolkit>cudaMallocHost</toolkit>
+    <toolkit>cudaSignalExternalSemaphoresAsync</toolkit>
+    <toolkit>cudaFreeHost</toolkit>
+    <toolkit>cudaMemcpyAsync</toolkit>
     <toolkit>cudaStreamCreateWithFlags</toolkit>
     <toolkit>cudaExternalMemoryGetMappedMipmappedArray</toolkit>
-    <toolkit>cudaDestroyExternalMemory</toolkit>
-    <toolkit>cudaMemcpyAsync</toolkit>
+    <toolkit>cudaMallocArray</toolkit>
+    <toolkit>cudaFreeArray</toolkit>
     <toolkit>cudaStreamDestroy</toolkit>
-    <toolkit>cudaSignalExternalSemaphoresAsync</toolkit>
     <toolkit>cudaDeviceGetNvSciSyncAttributes</toolkit>
-    <toolkit>cudaFreeMipmappedArray</toolkit>
-    <toolkit>cudaMallocHost</toolkit>
-    <toolkit>cudaDestroySurfaceObject</toolkit>
+    <toolkit>cudaDestroyExternalMemory</toolkit>
     <toolkit>cudaImportExternalMemory</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaDestroyExternalSemaphore</toolkit>
-    <toolkit>cudaCreateChannelDesc</toolkit>
-    <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaFreeMipmappedArray</toolkit>
+    <toolkit>cudaImportNvSciSync</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaFreeArray</toolkit>
-    <toolkit>cudaCreateSurfaceObject</toolkit>
-    <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaMallocArray</toolkit>
+    <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaWaitExternalSemaphoresAsync</toolkit>
-    <toolkit>cudaImportNvSciSync</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates CUDA-NvMedia interop via NvSciBuf/NvSciSync APIs. Note that this sample only supports cross build from x86_64 to aarch64, aarch64 native build is not supported. For detailed workflow of the sample please check cudaNvSciNvMedia_Readme.pdf in the sample directory.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -77,6 +77,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>aarch64</arch>
diff --git a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/README.md b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/README.md
index 2ef3f7691..9d1cd1367 100644
--- a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/README.md
+++ b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/README.md
@@ -10,7 +10,7 @@ CUDA NvSci Interop, Data Parallel Algorithms, Image Processing
 
 ## Supported SM Architectures
 
-[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -26,14 +26,14 @@ aarch64
 cuDeviceGetUuid
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaGetMipmappedArrayLevel, cudaImportNvSciImage, cudaImportExternalSemaphore, cudaStreamCreateWithFlags, cudaExternalMemoryGetMappedMipmappedArray, cudaDestroyExternalMemory, cudaMemcpyAsync, cudaStreamDestroy, cudaSignalExternalSemaphoresAsync, cudaDeviceGetNvSciSyncAttributes, cudaFreeMipmappedArray, cudaMallocHost, cudaDestroySurfaceObject, cudaImportExternalMemory, cudaMalloc, cudaSetDevice, cudaDestroyExternalSemaphore, cudaCreateChannelDesc, cudaStreamSynchronize, cudaFree, cudaFreeArray, cudaCreateSurfaceObject, cudaFreeHost, cudaMallocArray, cudaWaitExternalSemaphoresAsync, cudaImportNvSciSync
+cudaImportExternalSemaphore, cudaGetMipmappedArrayLevel, cudaSetDevice, cudaDestroySurfaceObject, cudaCreateSurfaceObject, cudaImportNvSciImage, cudaCreateChannelDesc, cudaMallocHost, cudaSignalExternalSemaphoresAsync, cudaFreeHost, cudaMemcpyAsync, cudaStreamCreateWithFlags, cudaExternalMemoryGetMappedMipmappedArray, cudaMallocArray, cudaFreeArray, cudaStreamDestroy, cudaDeviceGetNvSciSyncAttributes, cudaDestroyExternalMemory, cudaImportExternalMemory, cudaDestroyExternalSemaphore, cudaFreeMipmappedArray, cudaImportNvSciSync, cudaFree, cudaStreamSynchronize, cudaMalloc, cudaWaitExternalSemaphoresAsync
 
 ## Dependencies needed to build/run
 [NVSCI](../../../README.md#nvsci), [NvMedia](../../../README.md#nvmedia)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/findnvmedia.mk b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/findnvmedia.mk
index 7b8cd1b64..23cfcd539 100644
--- a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/findnvmedia.mk
+++ b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/findnvmedia.mk
@@ -56,6 +56,7 @@ ifeq ("$(TARGET_OS)","linux")
     RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
     CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -101,6 +102,11 @@ ifeq ("$(TARGET_OS)","linux")
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
+    ifeq ("$(KYLIN)","0")
+      GLPATH    ?= /usr/lib64/nvidia
+      GLLINK    ?= -L/usr/lib64/nvidia
+      DFLT_PATH ?= /usr/lib64
+    endif
 
   NVMEDIALIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libnvmedia.so -print 2>/dev/null)
 
diff --git a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/findnvsci.mk b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/findnvsci.mk
index 1934e1380..0d6d157c4 100644
--- a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/findnvsci.mk
+++ b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/findnvsci.mk
@@ -56,6 +56,7 @@ ifeq ("$(TARGET_OS)","linux")
     RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
     CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -101,6 +102,11 @@ ifeq ("$(TARGET_OS)","linux")
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
+    ifeq ("$(KYLIN)","0")
+      GLPATH    ?= /usr/lib64/nvidia
+      GLLINK    ?= -L/usr/lib64/nvidia
+      DFLT_PATH ?= /usr/lib64
+    endif
 
   NVSCIBUFLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libnvscibuf.so    -print 2>/dev/null)
   NVSCISYNCLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libnvscisync.so    -print 2>/dev/null)
diff --git a/Samples/4_CUDA_Libraries/freeImageInteropNPP/NsightEclipse.xml b/Samples/4_CUDA_Libraries/freeImageInteropNPP/NsightEclipse.xml
index 0296542c3..33f807557 100644
--- a/Samples/4_CUDA_Libraries/freeImageInteropNPP/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/freeImageInteropNPP/NsightEclipse.xml
@@ -3,11 +3,11 @@
 <entry>
   <name>freeImageInteropNPP</name>
   <cuda_api_list>
+    <toolkit>cudaRuntimeGetVersion</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaDriverGetVersion</toolkit>
     <toolkit>cudaDeviceInit</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaRuntimeGetVersion</toolkit>
+    <toolkit>cudaDriverGetVersion</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[A simple CUDA Sample demonstrate how to use FreeImage library with NPP.]]></description>
@@ -65,6 +65,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/freeImageInteropNPP/README.md b/Samples/4_CUDA_Libraries/freeImageInteropNPP/README.md
index 20361fe8f..96e072b68 100644
--- a/Samples/4_CUDA_Libraries/freeImageInteropNPP/README.md
+++ b/Samples/4_CUDA_Libraries/freeImageInteropNPP/README.md
@@ -10,7 +10,7 @@ Performance Strategies, Image Processing, NPP Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaGetDeviceCount, cudaDriverGetVersion, cudaDeviceInit, cudaSetDevice, cudaRuntimeGetVersion, cudaGetDeviceProperties
+cudaRuntimeGetVersion, cudaSetDevice, cudaGetDeviceCount, cudaDeviceInit, cudaDriverGetVersion, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [FreeImage](../../../README.md#freeimage), [NPP](../../../README.md#npp)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2017.vcxproj b/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2017.vcxproj
index e791cbe15..b81f5f26a 100644
--- a/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -117,6 +117,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2019.vcxproj b/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2019.vcxproj
index d6b27ff1b..032895951 100644
--- a/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2022.vcxproj b/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2022.vcxproj
index 6b803978f..624cfaa8d 100644
--- a/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/histEqualizationNPP/NsightEclipse.xml b/Samples/4_CUDA_Libraries/histEqualizationNPP/NsightEclipse.xml
index eadb54385..9b1554bb9 100644
--- a/Samples/4_CUDA_Libraries/histEqualizationNPP/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/histEqualizationNPP/NsightEclipse.xml
@@ -3,14 +3,14 @@
 <entry>
   <name>histEqualizationNPP</name>
   <cuda_api_list>
+    <toolkit>cudaRuntimeGetVersion</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaDeviceInit</toolkit>
     <toolkit>cudaDriverGetVersion</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaDeviceInit</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaRuntimeGetVersion</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This CUDA Sample demonstrates how to use NPP for histogram equalization for image data.]]></description>
@@ -71,6 +71,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/histEqualizationNPP/README.md b/Samples/4_CUDA_Libraries/histEqualizationNPP/README.md
index dff01a26f..ecf77bcb9 100644
--- a/Samples/4_CUDA_Libraries/histEqualizationNPP/README.md
+++ b/Samples/4_CUDA_Libraries/histEqualizationNPP/README.md
@@ -10,7 +10,7 @@ Image Processing, Performance Strategies, NPP Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGetDeviceCount, cudaDriverGetVersion, cudaMalloc, cudaDeviceInit, cudaSetDevice, cudaRuntimeGetVersion, cudaMemcpy, cudaGetDeviceProperties
+cudaRuntimeGetVersion, cudaMemcpy, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaDeviceInit, cudaDriverGetVersion, cudaMalloc, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [FreeImage](../../../README.md#freeimage), [NPP](../../../README.md#npp)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2017.vcxproj b/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2017.vcxproj
index a97516a6c..6eb23e169 100644
--- a/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -117,6 +117,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2019.vcxproj b/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2019.vcxproj
index 2a220a887..6dd57051b 100644
--- a/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2022.vcxproj b/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2022.vcxproj
index b1780d1d5..3f16252a7 100644
--- a/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/lineOfSight/Makefile b/Samples/4_CUDA_Libraries/lineOfSight/Makefile
index 55adba98e..21b842fee 100644
--- a/Samples/4_CUDA_Libraries/lineOfSight/Makefile
+++ b/Samples/4_CUDA_Libraries/lineOfSight/Makefile
@@ -304,9 +304,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/lineOfSight/NsightEclipse.xml b/Samples/4_CUDA_Libraries/lineOfSight/NsightEclipse.xml
index c97e235ab..e279a4fca 100644
--- a/Samples/4_CUDA_Libraries/lineOfSight/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/lineOfSight/NsightEclipse.xml
@@ -6,11 +6,11 @@
     <flag>--std=c++14</flag>
   </cflags>
   <cuda_api_list>
+    <toolkit>cudaCreateChannelDesc</toolkit>
+    <toolkit>cudaMallocArray</toolkit>
     <toolkit>cudaFreeArray</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaCreateTextureObject</toolkit>
-    <toolkit>cudaMallocArray</toolkit>
-    <toolkit>cudaCreateChannelDesc</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample is an implementation of a simple line-of-sight algorithm: Given a height map and a ray originating at some observation point, it computes all the points along the ray that are visible from the observation point. The implementation is based on the Thrust library.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -57,6 +57,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/lineOfSight/README.md b/Samples/4_CUDA_Libraries/lineOfSight/README.md
index cbd95d409..e704d3e2c 100644
--- a/Samples/4_CUDA_Libraries/lineOfSight/README.md
+++ b/Samples/4_CUDA_Libraries/lineOfSight/README.md
@@ -10,7 +10,7 @@ Thrust Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFreeArray, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMallocArray, cudaCreateChannelDesc
+cudaCreateChannelDesc, cudaMallocArray, cudaFreeArray, cudaDeviceSynchronize, cudaCreateTextureObject
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2017.vcxproj b/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2017.vcxproj
index 0e4b882b6..14b93fad3 100644
--- a/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/lineOfSight.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2019.vcxproj b/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2019.vcxproj
index 937d7a1b6..e2dc1bf96 100644
--- a/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/lineOfSight.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2022.vcxproj b/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2022.vcxproj
index 41f708799..2b15511d3 100644
--- a/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/lineOfSight.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/Makefile b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/Makefile
index c5e76fb9f..dc4bc8dd1 100644
--- a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/Makefile
+++ b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/NsightEclipse.xml b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/NsightEclipse.xml
index 6f7125d96..8c19d35ab 100644
--- a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/NsightEclipse.xml
@@ -3,14 +3,14 @@
 <entry>
   <name>matrixMulCUBLAS</name>
   <cuda_api_list>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample implements matrix multiplication from Chapter 3 of the programming guide. To illustrate GPU performance for matrix multiply, this sample also shows how to use the new CUDA 4.0 interface for CUBLAS to demonstrate high-performance performance for matrix multiplication.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -57,6 +57,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <sources>
     <ignore>new/matrixMulCUBLAS.cpp</ignore>
   </sources>
diff --git a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/README.md b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/README.md
index 1390e9ebd..a5d683530 100644
--- a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/README.md
+++ b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/README.md
@@ -10,7 +10,7 @@ CUDA Runtime API, Performance Strategies, Linear Algebra, CUBLAS
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaFree, cudaEventSynchronize, cudaEventRecord, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
 
 ## Dependencies needed to build/run
 [CUBLAS](../../../README.md#cublas)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2017.vcxproj b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2017.vcxproj
index ccc83a662..7cf90b098 100644
--- a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/matrixMulCUBLAS.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2019.vcxproj b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2019.vcxproj
index 6f820ba19..1665d0fa2 100644
--- a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/matrixMulCUBLAS.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2022.vcxproj b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2022.vcxproj
index 4b4d87227..e9257bfbe 100644
--- a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/matrixMulCUBLAS.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/nvJPEG/NsightEclipse.xml b/Samples/4_CUDA_Libraries/nvJPEG/NsightEclipse.xml
index 901147812..baba3f94a 100644
--- a/Samples/4_CUDA_Libraries/nvJPEG/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/nvJPEG/NsightEclipse.xml
@@ -3,18 +3,18 @@
 <entry>
   <name>nvJPEG</name>
   <cuda_api_list>
+    <toolkit>cudaHostAlloc</toolkit>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
     <toolkit>cudaStreamDestroy</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaHostAlloc</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
     <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[A CUDA Sample that demonstrates single and batched decoding of jpeg images using NVJPEG Library.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -58,6 +58,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/nvJPEG/README.md b/Samples/4_CUDA_Libraries/nvJPEG/README.md
index e1d3f7fe8..a54a46ca8 100644
--- a/Samples/4_CUDA_Libraries/nvJPEG/README.md
+++ b/Samples/4_CUDA_Libraries/nvJPEG/README.md
@@ -10,7 +10,7 @@ Image Decoding, NVJPEG Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamDestroy, cudaFree, cudaEventRecord, cudaHostAlloc, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaStreamCreateWithFlags, cudaStreamSynchronize, cudaGetDeviceProperties
+cudaHostAlloc, cudaStreamCreateWithFlags, cudaStreamDestroy, cudaFree, cudaEventSynchronize, cudaEventRecord, cudaFreeHost, cudaStreamSynchronize, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
 
 ## Dependencies needed to build/run
 [NVJPEG](../../../README.md#nvjpeg)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2017.vcxproj b/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2017.vcxproj
index 5abe980b0..c5931b667 100644
--- a/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2019.vcxproj b/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2019.vcxproj
index 1bae3470e..05906affc 100644
--- a/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2022.vcxproj b/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2022.vcxproj
index 9b68e545f..f861ff460 100644
--- a/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/NsightEclipse.xml b/Samples/4_CUDA_Libraries/nvJPEG_encoder/NsightEclipse.xml
index 157b43cdc..fa59430ad 100644
--- a/Samples/4_CUDA_Libraries/nvJPEG_encoder/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/nvJPEG_encoder/NsightEclipse.xml
@@ -4,14 +4,14 @@
   <name>nvJPEG_encoder</name>
   <cuda_api_list>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaGetErrorString</toolkit>
     <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[A CUDA Sample that demonstrates single encoding of jpeg images using NVJPEG Library.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -55,6 +55,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/README.md b/Samples/4_CUDA_Libraries/nvJPEG_encoder/README.md
index d3170c4b2..09e2227b7 100644
--- a/Samples/4_CUDA_Libraries/nvJPEG_encoder/README.md
+++ b/Samples/4_CUDA_Libraries/nvJPEG_encoder/README.md
@@ -10,7 +10,7 @@ Image Encoding, NVJPEG Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventSynchronize, cudaMalloc, cudaGetErrorString, cudaGetDeviceProperties
+cudaFree, cudaGetErrorString, cudaEventSynchronize, cudaDeviceSynchronize, cudaEventRecord, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
 
 ## Dependencies needed to build/run
 [NVJPEG](../../../README.md#nvjpeg)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2017.vcxproj b/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2017.vcxproj
index bd7bdf907..22a093777 100644
--- a/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2019.vcxproj b/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2019.vcxproj
index bf27a1ed6..4e23250e7 100644
--- a/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2022.vcxproj b/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2022.vcxproj
index fcef7dc3d..735c98926 100644
--- a/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/oceanFFT/Makefile b/Samples/4_CUDA_Libraries/oceanFFT/Makefile
index 333096a42..eaa571723 100644
--- a/Samples/4_CUDA_Libraries/oceanFFT/Makefile
+++ b/Samples/4_CUDA_Libraries/oceanFFT/Makefile
@@ -299,9 +299,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/oceanFFT/NsightEclipse.xml b/Samples/4_CUDA_Libraries/oceanFFT/NsightEclipse.xml
index 3c846efc6..8ac28a1ba 100644
--- a/Samples/4_CUDA_Libraries/oceanFFT/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/oceanFFT/NsightEclipse.xml
@@ -3,17 +3,17 @@
 <entry>
   <name>oceanFFT</name>
   <cuda_api_list>
+    <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaCalculateSlopeKernel</toolkit>
     <toolkit>cudaGraphicsMapResources</toolkit>
     <toolkit>cudaUpdateHeightmapKernel</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
-    <toolkit>cudaCalculateSlopeKernel</toolkit>
-    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaGenerateSpectrumKernel</toolkit>
     <toolkit>cudaGraphicsUnregisterResource</toolkit>
-    <toolkit>cudaGraphicsUnmapResources</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaGenerateSpectrumKernel</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample simulates an Ocean height field using CUFFT Library and renders the result using OpenGL.]]></description>
@@ -85,6 +85,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/oceanFFT/README.md b/Samples/4_CUDA_Libraries/oceanFFT/README.md
index e6f1b2a22..00f7aa57e 100644
--- a/Samples/4_CUDA_Libraries/oceanFFT/README.md
+++ b/Samples/4_CUDA_Libraries/oceanFFT/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Image Processing, CUFFT Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaUpdateHeightmapKernel, cudaGraphicsGLRegisterBuffer, cudaCalculateSlopeKernel, cudaGraphicsResourceGetMappedPointer, cudaMalloc, cudaGenerateSpectrumKernel, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetDeviceProperties
+cudaGraphicsUnmapResources, cudaMemcpy, cudaMalloc, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaCalculateSlopeKernel, cudaGraphicsMapResources, cudaUpdateHeightmapKernel, cudaGraphicsUnregisterResource, cudaGenerateSpectrumKernel, cudaGraphicsGLRegisterBuffer, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl), [CUFFT](../../../README.md#cufft)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/oceanFFT/findgllib.mk b/Samples/4_CUDA_Libraries/oceanFFT/findgllib.mk
index f0a5c5512..998fcf0f1 100644
--- a/Samples/4_CUDA_Libraries/oceanFFT/findgllib.mk
+++ b/Samples/4_CUDA_Libraries/oceanFFT/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2017.vcxproj b/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2017.vcxproj
index ff470ed3b..09d8130bc 100644
--- a/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/oceanFFT.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -118,6 +118,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2019.vcxproj b/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2019.vcxproj
index 2283b05fa..84a21720f 100644
--- a/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/oceanFFT.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2022.vcxproj b/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2022.vcxproj
index 5e71f639f..9a86a3380 100644
--- a/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/oceanFFT.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/randomFog/NsightEclipse.xml b/Samples/4_CUDA_Libraries/randomFog/NsightEclipse.xml
index 71960c724..467d2ef07 100644
--- a/Samples/4_CUDA_Libraries/randomFog/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/randomFog/NsightEclipse.xml
@@ -7,9 +7,9 @@
   </cflags>
   <cuda_api_list>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample illustrates pseudo- and quasi- random numbers produced by CURAND.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -76,6 +76,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/randomFog/README.md b/Samples/4_CUDA_Libraries/randomFog/README.md
index 9a4779940..e101b5b6a 100644
--- a/Samples/4_CUDA_Libraries/randomFog/README.md
+++ b/Samples/4_CUDA_Libraries/randomFog/README.md
@@ -10,7 +10,7 @@ This sample illustrates pseudo- and quasi- random numbers produced by CURAND.
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaFree, cudaMemcpy, cudaGetErrorString
+cudaMalloc, cudaGetErrorString, cudaMemcpy, cudaFree
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl), [CURAND](../../../README.md#curand)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/randomFog/findgllib.mk b/Samples/4_CUDA_Libraries/randomFog/findgllib.mk
index f0a5c5512..998fcf0f1 100644
--- a/Samples/4_CUDA_Libraries/randomFog/findgllib.mk
+++ b/Samples/4_CUDA_Libraries/randomFog/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2017.vcxproj b/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2017.vcxproj
index 6c354dee5..4e8773d61 100644
--- a/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -118,6 +118,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2019.vcxproj b/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2019.vcxproj
index ec4d468c5..52ae3ec8c 100644
--- a/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2022.vcxproj b/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2022.vcxproj
index 680161ec1..126ba72fa 100644
--- a/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS/NsightEclipse.xml b/Samples/4_CUDA_Libraries/simpleCUBLAS/NsightEclipse.xml
index 14436606e..47e065759 100644
--- a/Samples/4_CUDA_Libraries/simpleCUBLAS/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/simpleCUBLAS/NsightEclipse.xml
@@ -50,6 +50,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS/README.md b/Samples/4_CUDA_Libraries/simpleCUBLAS/README.md
index d398137e1..dbb814b9e 100644
--- a/Samples/4_CUDA_Libraries/simpleCUBLAS/README.md
+++ b/Samples/4_CUDA_Libraries/simpleCUBLAS/README.md
@@ -10,7 +10,7 @@ Image Processing, CUBLAS Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -30,7 +30,7 @@ cudaMalloc, cudaFree
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2017.vcxproj b/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2017.vcxproj
index 569946e5b..181e913b9 100644
--- a/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2019.vcxproj b/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2019.vcxproj
index 1870c7cca..f0994fdf4 100644
--- a/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2022.vcxproj b/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2022.vcxproj
index 21e6dcf2f..9640014c4 100644
--- a/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUBLASXT/NsightEclipse.xml b/Samples/4_CUDA_Libraries/simpleCUBLASXT/NsightEclipse.xml
index 9aa988232..47bd44a5a 100644
--- a/Samples/4_CUDA_Libraries/simpleCUBLASXT/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/simpleCUBLASXT/NsightEclipse.xml
@@ -3,9 +3,9 @@
 <entry>
   <name>simpleCUBLASXT</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaFree</toolkit>
   </cuda_api_list>
   <description><![CDATA[Example of using CUBLAS-XT library which performs GEMM operations over Multiple GPUs.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -50,6 +50,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/simpleCUBLASXT/README.md b/Samples/4_CUDA_Libraries/simpleCUBLASXT/README.md
index 37d6d4e9d..0d8c9695a 100644
--- a/Samples/4_CUDA_Libraries/simpleCUBLASXT/README.md
+++ b/Samples/4_CUDA_Libraries/simpleCUBLASXT/README.md
@@ -10,7 +10,7 @@ CUBLAS-XT Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGetDeviceCount, cudaGetDeviceProperties
+cudaGetDeviceProperties, cudaGetDeviceCount, cudaFree
 
 ## Dependencies needed to build/run
 [CUBLAS](../../../README.md#cublas)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2017.vcxproj b/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2017.vcxproj
index eec501a78..297026580 100644
--- a/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2019.vcxproj b/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2019.vcxproj
index 064460547..32a4ace12 100644
--- a/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2022.vcxproj b/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2022.vcxproj
index 114ef777d..c900da38c 100644
--- a/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/Makefile b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/Makefile
index 2a75e025e..86638c5ee 100644
--- a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/Makefile
+++ b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/Makefile
@@ -291,9 +291,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/NsightEclipse.xml b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/NsightEclipse.xml
index 2b64842d7..3dcea4ef2 100644
--- a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/NsightEclipse.xml
@@ -4,9 +4,9 @@
   <name>simpleCUBLAS_LU</name>
   <cuda_api_list>
     <toolkit>cudaGetErrorEnum</toolkit>
-    <toolkit>cudaFree</toolkit>
     <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
   </cuda_api_list>
   <description><![CDATA[CUDA sample demonstrating cuBLAS API cublasDgetrfBatched() for lower-upper (LU) decomposition of a matrix.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -52,6 +52,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/README.md b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/README.md
index 94bdb4e85..2b1b93dcd 100644
--- a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/README.md
+++ b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/README.md
@@ -10,7 +10,7 @@ CUBLAS Library, LU decomposition
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaGetErrorEnum, cudaFree, cudaMalloc, cudaMemcpy
+cudaGetErrorEnum, cudaMalloc, cudaMemcpy, cudaFree
 
 ## Dependencies needed to build/run
 [CUBLAS](../../../README.md#cublas)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2017.vcxproj b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2017.vcxproj
index 9f13e4e8b..bffe80d5f 100644
--- a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleCUBLAS_LU.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2019.vcxproj b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2019.vcxproj
index 9c1278ca1..18c1b1172 100644
--- a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleCUBLAS_LU.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2022.vcxproj b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2022.vcxproj
index 4e775803f..2ff518799 100644
--- a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleCUBLAS_LU.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT/Makefile b/Samples/4_CUDA_Libraries/simpleCUFFT/Makefile
index 3c37107dd..080c25d28 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT/Makefile
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT/NsightEclipse.xml b/Samples/4_CUDA_Libraries/simpleCUFFT/NsightEclipse.xml
index cd63fbb55..6ba60d809 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT/NsightEclipse.xml
@@ -4,8 +4,8 @@
   <name>simpleCUFFT</name>
   <cuda_api_list>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaFree</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
   </cuda_api_list>
   <description><![CDATA[Example of using CUFFT. In this example, CUFFT is used to compute the 1D-convolution of some signal with some filter by transforming both into frequency domain, multiplying them together, and transforming the signal back to time domain. cuFFT plans are created using simple and advanced API functions.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -49,6 +49,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT/README.md b/Samples/4_CUDA_Libraries/simpleCUFFT/README.md
index 1d8cb4044..e91252be4 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT/README.md
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT/README.md
@@ -10,7 +10,7 @@ Image Processing, CUFFT Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaFree, cudaMemcpy
+cudaMalloc, cudaMemcpy, cudaFree
 
 ## Dependencies needed to build/run
 [CUFFT](../../../README.md#cufft)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2017.vcxproj b/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2017.vcxproj
index dadcc1f8c..a6e80d875 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleCUFFT.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2019.vcxproj b/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2019.vcxproj
index 4920b00dc..5eb648920 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleCUFFT.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2022.vcxproj b/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2022.vcxproj
index 0b3dd99a7..2c5983796 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleCUFFT.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/Makefile b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/Makefile
index 610b3cc9e..c21a0c600 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/Makefile
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/NsightEclipse.xml b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/NsightEclipse.xml
index 55dddd19a..a22e53a06 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/NsightEclipse.xml
@@ -6,13 +6,13 @@
     <preprocessor>_USE_MATH_DEFINES</preprocessor>
   </additional_preprocessor>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
     <toolkit>cudaXtFree</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaGetDeviceCount</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[Example of using CUFFT. In this example, CUFFT is used to compute the 2D-convolution of some signal with some filter by transforming both into frequency domain, multiplying them together, and transforming the signal back to time domain on Multiple GPU.]]></description>
@@ -57,6 +57,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/README.md b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/README.md
index 0c941fbb4..9cd1ad571 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/README.md
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/README.md
@@ -10,7 +10,7 @@ Image Processing, CUFFT Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaXtFree, cudaGetDeviceCount, cudaDeviceSynchronize, cudaMalloc, cudaSetDevice, cudaMemcpy, cudaGetDeviceProperties
+cudaXtFree, cudaMemcpy, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaDeviceSynchronize, cudaMalloc, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [CUFFT](../../../README.md#cufft)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2017.vcxproj b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2017.vcxproj
index 219ca46b9..836ea0639 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleCUFFT_2d_MGPU.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2019.vcxproj b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2019.vcxproj
index d8d5f3603..86780b708 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleCUFFT_2d_MGPU.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2022.vcxproj b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2022.vcxproj
index 44c8dc9aa..6e6b95b91 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleCUFFT_2d_MGPU.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/Makefile b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/Makefile
index 91eef96c4..94cb18d76 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/Makefile
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/NsightEclipse.xml b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/NsightEclipse.xml
index 5af2b802e..f7274f8ab 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/NsightEclipse.xml
@@ -3,10 +3,10 @@
 <entry>
   <name>simpleCUFFT_MGPU</name>
   <cuda_api_list>
+    <toolkit>cudaXtFree</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaGetDeviceCount</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaXtFree</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[Example of using CUFFT. In this example, CUFFT is used to compute the 1D-convolution of some signal with some filter by transforming both into frequency domain, multiplying them together, and transforming the signal back to time domain on Multiple GPU.]]></description>
@@ -51,6 +51,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/README.md b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/README.md
index 9d894764b..bfb6e031d 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/README.md
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/README.md
@@ -10,7 +10,7 @@ Image Processing, CUFFT Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaGetDeviceCount, cudaDeviceSynchronize, cudaSetDevice, cudaXtFree, cudaGetDeviceProperties
+cudaXtFree, cudaSetDevice, cudaGetDeviceCount, cudaDeviceSynchronize, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [CUFFT](../../../README.md#cufft)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2017.vcxproj b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2017.vcxproj
index 8b222ff97..c9da79110 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleCUFFT_MGPU.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2019.vcxproj b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2019.vcxproj
index 06f274044..694fc4fd5 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleCUFFT_MGPU.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2022.vcxproj b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2022.vcxproj
index 8a2ca6a0b..a2f9ad9cd 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleCUFFT_MGPU.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_callback/Makefile b/Samples/4_CUDA_Libraries/simpleCUFFT_callback/Makefile
index b9577d0d1..c5159bed5 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT_callback/Makefile
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT_callback/Makefile
@@ -316,9 +316,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 70 72 75 80 86 87
+SMS ?= 53 70 72 75 80 86 87 90
 else
-SMS ?= 35 50 60 70 75 80 86
+SMS ?= 35 50 60 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_callback/NsightEclipse.xml b/Samples/4_CUDA_Libraries/simpleCUFFT_callback/NsightEclipse.xml
index ca2c23e69..eaa551ee6 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT_callback/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT_callback/NsightEclipse.xml
@@ -7,12 +7,12 @@
     <flag>-std=c++11</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
     <toolkit>cudaMemcpyFromSymbol</toolkit>
-    <toolkit>cudaGetDeviceProperties</toolkit>
     <toolkit>cudaGetDevice</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[Example of using CUFFT. In this example, CUFFT is used to compute the 1D-convolution of some signal with some filter by transforming both into frequency domain, multiplying them together, and transforming the signal back to time domain. The difference between this example and the Simple CUFFT example is that the multiplication step is done by the CUFFT kernel with a user-supplied CUFFT callback routine, rather than by a separate kernel call.]]></description>
   <devicecompilation>separate</devicecompilation>
@@ -60,6 +60,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_callback/README.md b/Samples/4_CUDA_Libraries/simpleCUFFT_callback/README.md
index 299380028..71cd8ad11 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT_callback/README.md
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT_callback/README.md
@@ -10,7 +10,7 @@ Image Processing, CUFFT Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaMalloc, cudaMemcpy, cudaMemcpyFromSymbol, cudaGetDeviceProperties, cudaGetDevice
+cudaMemcpy, cudaFree, cudaMemcpyFromSymbol, cudaGetDevice, cudaMalloc, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [callback](../../../README.md#callback), [CUFFT](../../../README.md#cufft)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/NsightEclipse.xml b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/NsightEclipse.xml
index 3d351a11c..b16c392f5 100644
--- a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/NsightEclipse.xml
@@ -3,14 +3,14 @@
 <entry>
   <name>watershedSegmentationNPP</name>
   <cuda_api_list>
+    <toolkit>cudaRuntimeGetVersion</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceGetAttribute</toolkit>
     <toolkit>cudaDriverGetVersion</toolkit>
+    <toolkit>cudaGetDevice</toolkit>
     <toolkit>cudaStreamGetFlags</toolkit>
     <toolkit>cudaStreamSynchronize</toolkit>
-    <toolkit>cudaRuntimeGetVersion</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
-    <toolkit>cudaGetDevice</toolkit>
   </cuda_api_list>
   <description><![CDATA[An NPP CUDA Sample that demonstrates how to use the NPP watershed segmentation function.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -62,6 +62,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/README.md b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/README.md
index c06333e2d..540e74432 100644
--- a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/README.md
+++ b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/README.md
@@ -10,7 +10,7 @@ Performance Strategies, Image Processing, NPP Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaDeviceGetAttribute, cudaDriverGetVersion, cudaStreamGetFlags, cudaStreamSynchronize, cudaRuntimeGetVersion, cudaGetDeviceProperties, cudaGetDevice
+cudaRuntimeGetVersion, cudaFree, cudaDeviceGetAttribute, cudaDriverGetVersion, cudaGetDevice, cudaStreamGetFlags, cudaStreamSynchronize, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [NPP](../../../README.md#npp)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2017.vcxproj b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2017.vcxproj
index 6df4766bc..c752f1355 100644
--- a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2019.vcxproj b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2019.vcxproj
index 494b75bcd..10f8fef82 100644
--- a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2022.vcxproj b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2022.vcxproj
index e4e32e491..d960f01c0 100644
--- a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2017.vcxproj b/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2017.vcxproj
index 625c2f57c..c63b2d70e 100644
--- a/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/BlackScholes.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2019.vcxproj b/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2019.vcxproj
index d872e9986..adf0db2df 100644
--- a/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/BlackScholes.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2022.vcxproj b/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2022.vcxproj
index 698318227..aa30adea6 100644
--- a/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/BlackScholes.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/BlackScholes/Makefile b/Samples/5_Domain_Specific/BlackScholes/Makefile
index b7d3c5292..7935e5400 100644
--- a/Samples/5_Domain_Specific/BlackScholes/Makefile
+++ b/Samples/5_Domain_Specific/BlackScholes/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/BlackScholes/NsightEclipse.xml b/Samples/5_Domain_Specific/BlackScholes/NsightEclipse.xml
index ab1f601db..8af9aa6dc 100644
--- a/Samples/5_Domain_Specific/BlackScholes/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/BlackScholes/NsightEclipse.xml
@@ -7,9 +7,9 @@
   </cflags>
   <cuda_api_list>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample evaluates fair call and put prices for a given set of European options by Black-Scholes formula.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -50,6 +50,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/BlackScholes/README.md b/Samples/5_Domain_Specific/BlackScholes/README.md
index 858b17e6e..2d5354900 100644
--- a/Samples/5_Domain_Specific/BlackScholes/README.md
+++ b/Samples/5_Domain_Specific/BlackScholes/README.md
@@ -10,7 +10,7 @@ Computational Finance
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaFree, cudaDeviceSynchronize, cudaMemcpy
+cudaMalloc, cudaDeviceSynchronize, cudaMemcpy, cudaFree
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2017.vcxproj b/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2017.vcxproj
index 01556903c..0432f89e9 100644
--- a/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2019.vcxproj b/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2019.vcxproj
index 309579cc2..c97e0a3cf 100644
--- a/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2022.vcxproj b/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2022.vcxproj
index d65aea1d8..3796da00f 100644
--- a/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/BlackScholes_nvrtc/README.md b/Samples/5_Domain_Specific/BlackScholes_nvrtc/README.md
index b0f955812..a0e4aa679 100644
--- a/Samples/5_Domain_Specific/BlackScholes_nvrtc/README.md
+++ b/Samples/5_Domain_Specific/BlackScholes_nvrtc/README.md
@@ -10,7 +10,7 @@ Computational Finance, Runtime Compilation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,7 +23,7 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuModuleGetFunction, cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuMemcpyHtoD
+cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
 cudaBlockSize, cudaGridSize
@@ -33,7 +33,7 @@ cudaBlockSize, cudaGridSize
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2017.vcxproj b/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2017.vcxproj
index 1227bc24e..09368fc92 100644
--- a/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/FDTD3d.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -112,6 +112,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2019.vcxproj b/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2019.vcxproj
index 10f2dc237..09c340056 100644
--- a/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/FDTD3d.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2022.vcxproj b/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2022.vcxproj
index 8f01460ab..6af53b8b3 100644
--- a/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/FDTD3d.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/FDTD3d/Makefile b/Samples/5_Domain_Specific/FDTD3d/Makefile
index 9f41e1d1a..bbbee3e3a 100644
--- a/Samples/5_Domain_Specific/FDTD3d/Makefile
+++ b/Samples/5_Domain_Specific/FDTD3d/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/FDTD3d/NsightEclipse.xml b/Samples/5_Domain_Specific/FDTD3d/NsightEclipse.xml
index 68eaef75d..7528550ba 100644
--- a/Samples/5_Domain_Specific/FDTD3d/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/FDTD3d/NsightEclipse.xml
@@ -3,19 +3,19 @@
 <entry>
   <name>FDTD3d</name>
   <cuda_api_list>
-    <toolkit>cudaMemcpyToSymbol</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
     <toolkit>cudaFuncGetAttributes</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaEventDestroy</toolkit>
     <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaMemcpyToSymbol</toolkit>
+    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample applies a finite differences time domain progression stencil on a 3D surface.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -60,6 +60,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/FDTD3d/README.md b/Samples/5_Domain_Specific/FDTD3d/README.md
index c884443d4..80774398f 100644
--- a/Samples/5_Domain_Specific/FDTD3d/README.md
+++ b/Samples/5_Domain_Specific/FDTD3d/README.md
@@ -10,7 +10,7 @@ Performance Strategies
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemcpyToSymbol, cudaFree, cudaEventRecord, cudaEventCreate, cudaFuncGetAttributes, cudaGetDeviceCount, cudaEventElapsedTime, cudaDeviceSynchronize, cudaMalloc, cudaEventDestroy, cudaSetDevice, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaMalloc, cudaFree, cudaFuncGetAttributes, cudaSetDevice, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaEventRecord, cudaMemcpyToSymbol, cudaEventDestroy, cudaEventElapsedTime, cudaGetDeviceCount, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/FlowCPU.flo b/Samples/5_Domain_Specific/HSOpticalFlow/FlowCPU.flo
new file mode 100644
index 000000000..d92d45490
Binary files /dev/null and b/Samples/5_Domain_Specific/HSOpticalFlow/FlowCPU.flo differ
diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/FlowGPU.flo b/Samples/5_Domain_Specific/HSOpticalFlow/FlowGPU.flo
new file mode 100644
index 000000000..45e8df6f9
Binary files /dev/null and b/Samples/5_Domain_Specific/HSOpticalFlow/FlowGPU.flo differ
diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2017.vcxproj b/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2017.vcxproj
index 26fe77dc5..28f83eadf 100644
--- a/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/HSOpticalFlow.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -117,6 +117,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2019.vcxproj b/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2019.vcxproj
index 563a57bef..f9a6290c1 100644
--- a/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/HSOpticalFlow.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2022.vcxproj b/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2022.vcxproj
index 241a3653f..fe372f4e5 100644
--- a/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/HSOpticalFlow.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/Makefile b/Samples/5_Domain_Specific/HSOpticalFlow/Makefile
index f6f1a0878..98ce7ac61 100644
--- a/Samples/5_Domain_Specific/HSOpticalFlow/Makefile
+++ b/Samples/5_Domain_Specific/HSOpticalFlow/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/NsightEclipse.xml b/Samples/5_Domain_Specific/HSOpticalFlow/NsightEclipse.xml
index 3de542e30..548a90c34 100644
--- a/Samples/5_Domain_Specific/HSOpticalFlow/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/HSOpticalFlow/NsightEclipse.xml
@@ -4,9 +4,9 @@
   <name>HSOpticalFlow</name>
   <cuda_api_list>
     <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaMemset</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
   </cuda_api_list>
   <description><![CDATA[Variational optical flow estimation example.  Uses textures for image operations. Shows how simple PDE solver can be accelerated with CUDA.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -52,6 +52,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/README.md b/Samples/5_Domain_Specific/HSOpticalFlow/README.md
index 363d7f17c..195c8954c 100644
--- a/Samples/5_Domain_Specific/HSOpticalFlow/README.md
+++ b/Samples/5_Domain_Specific/HSOpticalFlow/README.md
@@ -10,7 +10,7 @@ Image Processing, Data Parallel Algorithms
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaMemset, cudaFree, cudaMemcpy
+cudaMalloc, cudaMemcpy, cudaMemset, cudaFree
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/5_Domain_Specific/Mandelbrot/Makefile b/Samples/5_Domain_Specific/Mandelbrot/Makefile
index 3daf6eeaf..777aa73cd 100644
--- a/Samples/5_Domain_Specific/Mandelbrot/Makefile
+++ b/Samples/5_Domain_Specific/Mandelbrot/Makefile
@@ -299,9 +299,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2017.vcxproj b/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2017.vcxproj
index ef345fe8e..4d8405080 100644
--- a/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/Mandelbrot.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -121,6 +121,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2019.vcxproj b/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2019.vcxproj
index 645983939..f57745885 100644
--- a/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/Mandelbrot.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -117,6 +117,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2022.vcxproj b/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2022.vcxproj
index 786f81783..ce2b9586f 100644
--- a/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/Mandelbrot.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -117,6 +117,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/Mandelbrot/NsightEclipse.xml b/Samples/5_Domain_Specific/Mandelbrot/NsightEclipse.xml
index 00d22c181..d74f3c4f8 100644
--- a/Samples/5_Domain_Specific/Mandelbrot/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/Mandelbrot/NsightEclipse.xml
@@ -3,19 +3,19 @@
 <entry>
   <name>Mandelbrot</name>
   <cuda_api_list>
+    <toolkit>cudaGLUnmapBufferObject</toolkit>
+    <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGLMapBufferObject</toolkit>
+    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
     <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaGraphicsUnregisterResource</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
     <toolkit>cudaGLUnregisterBufferObject</toolkit>
-    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
-    <toolkit>cudaGLUnmapBufferObject</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaGLMapBufferObject</toolkit>
-    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaGLRegisterBufferObject</toolkit>
-    <toolkit>cudaGraphicsUnmapResources</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaGraphicsUnregisterResource</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample uses CUDA to compute and display the Mandelbrot or Julia sets interactively. It also illustrates the use of "double single" arithmetic to improve precision when zooming a long way into the pattern. This sample uses double precision.  Thanks to Mark Granger of NewTek who submitted this code sample.!]]></description>
@@ -79,6 +79,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/Mandelbrot/README.md b/Samples/5_Domain_Specific/Mandelbrot/README.md
index 57131463e..a09cfeed1 100644
--- a/Samples/5_Domain_Specific/Mandelbrot/README.md
+++ b/Samples/5_Domain_Specific/Mandelbrot/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Data Parallel Algorithms
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaGraphicsGLRegisterBuffer, cudaGLUnregisterBufferObject, cudaGraphicsResourceGetMappedPointer, cudaGLUnmapBufferObject, cudaDeviceSynchronize, cudaGLMapBufferObject, cudaMalloc, cudaGLRegisterBufferObject, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetDeviceProperties
+cudaGLUnmapBufferObject, cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGLMapBufferObject, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaGLUnregisterBufferObject, cudaDeviceSynchronize, cudaGLRegisterBufferObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/Mandelbrot/findgllib.mk b/Samples/5_Domain_Specific/Mandelbrot/findgllib.mk
index f0a5c5512..998fcf0f1 100644
--- a/Samples/5_Domain_Specific/Mandelbrot/findgllib.mk
+++ b/Samples/5_Domain_Specific/Mandelbrot/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/Makefile b/Samples/5_Domain_Specific/MonteCarloMultiGPU/Makefile
index 503c96781..c38f7e446 100644
--- a/Samples/5_Domain_Specific/MonteCarloMultiGPU/Makefile
+++ b/Samples/5_Domain_Specific/MonteCarloMultiGPU/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2017.vcxproj b/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2017.vcxproj
index f37ca9a98..3330def6a 100644
--- a/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/MonteCarloMultiGPU.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2019.vcxproj b/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2019.vcxproj
index 9c88c3f35..cf2d4ad26 100644
--- a/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/MonteCarloMultiGPU.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2022.vcxproj b/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2022.vcxproj
index a047f1c0d..852394d84 100644
--- a/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/MonteCarloMultiGPU.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/NsightEclipse.xml b/Samples/5_Domain_Specific/MonteCarloMultiGPU/NsightEclipse.xml
index 34d2d8e04..bbe55936c 100644
--- a/Samples/5_Domain_Specific/MonteCarloMultiGPU/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/MonteCarloMultiGPU/NsightEclipse.xml
@@ -3,23 +3,23 @@
 <entry>
   <name>MonteCarloMultiGPU</name>
   <cuda_api_list>
-    <toolkit>cudaMemset</toolkit>
-    <toolkit>cudaFree</toolkit>
     <toolkit>cudaStreamDestroy</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaFree</toolkit>
     <toolkit>cudaMallocHost</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaStreamSynchronize</toolkit>
     <toolkit>cudaEventDestroy</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaMemcpyAsync</toolkit>
-    <toolkit>cudaStreamSynchronize</toolkit>
-    <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample evaluates fair call price for a given set of European options using the Monte Carlo approach, taking advantage of all CUDA-capable GPUs installed in the system. This sample use double precision hardware if a GTX 200 class GPU is present.  The sample also takes advantage of CUDA 4.0 capability to supporting using a single CPU thread to control multiple GPUs]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -69,6 +69,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/README.md b/Samples/5_Domain_Specific/MonteCarloMultiGPU/README.md
index 7a0f77cb6..5eff98b76 100644
--- a/Samples/5_Domain_Specific/MonteCarloMultiGPU/README.md
+++ b/Samples/5_Domain_Specific/MonteCarloMultiGPU/README.md
@@ -10,7 +10,7 @@ Random Number Generator, Computational Finance, CURAND Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaStreamDestroy, cudaEventRecord, cudaMallocHost, cudaStreamCreate, cudaEventCreate, cudaGetDeviceCount, cudaDeviceSynchronize, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaEventDestroy, cudaSetDevice, cudaMemcpyAsync, cudaStreamSynchronize, cudaGetDeviceProperties
+cudaStreamDestroy, cudaMalloc, cudaFree, cudaMallocHost, cudaSetDevice, cudaEventSynchronize, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaEventRecord, cudaFreeHost, cudaMemset, cudaStreamSynchronize, cudaEventDestroy, cudaMemcpyAsync, cudaStreamCreate, cudaGetDeviceCount, cudaEventCreate
 
 ## Dependencies needed to build/run
 [CURAND](../../../README.md#curand)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/Makefile b/Samples/5_Domain_Specific/NV12toBGRandResize/Makefile
index cb4fefd63..f0ea1f7ce 100644
--- a/Samples/5_Domain_Specific/NV12toBGRandResize/Makefile
+++ b/Samples/5_Domain_Specific/NV12toBGRandResize/Makefile
@@ -285,9 +285,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2017.vcxproj b/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2017.vcxproj
index fe9e7d3b9..845bded6b 100644
--- a/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/NV12toBGRandResize.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -112,6 +112,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2019.vcxproj b/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2019.vcxproj
index b4c0cf63c..88c92e39c 100644
--- a/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/NV12toBGRandResize.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2022.vcxproj b/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2022.vcxproj
index 290d571c6..09b89f949 100644
--- a/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/NV12toBGRandResize.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/NsightEclipse.xml b/Samples/5_Domain_Specific/NV12toBGRandResize/NsightEclipse.xml
index 6cd413145..cec28308d 100644
--- a/Samples/5_Domain_Specific/NV12toBGRandResize/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/NV12toBGRandResize/NsightEclipse.xml
@@ -3,21 +3,21 @@
 <entry>
   <name>NV12toBGRandResize</name>
   <cuda_api_list>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaStreamDestroy</toolkit>
+    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaMallocManaged</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaStreamAttachMemAsync</toolkit>
     <toolkit>cudaDestroyTextureObject</toolkit>
     <toolkit>cudaEventSynchronize</toolkit>
-    <toolkit>cudaStreamAttachMemAsync</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaCreateTextureObject</toolkit>
-    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaEventDestroy</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This code shows two ways to convert and resize NV12 frames to BGR 3 planars frames using CUDA in batch. Way-1, Convert NV12 Input to BGR @ Input Resolution-1, then Resize to Resolution#2. Way-2, resize NV12 Input to Resolution#2 then convert it to BGR Output. NVIDIA HW Decoder, both dGPU and Tegra, normally outputs NV12 pitch format frames. For the inference using TensorRT, the input frame needs to be BGR planar format with possibly different size. So, conversion and resizing from NV12 to BGR planar is usually required for the inference following decoding. This CUDA code provides a reference implementation for conversion and resizing.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -58,6 +58,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/README.md b/Samples/5_Domain_Specific/NV12toBGRandResize/README.md
index 738dd5b35..f7899913c 100644
--- a/Samples/5_Domain_Specific/NV12toBGRandResize/README.md
+++ b/Samples/5_Domain_Specific/NV12toBGRandResize/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Image Processing, Video Processing
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamDestroy, cudaFree, cudaEventRecord, cudaMallocManaged, cudaStreamCreate, cudaEventCreate, cudaEventElapsedTime, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaEventSynchronize, cudaStreamAttachMemAsync, cudaCreateTextureObject, cudaMalloc, cudaEventDestroy, cudaMemcpy
+cudaMemcpy, cudaStreamDestroy, cudaMalloc, cudaFree, cudaMallocManaged, cudaStreamAttachMemAsync, cudaDestroyTextureObject, cudaEventSynchronize, cudaDeviceSynchronize, cudaCreateTextureObject, cudaEventRecord, cudaEventDestroy, cudaEventElapsedTime, cudaStreamCreate, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/5_Domain_Specific/SLID3D10Texture/README.md b/Samples/5_Domain_Specific/SLID3D10Texture/README.md
index 0c62e6009..ddd18f042 100644
--- a/Samples/5_Domain_Specific/SLID3D10Texture/README.md
+++ b/Samples/5_Domain_Specific/SLID3D10Texture/README.md
@@ -10,7 +10,7 @@ Performance Strategies, Graphics Interop, Image Processing, 2D Textures
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -26,14 +26,14 @@ x86_64
 cuCtxPushCurrent, cuCtxPopCurrent
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaGraphicsResourceSetMapFlags, cudaGetDeviceCount, cudaMallocPitch, cudaMalloc, cudaGraphicsSubResourceGetMappedArray, cudaGetLastError, cudaGraphicsUnmapResources, cudaGetErrorString, cudaGetDeviceProperties
+cudaGraphicsUnmapResources, cudaMalloc, cudaMallocPitch, cudaGetErrorString, cudaFree, cudaGraphicsResourceSetMapFlags, cudaGetLastError, cudaGraphicsMapResources, cudaGetDeviceCount, cudaMemset, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [DirectX](../../../README.md#directx)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2017.vcxproj b/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2017.vcxproj
index 4e8e897eb..3b3d82ca8 100644
--- a/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/SLID3D10Texture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2019.vcxproj b/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2019.vcxproj
index a7944ca58..96c3165a7 100644
--- a/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/SLID3D10Texture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2022.vcxproj b/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2022.vcxproj
index b0a7dccf0..e26cc9a44 100644
--- a/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/SLID3D10Texture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/SobelFilter/Makefile b/Samples/5_Domain_Specific/SobelFilter/Makefile
index bc7f79a22..16893f8c0 100644
--- a/Samples/5_Domain_Specific/SobelFilter/Makefile
+++ b/Samples/5_Domain_Specific/SobelFilter/Makefile
@@ -299,9 +299,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/SobelFilter/NsightEclipse.xml b/Samples/5_Domain_Specific/SobelFilter/NsightEclipse.xml
index 48e1cb0de..1409139c1 100644
--- a/Samples/5_Domain_Specific/SobelFilter/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/SobelFilter/NsightEclipse.xml
@@ -3,20 +3,20 @@
 <entry>
   <name>SobelFilter</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
+    <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMallocArray</toolkit>
     <toolkit>cudaFreeArray</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGetErrorString</toolkit>
     <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
     <toolkit>cudaDestroyTextureObject</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaCreateTextureObject</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMallocArray</toolkit>
     <toolkit>cudaGraphicsUnregisterResource</toolkit>
-    <toolkit>cudaGraphicsUnmapResources</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample implements the Sobel edge detection filter for 8-bit monochrome images.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -89,6 +89,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/SobelFilter/README.md b/Samples/5_Domain_Specific/SobelFilter/README.md
index 2c8b25648..f33e8df73 100644
--- a/Samples/5_Domain_Specific/SobelFilter/README.md
+++ b/Samples/5_Domain_Specific/SobelFilter/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Image Processing
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetErrorString
+cudaGraphicsUnmapResources, cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaGetErrorString, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2017.vcxproj b/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2017.vcxproj
index 4914918d6..bb26dfe6d 100644
--- a/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/SobelFilter.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -118,6 +118,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2019.vcxproj b/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2019.vcxproj
index 6bef30108..8dcd7a832 100644
--- a/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/SobelFilter.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2022.vcxproj b/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2022.vcxproj
index cffc57574..d9f50ec32 100644
--- a/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/SobelFilter.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/SobelFilter/findgllib.mk b/Samples/5_Domain_Specific/SobelFilter/findgllib.mk
index f0a5c5512..998fcf0f1 100644
--- a/Samples/5_Domain_Specific/SobelFilter/findgllib.mk
+++ b/Samples/5_Domain_Specific/SobelFilter/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/SobolQRNG/Makefile b/Samples/5_Domain_Specific/SobolQRNG/Makefile
index 080a5c7c1..7d80c57eb 100644
--- a/Samples/5_Domain_Specific/SobolQRNG/Makefile
+++ b/Samples/5_Domain_Specific/SobolQRNG/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/SobolQRNG/NsightEclipse.xml b/Samples/5_Domain_Specific/SobolQRNG/NsightEclipse.xml
index d12d97cce..cddf025fb 100644
--- a/Samples/5_Domain_Specific/SobolQRNG/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/SobolQRNG/NsightEclipse.xml
@@ -3,13 +3,13 @@
 <entry>
   <name>SobolQRNG</name>
   <cuda_api_list>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaGetErrorString</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaGetDevice</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaGetErrorString</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
-    <toolkit>cudaGetDevice</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample implements Sobol Quasirandom Sequence Generator.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -50,6 +50,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/SobolQRNG/README.md b/Samples/5_Domain_Specific/SobolQRNG/README.md
index 13789f59a..72bb3f844 100644
--- a/Samples/5_Domain_Specific/SobolQRNG/README.md
+++ b/Samples/5_Domain_Specific/SobolQRNG/README.md
@@ -10,7 +10,7 @@ Computational Finance
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties, cudaGetDevice
+cudaMemcpy, cudaGetErrorString, cudaFree, cudaDeviceSynchronize, cudaGetDevice, cudaMalloc, cudaGetDeviceProperties
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2017.vcxproj b/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2017.vcxproj
index ba88ef6a0..2dc8daa7c 100644
--- a/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/SobolQRNG.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2019.vcxproj b/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2019.vcxproj
index acb7b91ab..1f7410880 100644
--- a/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/SobolQRNG.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2022.vcxproj b/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2022.vcxproj
index 26344f69c..d54706be4 100644
--- a/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/SobolQRNG.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/VFlockingD3D10/README.md b/Samples/5_Domain_Specific/VFlockingD3D10/README.md
index 0db3f2f80..613b56639 100644
--- a/Samples/5_Domain_Specific/VFlockingD3D10/README.md
+++ b/Samples/5_Domain_Specific/VFlockingD3D10/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Data Parallel Algorithms, Physically-Based Simulation, Perform
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaGraphicsMapResources, cudaEventRecord, cudaGraphicsUnregisterResource, cudaEventCreate, cudaGraphicsResourceGetMappedPointer, cudaGetDeviceCount, cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventSynchronize, cudaGraphicsResourceSetMapFlags, cudaMalloc, cudaGetLastError, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties
+cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGetErrorString, cudaGraphicsResourceSetMapFlags, cudaGraphicsResourceGetMappedPointer, cudaGetLastError, cudaGraphicsMapResources, cudaEventSynchronize, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaEventRecord, cudaMemset, cudaGraphicsUnregisterResource, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceCount, cudaEventCreate
 
 ## Dependencies needed to build/run
 [DirectX](../../../README.md#directx)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2017.vcxproj b/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2017.vcxproj
index 37fe522b8..7744ff267 100644
--- a/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/VFlockingD3D10.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -110,6 +110,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2019.vcxproj b/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2019.vcxproj
index de8440f74..3c776cc94 100644
--- a/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/VFlockingD3D10.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -106,6 +106,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2022.vcxproj b/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2022.vcxproj
index 0213cd64d..65635eceb 100644
--- a/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/VFlockingD3D10.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -106,6 +106,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/bicubicTexture/Makefile b/Samples/5_Domain_Specific/bicubicTexture/Makefile
index 2e3a5627b..cc6c26808 100644
--- a/Samples/5_Domain_Specific/bicubicTexture/Makefile
+++ b/Samples/5_Domain_Specific/bicubicTexture/Makefile
@@ -299,9 +299,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/bicubicTexture/NsightEclipse.xml b/Samples/5_Domain_Specific/bicubicTexture/NsightEclipse.xml
index b9f435b02..bb42ff843 100644
--- a/Samples/5_Domain_Specific/bicubicTexture/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/bicubicTexture/NsightEclipse.xml
@@ -3,20 +3,20 @@
 <entry>
   <name>bicubicTexture</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
+    <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaCreateChannelDesc</toolkit>
+    <toolkit>cudaMallocArray</toolkit>
     <toolkit>cudaFreeArray</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
     <toolkit>cudaDestroyTextureObject</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaCreateTextureObject</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMallocArray</toolkit>
-    <toolkit>cudaCreateChannelDesc</toolkit>
     <toolkit>cudaGraphicsUnregisterResource</toolkit>
-    <toolkit>cudaGraphicsUnmapResources</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates how to efficiently implement a Bicubic B-spline interpolation filter with CUDA texture.]]></description>
@@ -86,6 +86,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/bicubicTexture/README.md b/Samples/5_Domain_Specific/bicubicTexture/README.md
index db1fc40f2..c972ebdab 100644
--- a/Samples/5_Domain_Specific/bicubicTexture/README.md
+++ b/Samples/5_Domain_Specific/bicubicTexture/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Image Processing
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaCreateChannelDesc, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetDeviceProperties
+cudaGraphicsUnmapResources, cudaCreateChannelDesc, cudaMallocArray, cudaFreeArray, cudaFree, cudaMemcpy, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2017.vcxproj b/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2017.vcxproj
index f7c07a0ed..11e527bb3 100644
--- a/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/bicubicTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -118,6 +118,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2019.vcxproj b/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2019.vcxproj
index 9a4ebd55b..dbb64abb2 100644
--- a/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/bicubicTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2022.vcxproj b/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2022.vcxproj
index 9bb627c09..a54bf6b97 100644
--- a/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/bicubicTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/bicubicTexture/data/teapot512.pgm b/Samples/5_Domain_Specific/bicubicTexture/data/teapot512.pgm
index 23db9b9a3..4e5ca4595 100644
Binary files a/Samples/5_Domain_Specific/bicubicTexture/data/teapot512.pgm and b/Samples/5_Domain_Specific/bicubicTexture/data/teapot512.pgm differ
diff --git a/Samples/5_Domain_Specific/bicubicTexture/findgllib.mk b/Samples/5_Domain_Specific/bicubicTexture/findgllib.mk
index f0a5c5512..998fcf0f1 100644
--- a/Samples/5_Domain_Specific/bicubicTexture/findgllib.mk
+++ b/Samples/5_Domain_Specific/bicubicTexture/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/bilateralFilter/Makefile b/Samples/5_Domain_Specific/bilateralFilter/Makefile
index 800dfc137..2a4ee06bc 100644
--- a/Samples/5_Domain_Specific/bilateralFilter/Makefile
+++ b/Samples/5_Domain_Specific/bilateralFilter/Makefile
@@ -299,9 +299,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/bilateralFilter/NsightEclipse.xml b/Samples/5_Domain_Specific/bilateralFilter/NsightEclipse.xml
index 8ca67bd8f..b9e13ffa6 100644
--- a/Samples/5_Domain_Specific/bilateralFilter/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/bilateralFilter/NsightEclipse.xml
@@ -3,18 +3,18 @@
 <entry>
   <name>bilateralFilter</name>
   <cuda_api_list>
-    <toolkit>cudaMemcpyToSymbol</toolkit>
+    <toolkit>cudaRuntimeGetVersion</toolkit>
+    <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaMallocPitch</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
     <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
     <toolkit>cudaDestroyTextureObject</toolkit>
-    <toolkit>cudaMallocPitch</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaCreateTextureObject</toolkit>
+    <toolkit>cudaMemcpyToSymbol</toolkit>
     <toolkit>cudaGraphicsUnregisterResource</toolkit>
-    <toolkit>cudaGraphicsUnmapResources</toolkit>
-    <toolkit>cudaRuntimeGetVersion</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[Bilateral filter is an edge-preserving non-linear smoothing filter that is implemented with CUDA with OpenGL rendering. It can be used in image recovery and denoising. Each pixel is weight by considering both the spatial distance and color distance between its neighbors. Reference:"C. Tomasi, R. Manduchi, Bilateral Filtering for Gray and Color Images, proceeding of the ICCV, 1998, http://users.soe.ucsc.edu/~manduchi/Papers/ICCV98.pdf"]]></description>
@@ -86,6 +86,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/bilateralFilter/README.md b/Samples/5_Domain_Specific/bilateralFilter/README.md
index 8741c5892..b31f086f5 100644
--- a/Samples/5_Domain_Specific/bilateralFilter/README.md
+++ b/Samples/5_Domain_Specific/bilateralFilter/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Image Processing
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemcpyToSymbol, cudaFree, cudaGraphicsMapResources, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaMallocPitch, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaRuntimeGetVersion, cudaGetDeviceProperties
+cudaRuntimeGetVersion, cudaGraphicsUnmapResources, cudaMallocPitch, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMemcpyToSymbol, cudaGraphicsUnregisterResource, cudaGraphicsGLRegisterBuffer, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2017.vcxproj b/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2017.vcxproj
index 5a64d5c92..66d5cb169 100644
--- a/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/bilateralFilter.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -120,6 +120,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2019.vcxproj b/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2019.vcxproj
index 231749363..908162125 100644
--- a/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/bilateralFilter.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -116,6 +116,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2022.vcxproj b/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2022.vcxproj
index 3a4c27a8f..8f7f94ad9 100644
--- a/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/bilateralFilter.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -116,6 +116,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/bilateralFilter/findgllib.mk b/Samples/5_Domain_Specific/bilateralFilter/findgllib.mk
index f0a5c5512..998fcf0f1 100644
--- a/Samples/5_Domain_Specific/bilateralFilter/findgllib.mk
+++ b/Samples/5_Domain_Specific/bilateralFilter/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/binomialOptions/Makefile b/Samples/5_Domain_Specific/binomialOptions/Makefile
index 2c85e8cec..7d498db9c 100644
--- a/Samples/5_Domain_Specific/binomialOptions/Makefile
+++ b/Samples/5_Domain_Specific/binomialOptions/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/binomialOptions/NsightEclipse.xml b/Samples/5_Domain_Specific/binomialOptions/NsightEclipse.xml
index 0730f13d0..f5273e497 100644
--- a/Samples/5_Domain_Specific/binomialOptions/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/binomialOptions/NsightEclipse.xml
@@ -3,8 +3,8 @@
 <entry>
   <name>binomialOptions</name>
   <cuda_api_list>
-    <toolkit>cudaMemcpyToSymbol</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaMemcpyToSymbol</toolkit>
     <toolkit>cudaMemcpyFromSymbol</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample evaluates fair call price for a given set of European options under binomial model.]]></description>
@@ -46,6 +46,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/binomialOptions/README.md b/Samples/5_Domain_Specific/binomialOptions/README.md
index 869a40cf3..574d7e1fb 100644
--- a/Samples/5_Domain_Specific/binomialOptions/README.md
+++ b/Samples/5_Domain_Specific/binomialOptions/README.md
@@ -10,7 +10,7 @@ Computational Finance
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemcpyToSymbol, cudaDeviceSynchronize, cudaMemcpyFromSymbol
+cudaDeviceSynchronize, cudaMemcpyToSymbol, cudaMemcpyFromSymbol
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2017.vcxproj b/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2017.vcxproj
index a5a832abc..8416e0338 100644
--- a/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/binomialOptions.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -110,6 +110,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2019.vcxproj b/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2019.vcxproj
index 95af14387..806fed73f 100644
--- a/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/binomialOptions.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -106,6 +106,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2022.vcxproj b/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2022.vcxproj
index 7ff63d934..616a1dc67 100644
--- a/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/binomialOptions.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -106,6 +106,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/binomialOptions_nvrtc/README.md b/Samples/5_Domain_Specific/binomialOptions_nvrtc/README.md
index 67b0cf18f..0d96c7ef8 100644
--- a/Samples/5_Domain_Specific/binomialOptions_nvrtc/README.md
+++ b/Samples/5_Domain_Specific/binomialOptions_nvrtc/README.md
@@ -10,7 +10,7 @@ Computational Finance, Runtime Compilation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,7 +23,7 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuLaunchKernel, cuModuleGetGlobal, cuCtxSynchronize, cuMemcpyDtoH, cuModuleGetFunction, cuMemcpyHtoD
+cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuModuleGetGlobal, cuCtxSynchronize, cuModuleGetFunction
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
 cudaBlockSize, cudaGridSize
@@ -33,7 +33,7 @@ cudaBlockSize, cudaGridSize
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2017.vcxproj b/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2017.vcxproj
index b5cd46deb..4e020948c 100644
--- a/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -112,6 +112,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2019.vcxproj b/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2019.vcxproj
index e79add30f..153e2b1c8 100644
--- a/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2022.vcxproj b/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2022.vcxproj
index 7dbfcf7a3..d2720c08d 100644
--- a/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/convolutionFFT2D/Makefile b/Samples/5_Domain_Specific/convolutionFFT2D/Makefile
index 35185f284..0528320af 100644
--- a/Samples/5_Domain_Specific/convolutionFFT2D/Makefile
+++ b/Samples/5_Domain_Specific/convolutionFFT2D/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/convolutionFFT2D/NsightEclipse.xml b/Samples/5_Domain_Specific/convolutionFFT2D/NsightEclipse.xml
index f197fa28d..f81340552 100644
--- a/Samples/5_Domain_Specific/convolutionFFT2D/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/convolutionFFT2D/NsightEclipse.xml
@@ -3,13 +3,13 @@
 <entry>
   <name>convolutionFFT2D</name>
   <cuda_api_list>
-    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaDestroyTextureObject</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaCreateTextureObject</toolkit>
+    <toolkit>cudaMemset</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates how 2D convolutions with very large kernel sizes can be efficiently implemented using FFT transformations.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -58,6 +58,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/convolutionFFT2D/README.md b/Samples/5_Domain_Specific/convolutionFFT2D/README.md
index 089b847d8..0f8d51935 100644
--- a/Samples/5_Domain_Specific/convolutionFFT2D/README.md
+++ b/Samples/5_Domain_Specific/convolutionFFT2D/README.md
@@ -10,7 +10,7 @@ Image Processing, CUFFT Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaCreateTextureObject, cudaMalloc, cudaMemcpy
+cudaMemcpy, cudaFree, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMemset, cudaMalloc
 
 ## Dependencies needed to build/run
 [CUFFT](../../../README.md#cufft)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2017.vcxproj b/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2017.vcxproj
index 997c79125..4fe20d854 100644
--- a/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/convolutionFFT2D.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -110,6 +110,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2019.vcxproj b/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2019.vcxproj
index 3156c87f0..71bad451e 100644
--- a/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/convolutionFFT2D.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -106,6 +106,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2022.vcxproj b/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2022.vcxproj
index 641a761c0..a73a0cedb 100644
--- a/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/convolutionFFT2D.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -106,6 +106,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/dwtHaar1D/Makefile b/Samples/5_Domain_Specific/dwtHaar1D/Makefile
index d24cac3f6..abd407f4f 100644
--- a/Samples/5_Domain_Specific/dwtHaar1D/Makefile
+++ b/Samples/5_Domain_Specific/dwtHaar1D/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/dwtHaar1D/NsightEclipse.xml b/Samples/5_Domain_Specific/dwtHaar1D/NsightEclipse.xml
index 386a5469b..daa961211 100644
--- a/Samples/5_Domain_Specific/dwtHaar1D/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/dwtHaar1D/NsightEclipse.xml
@@ -4,8 +4,8 @@
   <name>dwtHaar1D</name>
   <cuda_api_list>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaFree</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
   </cuda_api_list>
   <description><![CDATA[Discrete Haar wavelet decomposition for 1D signals with a length which is a power of 2.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -46,6 +46,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/dwtHaar1D/README.md b/Samples/5_Domain_Specific/dwtHaar1D/README.md
index 6d4eb9989..da368a4e9 100644
--- a/Samples/5_Domain_Specific/dwtHaar1D/README.md
+++ b/Samples/5_Domain_Specific/dwtHaar1D/README.md
@@ -10,7 +10,7 @@ Image Processing, Video Compression
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaFree, cudaMemcpy
+cudaMalloc, cudaMemcpy, cudaFree
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2017.vcxproj b/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2017.vcxproj
index 6592422c6..6b6935741 100644
--- a/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/dwtHaar1D.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2019.vcxproj b/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2019.vcxproj
index 9cd792c5d..c191c4ff6 100644
--- a/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/dwtHaar1D.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2022.vcxproj b/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2022.vcxproj
index 739fd5e63..bba596d8e 100644
--- a/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/dwtHaar1D.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/dxtc/Makefile b/Samples/5_Domain_Specific/dxtc/Makefile
index 2de94434f..b379d6c88 100644
--- a/Samples/5_Domain_Specific/dxtc/Makefile
+++ b/Samples/5_Domain_Specific/dxtc/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/dxtc/NsightEclipse.xml b/Samples/5_Domain_Specific/dxtc/NsightEclipse.xml
index f194bea9c..a31d16cbc 100644
--- a/Samples/5_Domain_Specific/dxtc/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/dxtc/NsightEclipse.xml
@@ -3,12 +3,12 @@
 <entry>
   <name>dxtc</name>
   <cuda_api_list>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaGetDevice</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
-    <toolkit>cudaGetDevice</toolkit>
   </cuda_api_list>
   <description><![CDATA[High Quality DXT Compression using CUDA. This example shows how to implement an existing computationally-intensive CPU compression algorithm in parallel on the GPU, and obtain an order of magnitude performance improvement.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -61,6 +61,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/dxtc/README.md b/Samples/5_Domain_Specific/dxtc/README.md
index f45d97eac..3a805668a 100644
--- a/Samples/5_Domain_Specific/dxtc/README.md
+++ b/Samples/5_Domain_Specific/dxtc/README.md
@@ -10,7 +10,7 @@ Cooperative Groups, Image Processing, Image Compression
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties, cudaGetDevice
+cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaGetDevice, cudaMalloc, cudaGetDeviceProperties
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/5_Domain_Specific/dxtc/dxtc_vs2017.vcxproj b/Samples/5_Domain_Specific/dxtc/dxtc_vs2017.vcxproj
index c655b3046..e1d68433b 100644
--- a/Samples/5_Domain_Specific/dxtc/dxtc_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/dxtc/dxtc_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/dxtc.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/dxtc/dxtc_vs2019.vcxproj b/Samples/5_Domain_Specific/dxtc/dxtc_vs2019.vcxproj
index 53567caa1..c1b7f6dbc 100644
--- a/Samples/5_Domain_Specific/dxtc/dxtc_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/dxtc/dxtc_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/dxtc.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/dxtc/dxtc_vs2022.vcxproj b/Samples/5_Domain_Specific/dxtc/dxtc_vs2022.vcxproj
index bf3c5c259..8609b640a 100644
--- a/Samples/5_Domain_Specific/dxtc/dxtc_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/dxtc/dxtc_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/dxtc.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/fastWalshTransform/Makefile b/Samples/5_Domain_Specific/fastWalshTransform/Makefile
index 1ba7282a1..3cf3f547c 100644
--- a/Samples/5_Domain_Specific/fastWalshTransform/Makefile
+++ b/Samples/5_Domain_Specific/fastWalshTransform/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/fastWalshTransform/NsightEclipse.xml b/Samples/5_Domain_Specific/fastWalshTransform/NsightEclipse.xml
index a88475e9f..9e627352e 100644
--- a/Samples/5_Domain_Specific/fastWalshTransform/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/fastWalshTransform/NsightEclipse.xml
@@ -3,11 +3,11 @@
 <entry>
   <name>fastWalshTransform</name>
   <cuda_api_list>
-    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaMemset</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
   </cuda_api_list>
   <description><![CDATA[Naturally(Hadamard)-ordered Fast Walsh Transform for batching vectors of arbitrary eligible lengths that are power of two in size.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -54,6 +54,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/fastWalshTransform/README.md b/Samples/5_Domain_Specific/fastWalshTransform/README.md
index 00dd99d20..473f4ce96 100644
--- a/Samples/5_Domain_Specific/fastWalshTransform/README.md
+++ b/Samples/5_Domain_Specific/fastWalshTransform/README.md
@@ -10,7 +10,7 @@ Linear Algebra, Data-Parallel Algorithms, Video Compression
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy
+cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaMemset, cudaMalloc
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2017.vcxproj b/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2017.vcxproj
index 6b2fb2f24..6ee445df1 100644
--- a/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/fastWalshTransform.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2019.vcxproj b/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2019.vcxproj
index d6fdbf349..71cafa089 100644
--- a/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/fastWalshTransform.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2022.vcxproj b/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2022.vcxproj
index 129775785..8e30886b2 100644
--- a/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/fastWalshTransform.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/fluidsD3D9/README.md b/Samples/5_Domain_Specific/fluidsD3D9/README.md
index 716a5aab9..912936d8d 100644
--- a/Samples/5_Domain_Specific/fluidsD3D9/README.md
+++ b/Samples/5_Domain_Specific/fluidsD3D9/README.md
@@ -10,7 +10,7 @@ Graphics Interop, CUFFT Library, Physically-Based Simulation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaFreeArray, cudaGraphicsResourceGetMappedPointer, cudaDestroyTextureObject, cudaMallocPitch, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaGetLastError, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetDeviceProperties, cudaGetDevice
+cudaGraphicsUnmapResources, cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaMallocPitch, cudaGraphicsResourceGetMappedPointer, cudaGetLastError, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaGetDevice, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [DirectX](../../../README.md#directx)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2017.vcxproj b/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2017.vcxproj
index fe88c5063..442216834 100644
--- a/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/fluidsD3D9.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -110,6 +110,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2019.vcxproj b/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2019.vcxproj
index c7cd7d376..3feb3a219 100644
--- a/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/fluidsD3D9.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -106,6 +106,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2022.vcxproj b/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2022.vcxproj
index 2ff8ad414..3dedaf001 100644
--- a/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/fluidsD3D9.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -106,6 +106,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/fluidsGL/Makefile b/Samples/5_Domain_Specific/fluidsGL/Makefile
index 4596e5edd..1c9872284 100644
--- a/Samples/5_Domain_Specific/fluidsGL/Makefile
+++ b/Samples/5_Domain_Specific/fluidsGL/Makefile
@@ -299,9 +299,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/fluidsGL/NsightEclipse.xml b/Samples/5_Domain_Specific/fluidsGL/NsightEclipse.xml
index e2ebcaaa0..96bb4ea0d 100644
--- a/Samples/5_Domain_Specific/fluidsGL/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/fluidsGL/NsightEclipse.xml
@@ -3,19 +3,19 @@
 <entry>
   <name>fluidsGL</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
+    <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMallocArray</toolkit>
     <toolkit>cudaFreeArray</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaMallocPitch</toolkit>
     <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
     <toolkit>cudaDestroyTextureObject</toolkit>
-    <toolkit>cudaMallocPitch</toolkit>
     <toolkit>cudaCreateTextureObject</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMallocArray</toolkit>
     <toolkit>cudaGraphicsUnregisterResource</toolkit>
-    <toolkit>cudaGraphicsUnmapResources</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[An example of fluid simulation using CUDA and CUFFT, with OpenGL rendering.]]></description>
@@ -83,6 +83,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/fluidsGL/README.md b/Samples/5_Domain_Specific/fluidsGL/README.md
index ea572321d..0d492ebc8 100644
--- a/Samples/5_Domain_Specific/fluidsGL/README.md
+++ b/Samples/5_Domain_Specific/fluidsGL/README.md
@@ -10,7 +10,7 @@ Graphics Interop, CUFFT Library, Physically-Based Simulation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDestroyTextureObject, cudaMallocPitch, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetDeviceProperties
+cudaGraphicsUnmapResources, cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaMallocPitch, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl), [CUFFT](../../../README.md#cufft)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/fluidsGL/findgllib.mk b/Samples/5_Domain_Specific/fluidsGL/findgllib.mk
index f0a5c5512..998fcf0f1 100644
--- a/Samples/5_Domain_Specific/fluidsGL/findgllib.mk
+++ b/Samples/5_Domain_Specific/fluidsGL/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2017.vcxproj b/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2017.vcxproj
index c90c550ec..8d2822ef0 100644
--- a/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/fluidsGL.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -120,6 +120,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2019.vcxproj b/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2019.vcxproj
index 1e2ee21ea..aa2839e78 100644
--- a/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/fluidsGL.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -116,6 +116,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2022.vcxproj b/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2022.vcxproj
index 8e7553c73..eeae6a675 100644
--- a/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/fluidsGL.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -116,6 +116,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/fluidsGLES/Makefile b/Samples/5_Domain_Specific/fluidsGLES/Makefile
index 269debb05..00b514cf3 100644
--- a/Samples/5_Domain_Specific/fluidsGLES/Makefile
+++ b/Samples/5_Domain_Specific/fluidsGLES/Makefile
@@ -313,9 +313,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/fluidsGLES/NsightEclipse.xml b/Samples/5_Domain_Specific/fluidsGLES/NsightEclipse.xml
index e5feea24b..afaeff7e7 100644
--- a/Samples/5_Domain_Specific/fluidsGLES/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/fluidsGLES/NsightEclipse.xml
@@ -3,19 +3,19 @@
 <entry>
   <name>fluidsGLES</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
+    <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMallocArray</toolkit>
     <toolkit>cudaFreeArray</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaMallocPitch</toolkit>
     <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
     <toolkit>cudaDestroyTextureObject</toolkit>
-    <toolkit>cudaMallocPitch</toolkit>
     <toolkit>cudaCreateTextureObject</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMallocArray</toolkit>
     <toolkit>cudaGraphicsUnregisterResource</toolkit>
-    <toolkit>cudaGraphicsUnmapResources</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[An example of fluid simulation using CUDA and CUFFT, with OpenGLES rendering.]]></description>
@@ -74,6 +74,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>arm</arch>
diff --git a/Samples/5_Domain_Specific/fluidsGLES/README.md b/Samples/5_Domain_Specific/fluidsGLES/README.md
index 406eb1bcb..b2432dd03 100644
--- a/Samples/5_Domain_Specific/fluidsGLES/README.md
+++ b/Samples/5_Domain_Specific/fluidsGLES/README.md
@@ -10,7 +10,7 @@ Graphics Interop, CUFFT Library, Physically-Based Simulation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDestroyTextureObject, cudaMallocPitch, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetDeviceProperties
+cudaGraphicsUnmapResources, cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaMallocPitch, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GLES](../../../README.md#gles), [CUFFT](../../../README.md#cufft)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/fluidsGLES/findgleslib.mk b/Samples/5_Domain_Specific/fluidsGLES/findgleslib.mk
index bcb335c1f..6da2f0781 100644
--- a/Samples/5_Domain_Specific/fluidsGLES/findgleslib.mk
+++ b/Samples/5_Domain_Specific/fluidsGLES/findgleslib.mk
@@ -60,6 +60,7 @@ ifeq ("$(TARGET_OS)","linux")
     RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
     CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -86,27 +87,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
+    else
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
   
   # find libGL, libGLU, libXi, 
   EGLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libEGL.so    -print 2>/dev/null)
   GLESLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLESv2.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/marchingCubes/Makefile b/Samples/5_Domain_Specific/marchingCubes/Makefile
index 91180207e..baf4f0f0f 100644
--- a/Samples/5_Domain_Specific/marchingCubes/Makefile
+++ b/Samples/5_Domain_Specific/marchingCubes/Makefile
@@ -324,9 +324,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/marchingCubes/NsightEclipse.xml b/Samples/5_Domain_Specific/marchingCubes/NsightEclipse.xml
index ea03311eb..0c23d0201 100644
--- a/Samples/5_Domain_Specific/marchingCubes/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/marchingCubes/NsightEclipse.xml
@@ -6,21 +6,21 @@
     <flag>--std=c++14</flag>
   </cflags>
   <cuda_api_list>
+    <toolkit>cudaGLUnmapBufferObject</toolkit>
+    <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaCreateChannelDesc</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaGraphicsUnregisterResource</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
-    <toolkit>cudaGLUnregisterBufferObject</toolkit>
+    <toolkit>cudaGLMapBufferObject</toolkit>
     <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
-    <toolkit>cudaGLUnmapBufferObject</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
     <toolkit>cudaDestroyTextureObject</toolkit>
-    <toolkit>cudaGLMapBufferObject</toolkit>
+    <toolkit>cudaGLUnregisterBufferObject</toolkit>
     <toolkit>cudaCreateTextureObject</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaCreateChannelDesc</toolkit>
     <toolkit>cudaGLRegisterBufferObject</toolkit>
-    <toolkit>cudaGraphicsUnmapResources</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaGraphicsUnregisterResource</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample extracts a geometric isosurface from a volume dataset using the marching cubes algorithm. It uses the scan (prefix sum) function from the Thrust library to perform stream compaction.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -92,6 +92,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/marchingCubes/README.md b/Samples/5_Domain_Specific/marchingCubes/README.md
index 683f4f6b6..0c2ed9e1a 100644
--- a/Samples/5_Domain_Specific/marchingCubes/README.md
+++ b/Samples/5_Domain_Specific/marchingCubes/README.md
@@ -10,7 +10,7 @@ OpenGL Graphics Interop, Vertex Buffers, 3D Graphics, Physically Based Simulatio
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaGraphicsGLRegisterBuffer, cudaGLUnregisterBufferObject, cudaGraphicsResourceGetMappedPointer, cudaGLUnmapBufferObject, cudaDestroyTextureObject, cudaGLMapBufferObject, cudaCreateTextureObject, cudaMalloc, cudaCreateChannelDesc, cudaGLRegisterBufferObject, cudaGraphicsUnmapResources, cudaMemcpy
+cudaGLUnmapBufferObject, cudaGraphicsUnmapResources, cudaCreateChannelDesc, cudaMemcpy, cudaFree, cudaGLMapBufferObject, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaGLUnregisterBufferObject, cudaCreateTextureObject, cudaGLRegisterBufferObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/marchingCubes/findgllib.mk b/Samples/5_Domain_Specific/marchingCubes/findgllib.mk
index f0a5c5512..998fcf0f1 100644
--- a/Samples/5_Domain_Specific/marchingCubes/findgllib.mk
+++ b/Samples/5_Domain_Specific/marchingCubes/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2017.vcxproj b/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2017.vcxproj
index 9eb7601b4..9d5e9d284 100644
--- a/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/marchingCubes.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -119,6 +119,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2019.vcxproj b/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2019.vcxproj
index e2edca520..9e370d0bd 100644
--- a/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/marchingCubes.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -115,6 +115,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2022.vcxproj b/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2022.vcxproj
index 284e00cbe..ef1da8804 100644
--- a/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/marchingCubes.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -115,6 +115,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/nbody/Makefile b/Samples/5_Domain_Specific/nbody/Makefile
index d1d2e6141..f4e1df427 100644
--- a/Samples/5_Domain_Specific/nbody/Makefile
+++ b/Samples/5_Domain_Specific/nbody/Makefile
@@ -299,9 +299,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/nbody/NsightEclipse.xml b/Samples/5_Domain_Specific/nbody/NsightEclipse.xml
index 293adc581..213de2217 100644
--- a/Samples/5_Domain_Specific/nbody/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/nbody/NsightEclipse.xml
@@ -6,24 +6,24 @@
     <flag>-ftz=true</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaMemcpyToSymbol</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaStreamQuery</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
+    <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaSetDeviceFlags</toolkit>
+    <toolkit>cudaGraphicsResourceSetMapFlags</toolkit>
     <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaEventSynchronize</toolkit>
     <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaGetDeviceProperties</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaEventSynchronize</toolkit>
-    <toolkit>cudaGraphicsResourceSetMapFlags</toolkit>
-    <toolkit>cudaSetDeviceFlags</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaGetDevice</toolkit>
+    <toolkit>cudaMemcpyToSymbol</toolkit>
+    <toolkit>cudaStreamQuery</toolkit>
     <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaDeviceCanAccessPeer</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaGraphicsUnmapResources</toolkit>
-    <toolkit>cudaGetDeviceProperties</toolkit>
-    <toolkit>cudaGetDevice</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates efficient all-pairs simulation of a gravitational n-body simulation in CUDA.  This sample accompanies the GPU Gems 3 chapter "Fast N-Body Simulation with CUDA".  With CUDA 5.5, performance on Tesla K20c has increased to over 1.8TFLOP/s single precision.  Double Performance has also improved on all Kepler and Fermi GPU architectures as well.  Starting in CUDA 4.0, the nBody sample has been updated to take advantage of new features to easily scale the n-body simulation across multiple GPUs in a single PC.  Adding "-numbodies=<bodies>" to the command line will allow users to set # of bodies for simulation.  Adding “-numdevices=<N>” to the command line option will cause the sample to use N devices (if available) for simulation.  In this mode, the position and velocity data for all bodies are read from system memory using “zero copy” rather than from device memory.  For a small number of devices (4 or fewer) and a large enough number of bodies, bandwidth is not a bottleneck so we can achieve strong scaling across these devices.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -91,6 +91,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/nbody/README.md b/Samples/5_Domain_Specific/nbody/README.md
index 40d122a7b..837296c76 100644
--- a/Samples/5_Domain_Specific/nbody/README.md
+++ b/Samples/5_Domain_Specific/nbody/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Data Parallel Algorithms, Physically-Based Simulation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemcpyToSymbol, cudaGraphicsMapResources, cudaEventRecord, cudaStreamQuery, cudaEventCreate, cudaGraphicsResourceGetMappedPointer, cudaGetDeviceCount, cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventSynchronize, cudaGraphicsResourceSetMapFlags, cudaSetDeviceFlags, cudaEventDestroy, cudaDeviceCanAccessPeer, cudaSetDevice, cudaGraphicsUnmapResources, cudaGetDeviceProperties, cudaGetDevice
+cudaGraphicsUnmapResources, cudaSetDeviceFlags, cudaGraphicsResourceSetMapFlags, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaSetDevice, cudaEventSynchronize, cudaGetDeviceCount, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaEventRecord, cudaGetDevice, cudaMemcpyToSymbol, cudaStreamQuery, cudaEventDestroy, cudaEventElapsedTime, cudaDeviceCanAccessPeer, cudaEventCreate
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/nbody/findgllib.mk b/Samples/5_Domain_Specific/nbody/findgllib.mk
index f0a5c5512..998fcf0f1 100644
--- a/Samples/5_Domain_Specific/nbody/findgllib.mk
+++ b/Samples/5_Domain_Specific/nbody/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/nbody/nbody_vs2017.vcxproj b/Samples/5_Domain_Specific/nbody/nbody_vs2017.vcxproj
index 1406b31bf..99e5a6bfb 100644
--- a/Samples/5_Domain_Specific/nbody/nbody_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/nbody/nbody_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/nbody.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -125,6 +125,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/nbody/nbody_vs2019.vcxproj b/Samples/5_Domain_Specific/nbody/nbody_vs2019.vcxproj
index ae04b090f..7662e5009 100644
--- a/Samples/5_Domain_Specific/nbody/nbody_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/nbody/nbody_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/nbody.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -121,6 +121,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/nbody/nbody_vs2022.vcxproj b/Samples/5_Domain_Specific/nbody/nbody_vs2022.vcxproj
index c1495525e..1c2c9ecfb 100644
--- a/Samples/5_Domain_Specific/nbody/nbody_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/nbody/nbody_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/nbody.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -121,6 +121,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/nbody_opengles/Makefile b/Samples/5_Domain_Specific/nbody_opengles/Makefile
index 5b56f1054..ef0b753bf 100644
--- a/Samples/5_Domain_Specific/nbody_opengles/Makefile
+++ b/Samples/5_Domain_Specific/nbody_opengles/Makefile
@@ -313,9 +313,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/nbody_opengles/NsightEclipse.xml b/Samples/5_Domain_Specific/nbody_opengles/NsightEclipse.xml
index e7e56c9a8..4ffba110a 100644
--- a/Samples/5_Domain_Specific/nbody_opengles/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/nbody_opengles/NsightEclipse.xml
@@ -6,23 +6,23 @@
     <flag>-ftz=true</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaMemcpyToSymbol</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaStreamQuery</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaEventSynchronize</toolkit>
-    <toolkit>cudaGraphicsResourceSetMapFlags</toolkit>
+    <toolkit>cudaGraphicsUnmapResources</toolkit>
     <toolkit>cudaSetDeviceFlags</toolkit>
-    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaGraphicsResourceSetMapFlags</toolkit>
+    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
     <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaEventSynchronize</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaGetDevice</toolkit>
+    <toolkit>cudaMemcpyToSymbol</toolkit>
+    <toolkit>cudaStreamQuery</toolkit>
+    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates efficient all-pairs simulation of a gravitational n-body simulation in CUDA. Unlike the OpenGL nbody sample, there is no user interaction.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -81,6 +81,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>arm</arch>
diff --git a/Samples/5_Domain_Specific/nbody_opengles/README.md b/Samples/5_Domain_Specific/nbody_opengles/README.md
index 3c4772d26..a7911e09e 100644
--- a/Samples/5_Domain_Specific/nbody_opengles/README.md
+++ b/Samples/5_Domain_Specific/nbody_opengles/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Data Parallel Algorithms, Physically-Based Simulation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemcpyToSymbol, cudaGraphicsMapResources, cudaEventRecord, cudaStreamQuery, cudaEventCreate, cudaGraphicsResourceGetMappedPointer, cudaGetDeviceCount, cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventSynchronize, cudaGraphicsResourceSetMapFlags, cudaSetDeviceFlags, cudaEventDestroy, cudaSetDevice, cudaGraphicsUnmapResources, cudaGetDeviceProperties, cudaGetDevice
+cudaGraphicsUnmapResources, cudaSetDeviceFlags, cudaGraphicsResourceSetMapFlags, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaSetDevice, cudaEventSynchronize, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaEventRecord, cudaGetDevice, cudaMemcpyToSymbol, cudaStreamQuery, cudaEventDestroy, cudaEventElapsedTime, cudaGetDeviceCount, cudaEventCreate
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GLES](../../../README.md#gles)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/nbody_opengles/findgleslib.mk b/Samples/5_Domain_Specific/nbody_opengles/findgleslib.mk
index bcb335c1f..6da2f0781 100644
--- a/Samples/5_Domain_Specific/nbody_opengles/findgleslib.mk
+++ b/Samples/5_Domain_Specific/nbody_opengles/findgleslib.mk
@@ -60,6 +60,7 @@ ifeq ("$(TARGET_OS)","linux")
     RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
     CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -86,27 +87,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
-    ifeq ("$(SUSE)","0")
+
+    ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-  
+
   # find libGL, libGLU, libXi, 
   EGLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libEGL.so    -print 2>/dev/null)
   GLESLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLESv2.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/nbody_screen/Makefile b/Samples/5_Domain_Specific/nbody_screen/Makefile
index 60558f494..b54e17953 100644
--- a/Samples/5_Domain_Specific/nbody_screen/Makefile
+++ b/Samples/5_Domain_Specific/nbody_screen/Makefile
@@ -320,9 +320,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/nbody_screen/NsightEclipse.xml b/Samples/5_Domain_Specific/nbody_screen/NsightEclipse.xml
index 1c3275ce4..074a2e5b4 100644
--- a/Samples/5_Domain_Specific/nbody_screen/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/nbody_screen/NsightEclipse.xml
@@ -6,23 +6,23 @@
     <flag>-ftz=true</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaMemcpyToSymbol</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaStreamQuery</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaEventSynchronize</toolkit>
-    <toolkit>cudaGraphicsResourceSetMapFlags</toolkit>
+    <toolkit>cudaGraphicsUnmapResources</toolkit>
     <toolkit>cudaSetDeviceFlags</toolkit>
-    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaGraphicsResourceSetMapFlags</toolkit>
+    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
     <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaEventSynchronize</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaGetDevice</toolkit>
+    <toolkit>cudaMemcpyToSymbol</toolkit>
+    <toolkit>cudaStreamQuery</toolkit>
+    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates efficient all-pairs simulation of a gravitational n-body simulation in CUDA. Unlike the OpenGL nbody sample, there is no user interaction.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -79,6 +79,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <platform>qnx</platform>
diff --git a/Samples/5_Domain_Specific/nbody_screen/README.md b/Samples/5_Domain_Specific/nbody_screen/README.md
index 73787bdd8..54b9df1c0 100644
--- a/Samples/5_Domain_Specific/nbody_screen/README.md
+++ b/Samples/5_Domain_Specific/nbody_screen/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Data Parallel Algorithms, Physically-Based Simulation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemcpyToSymbol, cudaGraphicsMapResources, cudaEventRecord, cudaStreamQuery, cudaEventCreate, cudaGraphicsResourceGetMappedPointer, cudaGetDeviceCount, cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventSynchronize, cudaGraphicsResourceSetMapFlags, cudaSetDeviceFlags, cudaEventDestroy, cudaSetDevice, cudaGraphicsUnmapResources, cudaGetDeviceProperties, cudaGetDevice
+cudaGraphicsUnmapResources, cudaSetDeviceFlags, cudaGraphicsResourceSetMapFlags, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaSetDevice, cudaEventSynchronize, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaEventRecord, cudaGetDevice, cudaMemcpyToSymbol, cudaStreamQuery, cudaEventDestroy, cudaEventElapsedTime, cudaGetDeviceCount, cudaEventCreate
 
 ## Dependencies needed to build/run
 [screen](../../../README.md#screen), [GLES](../../../README.md#gles)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/nbody_screen/findgleslib.mk b/Samples/5_Domain_Specific/nbody_screen/findgleslib.mk
index bcb335c1f..6da2f0781 100644
--- a/Samples/5_Domain_Specific/nbody_screen/findgleslib.mk
+++ b/Samples/5_Domain_Specific/nbody_screen/findgleslib.mk
@@ -60,6 +60,7 @@ ifeq ("$(TARGET_OS)","linux")
     RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
     CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -86,27 +87,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
-    ifeq ("$(SUSE)","0")
+
+    ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-  
+
   # find libGL, libGLU, libXi, 
   EGLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libEGL.so    -print 2>/dev/null)
   GLESLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLESv2.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/Makefile b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/Makefile
index 6933444e7..37afba153 100644
--- a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/Makefile
+++ b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/NsightEclipse.xml b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/NsightEclipse.xml
index 1fc38a6c9..57679e4cf 100644
--- a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/NsightEclipse.xml
@@ -3,30 +3,30 @@
 <entry>
   <name>p2pBandwidthLatencyTest</name>
   <cuda_api_list>
-    <toolkit>cudaDeviceEnablePeerAccess</toolkit>
-    <toolkit>cudaOccupancyMaxPotentialBlockSize</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
-    <toolkit>cudaDeviceCanAccessPeer</toolkit>
-    <toolkit>cudaStreamDestroy</toolkit>
-    <toolkit>cudaHostAlloc</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaEventDestroy</toolkit>
     <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaMemcpyPeerAsync</toolkit>
-    <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaOccupancyMaxPotentialBlockSize</toolkit>
     <toolkit>cudaCheckError</toolkit>
+    <toolkit>cudaFreeHost</toolkit>
     <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaDeviceCanAccessPeer</toolkit>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
+    <toolkit>cudaStreamDestroy</toolkit>
     <toolkit>cudaGetLastError</toolkit>
-    <toolkit>cudaDeviceDisablePeerAccess</toolkit>
-    <toolkit>cudaStreamSynchronize</toolkit>
-    <toolkit>cudaGetErrorString</toolkit>
-    <toolkit>cudaStreamWaitEvent</toolkit>
     <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaStreamWaitEvent</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
+    <toolkit>cudaHostAlloc</toolkit>
     <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaMemcpyPeerAsync</toolkit>
+    <toolkit>cudaDeviceDisablePeerAccess</toolkit>
     <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaFreeHost</toolkit>
+    <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaDeviceEnablePeerAccess</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This application demonstrates the CUDA Peer-To-Peer (P2P) data transfers between pairs of GPUs and computes latency and bandwidth.  Tests on GPU pairs using P2P and without P2P are tested.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -71,6 +71,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/README.md b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/README.md
index 72e34fb1b..1df07a630 100644
--- a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/README.md
+++ b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/README.md
@@ -10,7 +10,7 @@ Performance Strategies, Asynchronous Data Transfers, Unified Virtual Address Spa
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDeviceEnablePeerAccess, cudaOccupancyMaxPotentialBlockSize, cudaStreamCreateWithFlags, cudaDeviceCanAccessPeer, cudaStreamDestroy, cudaHostAlloc, cudaEventCreate, cudaMalloc, cudaEventDestroy, cudaSetDevice, cudaMemcpyPeerAsync, cudaGetDeviceProperties, cudaCheckError, cudaGetDeviceCount, cudaEventElapsedTime, cudaGetLastError, cudaDeviceDisablePeerAccess, cudaStreamSynchronize, cudaGetErrorString, cudaStreamWaitEvent, cudaMemset, cudaFree, cudaEventRecord, cudaFreeHost
+cudaSetDevice, cudaEventDestroy, cudaOccupancyMaxPotentialBlockSize, cudaCheckError, cudaFreeHost, cudaGetDeviceCount, cudaDeviceCanAccessPeer, cudaStreamCreateWithFlags, cudaStreamDestroy, cudaGetLastError, cudaMemset, cudaStreamWaitEvent, cudaEventElapsedTime, cudaEventCreate, cudaHostAlloc, cudaFree, cudaGetErrorString, cudaMemcpyPeerAsync, cudaDeviceDisablePeerAccess, cudaEventRecord, cudaStreamSynchronize, cudaDeviceEnablePeerAccess, cudaMalloc, cudaGetDeviceProperties
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2017.vcxproj b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2017.vcxproj
index 545fa82c6..cbed6fc18 100644
--- a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/p2pBandwidthLatencyTest.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2019.vcxproj b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2019.vcxproj
index f8e092744..43fbfc443 100644
--- a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/p2pBandwidthLatencyTest.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2022.vcxproj b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2022.vcxproj
index 6e35634da..284865521 100644
--- a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/p2pBandwidthLatencyTest.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/postProcessGL/Makefile b/Samples/5_Domain_Specific/postProcessGL/Makefile
index 03790fc8d..c6f18f8df 100644
--- a/Samples/5_Domain_Specific/postProcessGL/Makefile
+++ b/Samples/5_Domain_Specific/postProcessGL/Makefile
@@ -299,9 +299,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/postProcessGL/NsightEclipse.xml b/Samples/5_Domain_Specific/postProcessGL/NsightEclipse.xml
index c477aeef3..756864f0a 100644
--- a/Samples/5_Domain_Specific/postProcessGL/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/postProcessGL/NsightEclipse.xml
@@ -3,22 +3,22 @@
 <entry>
   <name>postProcessGL</name>
   <cuda_api_list>
+    <toolkit>cudaHostAlloc</toolkit>
+    <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGetChannelDesc</toolkit>
+    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
     <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaGraphicsUnregisterResource</toolkit>
+    <toolkit>cudaDestroyTextureObject</toolkit>
     <toolkit>cudaMemcpyToArray</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
-    <toolkit>cudaHostAlloc</toolkit>
-    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
-    <toolkit>cudaProcess</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaDestroyTextureObject</toolkit>
     <toolkit>cudaCreateTextureObject</toolkit>
+    <toolkit>cudaProcess</toolkit>
+    <toolkit>cudaGraphicsUnregisterResource</toolkit>
     <toolkit>cudaGraphicsSubResourceGetMappedArray</toolkit>
-    <toolkit>cudaGetChannelDesc</toolkit>
-    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
     <toolkit>cudaGraphicsGLRegisterImage</toolkit>
-    <toolkit>cudaGraphicsUnmapResources</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample shows how to post-process an image rendered in OpenGL using CUDA.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -85,6 +85,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/postProcessGL/README.md b/Samples/5_Domain_Specific/postProcessGL/README.md
index 53aa76e78..821e00ff5 100644
--- a/Samples/5_Domain_Specific/postProcessGL/README.md
+++ b/Samples/5_Domain_Specific/postProcessGL/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Image Processing
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaMemcpyToArray, cudaGraphicsGLRegisterBuffer, cudaHostAlloc, cudaGraphicsResourceGetMappedPointer, cudaProcess, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaCreateTextureObject, cudaGraphicsSubResourceGetMappedArray, cudaGetChannelDesc, cudaMalloc, cudaGraphicsGLRegisterImage, cudaGraphicsUnmapResources
+cudaHostAlloc, cudaGraphicsUnmapResources, cudaMalloc, cudaFree, cudaGetChannelDesc, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaMemcpyToArray, cudaDeviceSynchronize, cudaCreateTextureObject, cudaProcess, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsGLRegisterImage
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/postProcessGL/findgllib.mk b/Samples/5_Domain_Specific/postProcessGL/findgllib.mk
index f0a5c5512..998fcf0f1 100644
--- a/Samples/5_Domain_Specific/postProcessGL/findgllib.mk
+++ b/Samples/5_Domain_Specific/postProcessGL/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2017.vcxproj b/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2017.vcxproj
index 015eb83de..23cf04064 100644
--- a/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/postProcessGL.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -118,6 +118,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2019.vcxproj b/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2019.vcxproj
index 7ac06e892..08c28e4dd 100644
--- a/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/postProcessGL.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2022.vcxproj b/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2022.vcxproj
index b549c0923..b70819bab 100644
--- a/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/postProcessGL.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/quasirandomGenerator/Makefile b/Samples/5_Domain_Specific/quasirandomGenerator/Makefile
index 3bc715cbe..04628cd3c 100644
--- a/Samples/5_Domain_Specific/quasirandomGenerator/Makefile
+++ b/Samples/5_Domain_Specific/quasirandomGenerator/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/quasirandomGenerator/NsightEclipse.xml b/Samples/5_Domain_Specific/quasirandomGenerator/NsightEclipse.xml
index b66fab731..c35eaa112 100644
--- a/Samples/5_Domain_Specific/quasirandomGenerator/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/quasirandomGenerator/NsightEclipse.xml
@@ -3,12 +3,12 @@
 <entry>
   <name>quasirandomGenerator</name>
   <cuda_api_list>
-    <toolkit>cudaMemcpyToSymbol</toolkit>
-    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaMemcpyToSymbol</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample implements Niederreiter Quasirandom Sequence Generator and Inverse Cumulative Normal Distribution functions for the generation of Standard Normal Distributions.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -49,6 +49,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/quasirandomGenerator/README.md b/Samples/5_Domain_Specific/quasirandomGenerator/README.md
index 8abc9977a..54afdbbaa 100644
--- a/Samples/5_Domain_Specific/quasirandomGenerator/README.md
+++ b/Samples/5_Domain_Specific/quasirandomGenerator/README.md
@@ -10,7 +10,7 @@ Computational Finance
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemcpyToSymbol, cudaMemset, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy
+cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaMemset, cudaMemcpyToSymbol, cudaMalloc
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2017.vcxproj b/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2017.vcxproj
index c23b11728..be6fcdaeb 100644
--- a/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/quasirandomGenerator.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2019.vcxproj b/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2019.vcxproj
index 4d1c0c507..a7083425b 100644
--- a/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/quasirandomGenerator.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2022.vcxproj b/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2022.vcxproj
index dbec91a6c..0861b23ae 100644
--- a/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/quasirandomGenerator.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/README.md b/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/README.md
index 3e20ffa03..c91d1a249 100644
--- a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/README.md
+++ b/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/README.md
@@ -10,7 +10,7 @@ Computational Finance, Runtime Compilation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuMemFree, cuMemcpyDtoH, cuMemAlloc
+cuMemcpyDtoH, cuMemAlloc, cuMemFree
 
 ## Dependencies needed to build/run
 [NVRTC](../../../README.md#nvrtc)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2017.vcxproj b/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2017.vcxproj
index b07a5e07e..3faf85502 100644
--- a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -110,6 +110,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2019.vcxproj b/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2019.vcxproj
index e11918b8b..cb8893b84 100644
--- a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -106,6 +106,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2022.vcxproj b/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2022.vcxproj
index 0bdb2adbb..9dc93ac6f 100644
--- a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -106,6 +106,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/recursiveGaussian/Makefile b/Samples/5_Domain_Specific/recursiveGaussian/Makefile
index 011149f3c..f228dea7d 100644
--- a/Samples/5_Domain_Specific/recursiveGaussian/Makefile
+++ b/Samples/5_Domain_Specific/recursiveGaussian/Makefile
@@ -299,9 +299,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/recursiveGaussian/NsightEclipse.xml b/Samples/5_Domain_Specific/recursiveGaussian/NsightEclipse.xml
index 00c1b1b7f..624f69c5a 100644
--- a/Samples/5_Domain_Specific/recursiveGaussian/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/recursiveGaussian/NsightEclipse.xml
@@ -3,17 +3,17 @@
 <entry>
   <name>recursiveGaussian</name>
   <cuda_api_list>
+    <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
     <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGetDevice</toolkit>
     <toolkit>cudaGraphicsUnregisterResource</toolkit>
-    <toolkit>cudaGraphicsUnmapResources</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
-    <toolkit>cudaGetDevice</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample implements a Gaussian blur using Deriche's recursive method. The advantage of this method is that the execution time is independent of the filter width.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -84,6 +84,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/recursiveGaussian/README.md b/Samples/5_Domain_Specific/recursiveGaussian/README.md
index 50d331525..9e1475c4b 100644
--- a/Samples/5_Domain_Specific/recursiveGaussian/README.md
+++ b/Samples/5_Domain_Specific/recursiveGaussian/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Image Processing
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaMalloc, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetDeviceProperties, cudaGetDevice
+cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaGetDevice, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/recursiveGaussian/findgllib.mk b/Samples/5_Domain_Specific/recursiveGaussian/findgllib.mk
index f0a5c5512..998fcf0f1 100644
--- a/Samples/5_Domain_Specific/recursiveGaussian/findgllib.mk
+++ b/Samples/5_Domain_Specific/recursiveGaussian/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2017.vcxproj b/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2017.vcxproj
index e6c68d03c..f706030a4 100644
--- a/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/recursiveGaussian.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -118,6 +118,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2019.vcxproj b/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2019.vcxproj
index a0de34452..72663de2a 100644
--- a/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/recursiveGaussian.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2022.vcxproj b/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2022.vcxproj
index 44739af72..947a471ce 100644
--- a/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/recursiveGaussian.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D10/README.md b/Samples/5_Domain_Specific/simpleD3D10/README.md
index cf446dc41..a9d7cde79 100644
--- a/Samples/5_Domain_Specific/simpleD3D10/README.md
+++ b/Samples/5_Domain_Specific/simpleD3D10/README.md
@@ -10,7 +10,7 @@ Graphics Interop, 3D Graphics
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaGraphicsResourceGetMappedPointer, cudaGetDeviceCount, cudaGetLastError, cudaGraphicsUnmapResources, cudaGetErrorString, cudaGetDeviceProperties
+cudaGraphicsUnmapResources, cudaGetErrorString, cudaGraphicsResourceGetMappedPointer, cudaGetLastError, cudaGraphicsMapResources, cudaGetDeviceCount, cudaGraphicsUnregisterResource, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [DirectX](../../../README.md#directx)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2017.vcxproj
index b5bbaf2d1..4dadd1937 100644
--- a/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleD3D10.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2019.vcxproj
index 3b3f6b28f..1dca8e143 100644
--- a/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleD3D10.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2022.vcxproj
index 738f55bc8..630c0ea1c 100644
--- a/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleD3D10.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D10RenderTarget/README.md b/Samples/5_Domain_Specific/simpleD3D10RenderTarget/README.md
index a8b689086..49077dc7b 100644
--- a/Samples/5_Domain_Specific/simpleD3D10RenderTarget/README.md
+++ b/Samples/5_Domain_Specific/simpleD3D10RenderTarget/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Texture
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaGraphicsResourceGetMappedPointer, cudaGetDeviceCount, cudaMalloc, cudaGraphicsSubResourceGetMappedArray, cudaBindTextureToArray, cudaUnbindTexture, cudaGetLastError, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties
+cudaGraphicsUnmapResources, cudaMemcpy, cudaMalloc, cudaUnbindTexture, cudaGetErrorString, cudaGraphicsResourceGetMappedPointer, cudaGetLastError, cudaGraphicsMapResources, cudaGetDeviceCount, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray, cudaBindTextureToArray, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [DirectX](../../../README.md#directx)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2017.vcxproj
index 19a3c9f2e..08ad0dfc2 100644
--- a/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleD3D10RenderTarget.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2019.vcxproj
index 335175e08..5478db879 100644
--- a/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleD3D10RenderTarget.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2022.vcxproj
index 75759d68e..532e9d4dd 100644
--- a/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleD3D10RenderTarget.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D10Texture/README.md b/Samples/5_Domain_Specific/simpleD3D10Texture/README.md
index f96f34c7f..c8f1a2ed2 100644
--- a/Samples/5_Domain_Specific/simpleD3D10Texture/README.md
+++ b/Samples/5_Domain_Specific/simpleD3D10Texture/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Texture
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaGetDeviceCount, cudaMallocPitch, cudaMalloc, cudaGraphicsSubResourceGetMappedArray, cudaGetLastError, cudaGraphicsUnmapResources, cudaGetErrorString, cudaGetDeviceProperties
+cudaGraphicsUnmapResources, cudaMalloc, cudaMallocPitch, cudaGetErrorString, cudaFree, cudaGetLastError, cudaGraphicsMapResources, cudaGetDeviceCount, cudaMemset, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [DirectX](../../../README.md#directx)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2017.vcxproj
index 2ad151f89..f5f7322f1 100644
--- a/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleD3D10Texture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -111,6 +111,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2019.vcxproj
index 793e7da35..d0c81315a 100644
--- a/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleD3D10Texture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2022.vcxproj
index aa188b082..ebc268195 100644
--- a/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleD3D10Texture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D11/README.md b/Samples/5_Domain_Specific/simpleD3D11/README.md
index cfdb0c790..fadf5bf0d 100644
--- a/Samples/5_Domain_Specific/simpleD3D11/README.md
+++ b/Samples/5_Domain_Specific/simpleD3D11/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Image Processing
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaSignalExternalSemaphoresAsync, cudaImportVertexBuffer, cudaAcquireSync, cudaGetDeviceCount, cudaImportExternalSemaphore, cudaReleaseSync, cudaImportExternalMemory, cudaExternalMemoryGetMappedBuffer, cudaImportKeyedMutex, cudaStreamCreateWithFlags, cudaDestroyExternalSemaphore, cudaSetDevice, cudaWaitExternalSemaphoresAsync, cudaDestroyExternalMemory
+cudaImportKeyedMutex, cudaExternalMemoryGetMappedBuffer, cudaStreamCreateWithFlags, cudaWaitExternalSemaphoresAsync, cudaImportExternalSemaphore, cudaFree, cudaImportVertexBuffer, cudaReleaseSync, cudaSetDevice, cudaSignalExternalSemaphoresAsync, cudaAcquireSync, cudaDestroyExternalMemory, cudaImportExternalMemory, cudaGetDeviceCount, cudaDestroyExternalSemaphore
 
 ## Dependencies needed to build/run
 [DirectX](../../../README.md#directx)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2017.vcxproj
index 9f0f0e9d0..2a11df06e 100644
--- a/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleD3D11.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -111,6 +111,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2019.vcxproj
index af23ccc6d..f3dbb2e0c 100644
--- a/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleD3D11.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2022.vcxproj
index b29aa4bb1..7338f410c 100644
--- a/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleD3D11.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D11Texture/README.md b/Samples/5_Domain_Specific/simpleD3D11Texture/README.md
index da8a38751..9c4cf9544 100644
--- a/Samples/5_Domain_Specific/simpleD3D11Texture/README.md
+++ b/Samples/5_Domain_Specific/simpleD3D11Texture/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Image Processing
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaGetDeviceCount, cudaMallocPitch, cudaMalloc, cudaGraphicsSubResourceGetMappedArray, cudaGetLastError, cudaGraphicsUnmapResources, cudaGetErrorString, cudaGetDeviceProperties
+cudaGraphicsUnmapResources, cudaMalloc, cudaMallocPitch, cudaGetErrorString, cudaFree, cudaGetLastError, cudaGraphicsMapResources, cudaGetDeviceCount, cudaMemset, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [DirectX](../../../README.md#directx)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2017.vcxproj
index 81f6d1566..025c68ff9 100644
--- a/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleD3D11Texture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -112,6 +112,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2019.vcxproj
index e2b4e089e..67799b210 100644
--- a/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleD3D11Texture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2022.vcxproj
index 9b06a4fd2..6345c10cc 100644
--- a/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleD3D11Texture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D12/NsightEclipse.xml b/Samples/5_Domain_Specific/simpleD3D12/NsightEclipse.xml
index f1843ba85..0e142a5a6 100644
--- a/Samples/5_Domain_Specific/simpleD3D12/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/simpleD3D12/NsightEclipse.xml
@@ -3,19 +3,19 @@
 <entry>
   <name>simpleD3D12</name>
   <cuda_api_list>
+    <toolkit>cudaWaitExternalSemaphoresAsync</toolkit>
+    <toolkit>cudaExternalMemoryGetMappedBuffer</toolkit>
+    <toolkit>cudaImportExternalSemaphore</toolkit>
     <toolkit>cudaFree</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaSignalExternalSemaphoresAsync</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaImportExternalSemaphore</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
-    <toolkit>cudaImportExternalMemory</toolkit>
-    <toolkit>cudaExternalMemoryGetMappedBuffer</toolkit>
-    <toolkit>cudaDestroyExternalSemaphore</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaWaitExternalSemaphoresAsync</toolkit>
     <toolkit>cudaStreamSynchronize</toolkit>
     <toolkit>cudaDestroyExternalMemory</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
+    <toolkit>cudaImportExternalMemory</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaDestroyExternalSemaphore</toolkit>
   </cuda_api_list>
   <description><![CDATA[A program which demonstrates Direct3D12 interoperability with CUDA.  The program creates a sinewave in DX12 vertex buffer which is created using CUDA kernels. DX12 and CUDA synchronizes using DirectX12 Fences. Direct3D then renders the results on the screen.  A DirectX12 Capable NVIDIA GPU is required on Windows10 or higher OS.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -59,6 +59,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <platform>windows10</platform>
diff --git a/Samples/5_Domain_Specific/simpleD3D12/README.md b/Samples/5_Domain_Specific/simpleD3D12/README.md
index 5dc8a4467..2e472bf01 100644
--- a/Samples/5_Domain_Specific/simpleD3D12/README.md
+++ b/Samples/5_Domain_Specific/simpleD3D12/README.md
@@ -10,7 +10,7 @@ Graphics Interop, CUDA DX12 Interop, Image Processing
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaSignalExternalSemaphoresAsync, cudaStreamCreate, cudaGetDeviceCount, cudaImportExternalSemaphore, cudaGetDeviceProperties, cudaImportExternalMemory, cudaExternalMemoryGetMappedBuffer, cudaDestroyExternalSemaphore, cudaSetDevice, cudaWaitExternalSemaphoresAsync, cudaStreamSynchronize, cudaDestroyExternalMemory
+cudaWaitExternalSemaphoresAsync, cudaExternalMemoryGetMappedBuffer, cudaImportExternalSemaphore, cudaFree, cudaSetDevice, cudaSignalExternalSemaphoresAsync, cudaGetDeviceProperties, cudaStreamSynchronize, cudaDestroyExternalMemory, cudaStreamCreate, cudaImportExternalMemory, cudaGetDeviceCount, cudaDestroyExternalSemaphore
 
 ## Dependencies needed to build/run
 [DirectX12](../../../README.md#directx12)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2017.vcxproj
index b75c6dae9..35882386e 100644
--- a/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleD3D12.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -119,6 +119,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2019.vcxproj
index 44e46df31..32c9763c8 100644
--- a/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2019.vcxproj
@@ -39,7 +39,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -68,7 +68,7 @@
       <OutputFile>$(OutDir)/simpleD3D12.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -120,6 +120,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2022.vcxproj
index 5a793f3c6..a74620920 100644
--- a/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2022.vcxproj
@@ -39,7 +39,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -68,7 +68,7 @@
       <OutputFile>$(OutDir)/simpleD3D12.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -120,6 +120,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D9/README.md b/Samples/5_Domain_Specific/simpleD3D9/README.md
index 4a9ea67b1..708a6cf32 100644
--- a/Samples/5_Domain_Specific/simpleD3D9/README.md
+++ b/Samples/5_Domain_Specific/simpleD3D9/README.md
@@ -10,7 +10,7 @@ Graphics Interop
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaGraphicsResourceGetMappedPointer, cudaGetLastError, cudaGraphicsUnmapResources
+cudaGraphicsUnmapResources, cudaGraphicsResourceGetMappedPointer, cudaGetLastError, cudaGraphicsMapResources, cudaGraphicsUnregisterResource
 
 ## Dependencies needed to build/run
 [DirectX](../../../README.md#directx)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2017.vcxproj
index bea083382..513a7b289 100644
--- a/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleD3D9.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2019.vcxproj
index 9089cbf72..6c61823a8 100644
--- a/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleD3D9.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2022.vcxproj
index ffda5e782..3389eb905 100644
--- a/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleD3D9.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D9Texture/README.md b/Samples/5_Domain_Specific/simpleD3D9Texture/README.md
index 96dffa342..51bf5c0b8 100644
--- a/Samples/5_Domain_Specific/simpleD3D9Texture/README.md
+++ b/Samples/5_Domain_Specific/simpleD3D9Texture/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Texture
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaMallocPitch, cudaMalloc, cudaGraphicsSubResourceGetMappedArray, cudaGetLastError, cudaGraphicsUnmapResources
+cudaGraphicsUnmapResources, cudaMalloc, cudaMallocPitch, cudaFree, cudaGetLastError, cudaGraphicsMapResources, cudaMemset, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray
 
 ## Dependencies needed to build/run
 [DirectX](../../../README.md#directx)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2017.vcxproj
index 5e316cdcb..ab2a79d1f 100644
--- a/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleD3D9Texture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -111,6 +111,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2019.vcxproj
index 3280f4648..53853db53 100644
--- a/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleD3D9Texture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2022.vcxproj
index 0654afd27..de004fdc7 100644
--- a/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleD3D9Texture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleGL/Makefile b/Samples/5_Domain_Specific/simpleGL/Makefile
index ad666c8af..47d5ed800 100644
--- a/Samples/5_Domain_Specific/simpleGL/Makefile
+++ b/Samples/5_Domain_Specific/simpleGL/Makefile
@@ -299,9 +299,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/simpleGL/NsightEclipse.xml b/Samples/5_Domain_Specific/simpleGL/NsightEclipse.xml
index 228af026d..0b73f33ba 100644
--- a/Samples/5_Domain_Specific/simpleGL/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/simpleGL/NsightEclipse.xml
@@ -3,15 +3,15 @@
 <entry>
   <name>simpleGL</name>
   <cuda_api_list>
+    <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
     <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaGraphicsUnregisterResource</toolkit>
-    <toolkit>cudaGraphicsUnmapResources</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
   </cuda_api_list>
   <description><![CDATA[Simple program which demonstrates interoperability between CUDA and OpenGL. The program modifies vertex positions with CUDA and uses OpenGL to render the geometry.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -70,6 +70,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/simpleGL/README.md b/Samples/5_Domain_Specific/simpleGL/README.md
index e1fc492b7..5fc035275 100644
--- a/Samples/5_Domain_Specific/simpleGL/README.md
+++ b/Samples/5_Domain_Specific/simpleGL/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Vertex Buffers, 3D Graphics
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaMalloc, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy
+cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/simpleGL/findgllib.mk b/Samples/5_Domain_Specific/simpleGL/findgllib.mk
index f0a5c5512..998fcf0f1 100644
--- a/Samples/5_Domain_Specific/simpleGL/findgllib.mk
+++ b/Samples/5_Domain_Specific/simpleGL/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2017.vcxproj
index cf0f4bdc7..89aab5d94 100644
--- a/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleGL.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -117,6 +117,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2019.vcxproj
index 67bdb2314..934fb116e 100644
--- a/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleGL.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2022.vcxproj
index f6a41b0ba..e571db13d 100644
--- a/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleGL.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleGLES/Makefile b/Samples/5_Domain_Specific/simpleGLES/Makefile
index 51e327739..6e0e516e4 100644
--- a/Samples/5_Domain_Specific/simpleGLES/Makefile
+++ b/Samples/5_Domain_Specific/simpleGLES/Makefile
@@ -313,9 +313,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/simpleGLES/NsightEclipse.xml b/Samples/5_Domain_Specific/simpleGLES/NsightEclipse.xml
index 93fd9c154..fc5a25be5 100644
--- a/Samples/5_Domain_Specific/simpleGLES/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/simpleGLES/NsightEclipse.xml
@@ -8,15 +8,15 @@
     <flag>-DUSE_GLES</flag>
   </cflags>
   <cuda_api_list>
+    <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
     <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaGraphicsUnregisterResource</toolkit>
-    <toolkit>cudaGraphicsUnmapResources</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
   </cuda_api_list>
   <description><![CDATA[Demonstrates data exchange between CUDA and OpenGL ES (aka Graphics interop). The program modifies vertex positions with CUDA and uses OpenGL ES to render the geometry.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -66,6 +66,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <sources>
     <ignore>graphics_interface.c</ignore>
   </sources>
diff --git a/Samples/5_Domain_Specific/simpleGLES/README.md b/Samples/5_Domain_Specific/simpleGLES/README.md
index 454218cfa..50644d746 100644
--- a/Samples/5_Domain_Specific/simpleGLES/README.md
+++ b/Samples/5_Domain_Specific/simpleGLES/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Vertex Buffers, 3D Graphics
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaMalloc, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy
+cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GLES](../../../README.md#gles)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/simpleGLES/findgleslib.mk b/Samples/5_Domain_Specific/simpleGLES/findgleslib.mk
index bcb335c1f..6da2f0781 100644
--- a/Samples/5_Domain_Specific/simpleGLES/findgleslib.mk
+++ b/Samples/5_Domain_Specific/simpleGLES/findgleslib.mk
@@ -60,6 +60,7 @@ ifeq ("$(TARGET_OS)","linux")
     RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
     CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -86,27 +87,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
-    ifeq ("$(SUSE)","0")
+
+    ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-  
+	  
   # find libGL, libGLU, libXi, 
   EGLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libEGL.so    -print 2>/dev/null)
   GLESLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLESv2.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/simpleGLES_EGLOutput/Makefile b/Samples/5_Domain_Specific/simpleGLES_EGLOutput/Makefile
index d3e12b7a6..7debcfaa3 100644
--- a/Samples/5_Domain_Specific/simpleGLES_EGLOutput/Makefile
+++ b/Samples/5_Domain_Specific/simpleGLES_EGLOutput/Makefile
@@ -313,9 +313,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/simpleGLES_EGLOutput/NsightEclipse.xml b/Samples/5_Domain_Specific/simpleGLES_EGLOutput/NsightEclipse.xml
index 27cd49821..a12a32e06 100644
--- a/Samples/5_Domain_Specific/simpleGLES_EGLOutput/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/simpleGLES_EGLOutput/NsightEclipse.xml
@@ -10,15 +10,15 @@
     <flag>-I/usr/include/drm</flag>
   </cflags>
   <cuda_api_list>
+    <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
     <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaGraphicsUnregisterResource</toolkit>
-    <toolkit>cudaGraphicsUnmapResources</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
   </cuda_api_list>
   <description><![CDATA[Demonstrates data exchange between CUDA and OpenGL ES (aka Graphics interop). The program modifies vertex positions with CUDA and uses OpenGL ES to render the geometry, and shows how to render directly to the display using the EGLOutput mechanism and the DRM library.
 
@@ -75,6 +75,7 @@ $ sudo modprobe nvidia-drm modeset=1
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <sources>
     <ignore>graphics_interface_egloutput_via_egl.c</ignore>
   </sources>
diff --git a/Samples/5_Domain_Specific/simpleGLES_EGLOutput/README.md b/Samples/5_Domain_Specific/simpleGLES_EGLOutput/README.md
index c468f0bb0..f11b2411d 100644
--- a/Samples/5_Domain_Specific/simpleGLES_EGLOutput/README.md
+++ b/Samples/5_Domain_Specific/simpleGLES_EGLOutput/README.md
@@ -15,7 +15,7 @@ Graphics Interop, Vertex Buffers, 3D Graphics
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -28,14 +28,14 @@ armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaMalloc, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy
+cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer
 
 ## Dependencies needed to build/run
 [EGLOutput](../../../README.md#egloutput), [GLES](../../../README.md#gles)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/simpleGLES_EGLOutput/findgleslib.mk b/Samples/5_Domain_Specific/simpleGLES_EGLOutput/findgleslib.mk
index bcb335c1f..6da2f0781 100644
--- a/Samples/5_Domain_Specific/simpleGLES_EGLOutput/findgleslib.mk
+++ b/Samples/5_Domain_Specific/simpleGLES_EGLOutput/findgleslib.mk
@@ -60,6 +60,7 @@ ifeq ("$(TARGET_OS)","linux")
     RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
     CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -86,27 +87,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
-    ifeq ("$(SUSE)","0")
+
+    ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-  
+	  
   # find libGL, libGLU, libXi, 
   EGLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libEGL.so    -print 2>/dev/null)
   GLESLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLESv2.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/simpleGLES_screen/Makefile b/Samples/5_Domain_Specific/simpleGLES_screen/Makefile
index 971ce716c..2629cc492 100644
--- a/Samples/5_Domain_Specific/simpleGLES_screen/Makefile
+++ b/Samples/5_Domain_Specific/simpleGLES_screen/Makefile
@@ -320,9 +320,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/simpleGLES_screen/NsightEclipse.xml b/Samples/5_Domain_Specific/simpleGLES_screen/NsightEclipse.xml
index 342e0ce5d..0b911915e 100644
--- a/Samples/5_Domain_Specific/simpleGLES_screen/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/simpleGLES_screen/NsightEclipse.xml
@@ -9,15 +9,15 @@
     <flag>-DWIN_INTERFACE_CUSTOM</flag>
   </cflags>
   <cuda_api_list>
+    <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
     <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaGraphicsUnregisterResource</toolkit>
-    <toolkit>cudaGraphicsUnmapResources</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
   </cuda_api_list>
   <description><![CDATA[Demonstrates data exchange between CUDA and OpenGL ES (aka Graphics interop). The program modifies vertex positions with CUDA and uses OpenGL ES to render the geometry.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -67,6 +67,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <sources>
     <ignore>graphics_interface.c</ignore>
   </sources>
diff --git a/Samples/5_Domain_Specific/simpleGLES_screen/README.md b/Samples/5_Domain_Specific/simpleGLES_screen/README.md
index cf4804fd8..78f96be49 100644
--- a/Samples/5_Domain_Specific/simpleGLES_screen/README.md
+++ b/Samples/5_Domain_Specific/simpleGLES_screen/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Vertex Buffers, 3D Graphics
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaMalloc, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy
+cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer
 
 ## Dependencies needed to build/run
 [screen](../../../README.md#screen), [GLES](../../../README.md#gles)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/simpleGLES_screen/findgleslib.mk b/Samples/5_Domain_Specific/simpleGLES_screen/findgleslib.mk
index bcb335c1f..6da2f0781 100644
--- a/Samples/5_Domain_Specific/simpleGLES_screen/findgleslib.mk
+++ b/Samples/5_Domain_Specific/simpleGLES_screen/findgleslib.mk
@@ -60,6 +60,7 @@ ifeq ("$(TARGET_OS)","linux")
     RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
     CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -86,27 +87,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
-    ifeq ("$(SUSE)","0")
+
+    ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-  
+	  
   # find libGL, libGLU, libXi, 
   EGLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libEGL.so    -print 2>/dev/null)
   GLESLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLESv2.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/simpleVulkan/Makefile b/Samples/5_Domain_Specific/simpleVulkan/Makefile
index 83f57ad69..8b5cfd465 100644
--- a/Samples/5_Domain_Specific/simpleVulkan/Makefile
+++ b/Samples/5_Domain_Specific/simpleVulkan/Makefile
@@ -338,9 +338,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/simpleVulkan/NsightEclipse.xml b/Samples/5_Domain_Specific/simpleVulkan/NsightEclipse.xml
index a0ec1c564..1d9449e73 100644
--- a/Samples/5_Domain_Specific/simpleVulkan/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/simpleVulkan/NsightEclipse.xml
@@ -6,24 +6,24 @@
     <flag>--std=c++11</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaTimelineSemaphore</toolkit>
-    <toolkit>cudaSignalExternalSemaphoresAsync</toolkit>
-    <toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaImportExternalSemaphore</toolkit>
-    <toolkit>cudaVertMem</toolkit>
-    <toolkit>cudaImportExternalMemory</toolkit>
-    <toolkit>cudaDestroyExternalMemory</toolkit>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
     <toolkit>cudaExternalMemoryGetMappedBuffer</toolkit>
     <toolkit>cudaSignalSemaphore</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
+    <toolkit>cudaWaitExternalSemaphoresAsync</toolkit>
+    <toolkit>cudaVertMem</toolkit>
+    <toolkit>cudaImportExternalSemaphore</toolkit>
     <toolkit>cudaWaitSemaphore</toolkit>
     <toolkit>cudaHeightMap</toolkit>
-    <toolkit>cudaDestroyExternalSemaphore</toolkit>
     <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaWaitExternalSemaphoresAsync</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaSignalExternalSemaphoresAsync</toolkit>
+    <toolkit>cudaTimelineSemaphore</toolkit>
     <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaDestroyExternalMemory</toolkit>
+    <toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
+    <toolkit>cudaImportExternalMemory</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaDestroyExternalSemaphore</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates Vulkan CUDA Interop. CUDA imports the Vulkan vertex buffer and operates on it to create sinewave, and synchronizes with Vulkan through vulkan semaphores imported by CUDA. This sample depends on Vulkan SDK, GLFW3 libraries, for building this sample please refer to "Build_instructions.txt" provided in this sample's directory]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -75,6 +75,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/simpleVulkan/README.md b/Samples/5_Domain_Specific/simpleVulkan/README.md
index 27e2dd04f..fd287027a 100644
--- a/Samples/5_Domain_Specific/simpleVulkan/README.md
+++ b/Samples/5_Domain_Specific/simpleVulkan/README.md
@@ -10,7 +10,7 @@ Graphics Interop, CUDA Vulkan Interop, Data Parallel Algorithms
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaTimelineSemaphore, cudaSignalExternalSemaphoresAsync, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaGetDeviceCount, cudaImportExternalSemaphore, cudaVertMem, cudaImportExternalMemory, cudaDestroyExternalMemory, cudaExternalMemoryGetMappedBuffer, cudaSignalSemaphore, cudaStreamCreateWithFlags, cudaWaitSemaphore, cudaHeightMap, cudaDestroyExternalSemaphore, cudaSetDevice, cudaWaitExternalSemaphoresAsync, cudaStreamSynchronize, cudaGetDeviceProperties
+cudaStreamCreateWithFlags, cudaExternalMemoryGetMappedBuffer, cudaSignalSemaphore, cudaWaitExternalSemaphoresAsync, cudaVertMem, cudaImportExternalSemaphore, cudaWaitSemaphore, cudaHeightMap, cudaSetDevice, cudaGetDeviceCount, cudaSignalExternalSemaphoresAsync, cudaTimelineSemaphore, cudaStreamSynchronize, cudaDestroyExternalMemory, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaImportExternalMemory, cudaGetDeviceProperties, cudaDestroyExternalSemaphore
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [VULKAN](../../../README.md#vulkan)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/simpleVulkan/findvulkan.mk b/Samples/5_Domain_Specific/simpleVulkan/findvulkan.mk
index 4d4e8aed7..004ab22b5 100644
--- a/Samples/5_Domain_Specific/simpleVulkan/findvulkan.mk
+++ b/Samples/5_Domain_Specific/simpleVulkan/findvulkan.mk
@@ -57,6 +57,7 @@ ifeq ("$(TARGET_OS)","linux")
     RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
     CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -103,6 +104,11 @@ ifeq ("$(TARGET_OS)","linux")
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
+    ifeq ("$(KYLIN)","0")
+      GLPATH    ?= /usr/lib64/nvidia
+      GLLINK    ?= -L/usr/lib64/nvidia
+      DFLT_PATH ?= /usr/lib64
+    endif
 
   VULKAN_SDK_PATH ?= ${VULKAN_SDK}
 
diff --git a/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2017.vcxproj
index 5e7353a09..1ec49d052 100644
--- a/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleVulkan.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -121,6 +121,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2019.vcxproj
index 55b1a4b50..4a4e5a635 100644
--- a/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleVulkan.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -117,6 +117,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2022.vcxproj
index 9f655efa4..81252a966 100644
--- a/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleVulkan.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -117,6 +117,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/Makefile b/Samples/5_Domain_Specific/simpleVulkanMMAP/Makefile
index 5a162a2b8..42e9802f9 100644
--- a/Samples/5_Domain_Specific/simpleVulkanMMAP/Makefile
+++ b/Samples/5_Domain_Specific/simpleVulkanMMAP/Makefile
@@ -340,9 +340,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/NsightEclipse.xml b/Samples/5_Domain_Specific/simpleVulkanMMAP/NsightEclipse.xml
index fed119ce8..5f91fb68d 100644
--- a/Samples/5_Domain_Specific/simpleVulkanMMAP/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/simpleVulkanMMAP/NsightEclipse.xml
@@ -6,36 +6,36 @@
     <flag>--std=c++11</flag>
   </cflags>
   <cuda_api_list>
-    <driver>cuMemRelease</driver>
-    <driver>cuMemExportToShareableHandle</driver>
-    <driver>cuMemSetAccess</driver>
-    <driver>cuMemMap</driver>
     <driver>cuMemCreate</driver>
-    <driver>cuMemAddressFree</driver>
+    <driver>cuMemAddressReserve</driver>
     <driver>cuMemGetAllocationGranularity</driver>
+    <driver>cuMemAddressFree</driver>
     <driver>cuMemUnmap</driver>
-    <driver>cuMemAddressReserve</driver>
-    <toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
+    <driver>cuMemMap</driver>
+    <driver>cuMemRelease</driver>
+    <driver>cuMemExportToShareableHandle</driver>
+    <driver>cuMemSetAccess</driver>
+    <toolkit>cudaWaitExternalSemaphoresAsync</toolkit>
     <toolkit>cudaImportExternalSemaphore</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
-    <toolkit>cudaMemcpyAsync</toolkit>
-    <toolkit>cudaStreamDestroy</toolkit>
-    <toolkit>cudaSignalExternalSemaphoresAsync</toolkit>
-    <toolkit>cudaMallocHost</toolkit>
-    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaDeviceGetAttribute</toolkit>
     <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaLaunchHostFunc</toolkit>
+    <toolkit>cudaMallocHost</toolkit>
+    <toolkit>cudaSignalExternalSemaphoresAsync</toolkit>
+    <toolkit>cudaFreeHost</toolkit>
+    <toolkit>cudaMemsetAsync</toolkit>
+    <toolkit>cudaMemcpyAsync</toolkit>
     <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaDeviceGetAttribute</toolkit>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
+    <toolkit>cudaStreamDestroy</toolkit>
+    <toolkit>cudaDestroyExternalSemaphore</toolkit>
     <toolkit>cudaSignalSemaphore</toolkit>
     <toolkit>cudaWaitSemaphore</toolkit>
-    <toolkit>cudaDestroyExternalSemaphore</toolkit>
-    <toolkit>cudaStreamSynchronize</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaLaunchHostFunc</toolkit>
-    <toolkit>cudaMemsetAsync</toolkit>
-    <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaWaitExternalSemaphoresAsync</toolkit>
+    <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
+    <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[ This sample demonstrates Vulkan CUDA Interop via cuMemMap APIs. CUDA exports buffers that Vulkan imports as vertex buffer. CUDA invokes kernels to operate on vertices and synchronizes with Vulkan through vulkan semaphores imported by CUDA. This sample depends on Vulkan SDK, GLFW3 libraries, for building this sample please refer to "Build_instructions.txt" provided in this sample's directory]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -91,6 +91,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <sources>
     <extracompilation>../../../Common/helper_multiprocess.cpp</extracompilation>
     <extraheader>../../../Common/helper_multiprocess.h</extraheader>
diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/README.md b/Samples/5_Domain_Specific/simpleVulkanMMAP/README.md
index eea7f28e9..3030b57b9 100644
--- a/Samples/5_Domain_Specific/simpleVulkanMMAP/README.md
+++ b/Samples/5_Domain_Specific/simpleVulkanMMAP/README.md
@@ -10,7 +10,7 @@ cuMemMap IPC, MMAP, Graphics Interop, CUDA Vulkan Interop, Data Parallel Algorit
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,17 +23,17 @@ x86_64, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuMemRelease, cuMemExportToShareableHandle, cuMemSetAccess, cuMemMap, cuMemCreate, cuMemAddressFree, cuMemGetAllocationGranularity, cuMemUnmap, cuMemAddressReserve
+cuMemCreate, cuMemAddressReserve, cuMemGetAllocationGranularity, cuMemAddressFree, cuMemUnmap, cuMemMap, cuMemRelease, cuMemExportToShareableHandle, cuMemSetAccess
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaImportExternalSemaphore, cudaStreamCreateWithFlags, cudaMemcpyAsync, cudaStreamDestroy, cudaSignalExternalSemaphoresAsync, cudaMallocHost, cudaMalloc, cudaSetDevice, cudaGetDeviceProperties, cudaGetDeviceCount, cudaDeviceGetAttribute, cudaSignalSemaphore, cudaWaitSemaphore, cudaDestroyExternalSemaphore, cudaStreamSynchronize, cudaFree, cudaLaunchHostFunc, cudaMemsetAsync, cudaFreeHost, cudaWaitExternalSemaphoresAsync
+cudaWaitExternalSemaphoresAsync, cudaImportExternalSemaphore, cudaDeviceGetAttribute, cudaSetDevice, cudaLaunchHostFunc, cudaMallocHost, cudaSignalExternalSemaphoresAsync, cudaFreeHost, cudaMemsetAsync, cudaMemcpyAsync, cudaGetDeviceCount, cudaStreamCreateWithFlags, cudaStreamDestroy, cudaDestroyExternalSemaphore, cudaSignalSemaphore, cudaWaitSemaphore, cudaFree, cudaStreamSynchronize, cudaMalloc, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [VULKAN](../../../README.md#vulkan)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/findvulkan.mk b/Samples/5_Domain_Specific/simpleVulkanMMAP/findvulkan.mk
index 4d4e8aed7..004ab22b5 100644
--- a/Samples/5_Domain_Specific/simpleVulkanMMAP/findvulkan.mk
+++ b/Samples/5_Domain_Specific/simpleVulkanMMAP/findvulkan.mk
@@ -57,6 +57,7 @@ ifeq ("$(TARGET_OS)","linux")
     RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
     CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -103,6 +104,11 @@ ifeq ("$(TARGET_OS)","linux")
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
+    ifeq ("$(KYLIN)","0")
+      GLPATH    ?= /usr/lib64/nvidia
+      GLLINK    ?= -L/usr/lib64/nvidia
+      DFLT_PATH ?= /usr/lib64
+    endif
 
   VULKAN_SDK_PATH ?= ${VULKAN_SDK}
 
diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2017.vcxproj
index ea52d956b..2c6ebec4e 100644
--- a/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleVulkanMMAP.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -123,6 +123,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2019.vcxproj
index b41768d6e..1343dd048 100644
--- a/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleVulkanMMAP.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -119,6 +119,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2022.vcxproj
index a13ab7198..cf29fc5c7 100644
--- a/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleVulkanMMAP.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -119,6 +119,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/smokeParticles/Makefile b/Samples/5_Domain_Specific/smokeParticles/Makefile
index b6f4f0ff4..f6afadbea 100644
--- a/Samples/5_Domain_Specific/smokeParticles/Makefile
+++ b/Samples/5_Domain_Specific/smokeParticles/Makefile
@@ -324,9 +324,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/smokeParticles/NsightEclipse.xml b/Samples/5_Domain_Specific/smokeParticles/NsightEclipse.xml
index ca2f9c07b..581dba9a7 100644
--- a/Samples/5_Domain_Specific/smokeParticles/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/smokeParticles/NsightEclipse.xml
@@ -6,10 +6,10 @@
     <flag>--std=c++14</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaCreateTextureObject</toolkit>
     <toolkit>cudaExtent</toolkit>
-    <toolkit>cudaMemcpyToSymbol</toolkit>
     <toolkit>cudaPitchedPtr</toolkit>
+    <toolkit>cudaCreateTextureObject</toolkit>
+    <toolkit>cudaMemcpyToSymbol</toolkit>
   </cuda_api_list>
   <description><![CDATA[Smoke simulation with volumetric shadows using half-angle slicing technique. Uses CUDA for procedural simulation, Thrust Library for sorting algorithms, and OpenGL for graphics rendering.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -79,6 +79,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/smokeParticles/README.md b/Samples/5_Domain_Specific/smokeParticles/README.md
index e5082eba0..3750d7735 100644
--- a/Samples/5_Domain_Specific/smokeParticles/README.md
+++ b/Samples/5_Domain_Specific/smokeParticles/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Data Parallel Algorithms, Physically-Based Simulation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaCreateTextureObject, cudaExtent, cudaMemcpyToSymbol, cudaPitchedPtr
+cudaExtent, cudaPitchedPtr, cudaCreateTextureObject, cudaMemcpyToSymbol
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/smokeParticles/findgllib.mk b/Samples/5_Domain_Specific/smokeParticles/findgllib.mk
index f0a5c5512..998fcf0f1 100644
--- a/Samples/5_Domain_Specific/smokeParticles/findgllib.mk
+++ b/Samples/5_Domain_Specific/smokeParticles/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2017.vcxproj b/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2017.vcxproj
index 07d6838af..1807407de 100644
--- a/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/smokeParticles.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -137,6 +137,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2019.vcxproj b/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2019.vcxproj
index e69a7c0b0..661e64103 100644
--- a/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/smokeParticles.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -133,6 +133,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2022.vcxproj b/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2022.vcxproj
index 0a64cc05b..d6bb21aa4 100644
--- a/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/smokeParticles.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -133,6 +133,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/stereoDisparity/Makefile b/Samples/5_Domain_Specific/stereoDisparity/Makefile
index 4b2ae8d0e..7608b56be 100644
--- a/Samples/5_Domain_Specific/stereoDisparity/Makefile
+++ b/Samples/5_Domain_Specific/stereoDisparity/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/stereoDisparity/NsightEclipse.xml b/Samples/5_Domain_Specific/stereoDisparity/NsightEclipse.xml
index e4c72e7fc..a0249c743 100644
--- a/Samples/5_Domain_Specific/stereoDisparity/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/stereoDisparity/NsightEclipse.xml
@@ -6,16 +6,16 @@
     <clean>*.pgm</clean>
   </cleanextras>
   <cuda_api_list>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaCreateTextureObject</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[A CUDA program that demonstrates how to compute a stereo disparity map using SIMD SAD (Sum of Absolute Difference) intrinsics.  Requires Compute Capability 2.0 or higher.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -61,6 +61,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/stereoDisparity/README.md b/Samples/5_Domain_Specific/stereoDisparity/README.md
index 1034eb62c..81b9eca46 100644
--- a/Samples/5_Domain_Specific/stereoDisparity/README.md
+++ b/Samples/5_Domain_Specific/stereoDisparity/README.md
@@ -10,7 +10,7 @@ Image Processing, Video Intrinsics
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventSynchronize, cudaCreateTextureObject, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaFree, cudaEventSynchronize, cudaDeviceSynchronize, cudaCreateTextureObject, cudaEventRecord, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2017.vcxproj b/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2017.vcxproj
index eb5a3d5bb..9305e49dd 100644
--- a/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/stereoDisparity.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2019.vcxproj b/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2019.vcxproj
index 1147bc9dc..bea60c05e 100644
--- a/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/stereoDisparity.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2022.vcxproj b/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2022.vcxproj
index 16e564159..0b22b48a1 100644
--- a/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/stereoDisparity.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/volumeFiltering/Makefile b/Samples/5_Domain_Specific/volumeFiltering/Makefile
index dfcda5bba..438c552e2 100644
--- a/Samples/5_Domain_Specific/volumeFiltering/Makefile
+++ b/Samples/5_Domain_Specific/volumeFiltering/Makefile
@@ -299,9 +299,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/volumeFiltering/NsightEclipse.xml b/Samples/5_Domain_Specific/volumeFiltering/NsightEclipse.xml
index 813926c5c..7b54f46a9 100644
--- a/Samples/5_Domain_Specific/volumeFiltering/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/volumeFiltering/NsightEclipse.xml
@@ -3,25 +3,25 @@
 <entry>
   <name>volumeFiltering</name>
   <cuda_api_list>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
     <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaMemcpyToSymbol</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
     <toolkit>cudaDestroySurfaceObject</toolkit>
-    <toolkit>cudaPitchedPtr</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaGraphicsUnregisterResource</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaDestroyTextureObject</toolkit>
-    <toolkit>cudaMemset</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaFreeArray</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
     <toolkit>cudaExtent</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaCreateSurfaceObject</toolkit>
+    <toolkit>cudaMemcpyToSymbol</toolkit>
+    <toolkit>cudaPitchedPtr</toolkit>
+    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
     <toolkit>cudaCreateTextureObject</toolkit>
-    <toolkit>cudaMallocArray</toolkit>
     <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaMallocArray</toolkit>
+    <toolkit>cudaFreeArray</toolkit>
+    <toolkit>cudaDestroyTextureObject</toolkit>
+    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGraphicsUnregisterResource</toolkit>
+    <toolkit>cudaMalloc</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates 3D Volumetric Filtering using 3D Textures and 3D Surface Writes.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -86,6 +86,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/volumeFiltering/README.md b/Samples/5_Domain_Specific/volumeFiltering/README.md
index 2476ecf5e..a2bc76e36 100644
--- a/Samples/5_Domain_Specific/volumeFiltering/README.md
+++ b/Samples/5_Domain_Specific/volumeFiltering/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Image Processing, 3D Textures, Surface Writes
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaGraphicsMapResources, cudaGraphicsResourceGetMappedPointer, cudaMemcpy, cudaMemcpyToSymbol, cudaDestroySurfaceObject, cudaPitchedPtr, cudaMalloc, cudaGraphicsUnregisterResource, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaMemset, cudaFree, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaExtent, cudaCreateSurfaceObject, cudaCreateTextureObject, cudaMallocArray, cudaGraphicsUnmapResources
+cudaMemcpy, cudaGraphicsMapResources, cudaDestroySurfaceObject, cudaExtent, cudaDeviceSynchronize, cudaCreateSurfaceObject, cudaMemcpyToSymbol, cudaPitchedPtr, cudaGraphicsResourceGetMappedPointer, cudaCreateTextureObject, cudaGraphicsUnmapResources, cudaMallocArray, cudaFreeArray, cudaDestroyTextureObject, cudaMemset, cudaGraphicsGLRegisterBuffer, cudaFree, cudaGraphicsUnregisterResource, cudaMalloc
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/volumeFiltering/findgllib.mk b/Samples/5_Domain_Specific/volumeFiltering/findgllib.mk
index f0a5c5512..998fcf0f1 100644
--- a/Samples/5_Domain_Specific/volumeFiltering/findgllib.mk
+++ b/Samples/5_Domain_Specific/volumeFiltering/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2017.vcxproj b/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2017.vcxproj
index 71b27a634..6e0686632 100644
--- a/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/volumeFiltering.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -122,6 +122,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2019.vcxproj b/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2019.vcxproj
index af2faa783..2a01aa1ce 100644
--- a/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/volumeFiltering.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -118,6 +118,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2022.vcxproj b/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2022.vcxproj
index 3dc02a43b..04608aaec 100644
--- a/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/volumeFiltering.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -118,6 +118,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/volumeRender/Makefile b/Samples/5_Domain_Specific/volumeRender/Makefile
index b2a74d0de..fdb5649f0 100644
--- a/Samples/5_Domain_Specific/volumeRender/Makefile
+++ b/Samples/5_Domain_Specific/volumeRender/Makefile
@@ -299,9 +299,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/volumeRender/NsightEclipse.xml b/Samples/5_Domain_Specific/volumeRender/NsightEclipse.xml
index bfef15a20..05b789d99 100644
--- a/Samples/5_Domain_Specific/volumeRender/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/volumeRender/NsightEclipse.xml
@@ -3,24 +3,24 @@
 <entry>
   <name>volumeRender</name>
   <cuda_api_list>
-    <toolkit>cudaMemcpyToSymbol</toolkit>
-    <toolkit>cudaMemset</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
+    <toolkit>cudaProfilerStop</toolkit>
+    <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMallocArray</toolkit>
     <toolkit>cudaFreeArray</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaPitchedPtr</toolkit>
     <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
+    <toolkit>cudaDestroyTextureObject</toolkit>
     <toolkit>cudaExtent</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaDestroyTextureObject</toolkit>
-    <toolkit>cudaPitchedPtr</toolkit>
     <toolkit>cudaCreateTextureObject</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaProfilerStop</toolkit>
-    <toolkit>cudaMallocArray</toolkit>
+    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaMemcpyToSymbol</toolkit>
     <toolkit>cudaGraphicsUnregisterResource</toolkit>
-    <toolkit>cudaGraphicsUnmapResources</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates basic volume rendering using 3D Textures.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -84,6 +84,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/volumeRender/README.md b/Samples/5_Domain_Specific/volumeRender/README.md
index acc1fb2a1..d5d6ef58e 100644
--- a/Samples/5_Domain_Specific/volumeRender/README.md
+++ b/Samples/5_Domain_Specific/volumeRender/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Image Processing, 3D Textures
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemcpyToSymbol, cudaMemset, cudaFree, cudaGraphicsMapResources, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaExtent, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaPitchedPtr, cudaCreateTextureObject, cudaMalloc, cudaProfilerStop, cudaMallocArray, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy
+cudaProfilerStop, cudaGraphicsUnmapResources, cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaPitchedPtr, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaExtent, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMemset, cudaMemcpyToSymbol, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/volumeRender/findgllib.mk b/Samples/5_Domain_Specific/volumeRender/findgllib.mk
index f0a5c5512..998fcf0f1 100644
--- a/Samples/5_Domain_Specific/volumeRender/findgllib.mk
+++ b/Samples/5_Domain_Specific/volumeRender/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
+    else
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-  
+
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2017.vcxproj b/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2017.vcxproj
index 3e6388cae..ab6037e94 100644
--- a/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/volumeRender.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -118,6 +118,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2019.vcxproj b/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2019.vcxproj
index 44536db8e..c752510d0 100644
--- a/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/volumeRender.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2022.vcxproj b/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2022.vcxproj
index 30b3f8fe8..77698195a 100644
--- a/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/volumeRender.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/Makefile b/Samples/5_Domain_Specific/vulkanImageCUDA/Makefile
index 7e592bbcb..56b3696a4 100644
--- a/Samples/5_Domain_Specific/vulkanImageCUDA/Makefile
+++ b/Samples/5_Domain_Specific/vulkanImageCUDA/Makefile
@@ -338,9 +338,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/NsightEclipse.xml b/Samples/5_Domain_Specific/vulkanImageCUDA/NsightEclipse.xml
index 33025d3a6..4fa867e9d 100644
--- a/Samples/5_Domain_Specific/vulkanImageCUDA/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/vulkanImageCUDA/NsightEclipse.xml
@@ -6,33 +6,33 @@
     <flag>--std=c++11</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaGetMipmappedArrayLevel</toolkit>
-    <toolkit>cudaImportExternalSemaphore</toolkit>
-    <toolkit>cudaExternalMemoryGetMappedMipmappedArray</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaDestroyExternalMemory</toolkit>
-    <toolkit>cudaSignalExternalSemaphoresAsync</toolkit>
-    <toolkit>cudaFreeMipmappedArray</toolkit>
     <toolkit>cudaVkSemaphoreSignal</toolkit>
-    <toolkit>cudaVkImportImageMem</toolkit>
-    <toolkit>cudaDestroySurfaceObject</toolkit>
-    <toolkit>cudaImportExternalMemory</toolkit>
-    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaWaitExternalSemaphoresAsync</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaVkImportSemaphore</toolkit>
+    <toolkit>cudaImportExternalSemaphore</toolkit>
+    <toolkit>cudaGetMipmappedArrayLevel</toolkit>
     <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaDestroySurfaceObject</toolkit>
+    <toolkit>cudaExtent</toolkit>
+    <toolkit>cudaMallocMipmappedArray</toolkit>
+    <toolkit>cudaCreateSurfaceObject</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
+    <toolkit>cudaSignalExternalSemaphoresAsync</toolkit>
+    <toolkit>cudaUpdateVkImage</toolkit>
+    <toolkit>cudaCreateTextureObject</toolkit>
     <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaExternalMemoryGetMappedMipmappedArray</toolkit>
     <toolkit>cudaDestroyTextureObject</toolkit>
-    <toolkit>cudaUpdateVkImage</toolkit>
+    <toolkit>cudaVkImportImageMem</toolkit>
+    <toolkit>cudaDestroyExternalMemory</toolkit>
+    <toolkit>cudaVkSemaphoreWait</toolkit>
+    <toolkit>cudaImportExternalMemory</toolkit>
     <toolkit>cudaDestroyExternalSemaphore</toolkit>
+    <toolkit>cudaFreeMipmappedArray</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
-    <toolkit>cudaVkSemaphoreWait</toolkit>
-    <toolkit>cudaExtent</toolkit>
-    <toolkit>cudaVkImportSemaphore</toolkit>
-    <toolkit>cudaCreateSurfaceObject</toolkit>
-    <toolkit>cudaMallocMipmappedArray</toolkit>
-    <toolkit>cudaCreateTextureObject</toolkit>
-    <toolkit>cudaWaitExternalSemaphoresAsync</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates Vulkan Image - CUDA Interop. CUDA imports the Vulkan image buffer, performs box filtering over it, and synchronizes with Vulkan through vulkan semaphores imported by CUDA. This sample depends on Vulkan SDK, GLFW3 libraries, for building this sample please refer to "Build_instructions.txt" provided in this sample's directory]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -80,6 +80,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/README.md b/Samples/5_Domain_Specific/vulkanImageCUDA/README.md
index 4b5e262ad..97f2de7d1 100644
--- a/Samples/5_Domain_Specific/vulkanImageCUDA/README.md
+++ b/Samples/5_Domain_Specific/vulkanImageCUDA/README.md
@@ -10,7 +10,7 @@ Graphics Interop, CUDA Vulkan Interop, Data Parallel Algorithms
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaGetMipmappedArrayLevel, cudaImportExternalSemaphore, cudaExternalMemoryGetMappedMipmappedArray, cudaMemcpy, cudaDestroyExternalMemory, cudaSignalExternalSemaphoresAsync, cudaFreeMipmappedArray, cudaVkSemaphoreSignal, cudaVkImportImageMem, cudaDestroySurfaceObject, cudaImportExternalMemory, cudaMalloc, cudaSetDevice, cudaGetDeviceProperties, cudaGetDeviceCount, cudaDestroyTextureObject, cudaUpdateVkImage, cudaDestroyExternalSemaphore, cudaFree, cudaStreamCreate, cudaVkSemaphoreWait, cudaExtent, cudaVkImportSemaphore, cudaCreateSurfaceObject, cudaMallocMipmappedArray, cudaCreateTextureObject, cudaWaitExternalSemaphoresAsync
+cudaVkSemaphoreSignal, cudaWaitExternalSemaphoresAsync, cudaMemcpy, cudaVkImportSemaphore, cudaImportExternalSemaphore, cudaGetMipmappedArrayLevel, cudaSetDevice, cudaDestroySurfaceObject, cudaExtent, cudaMallocMipmappedArray, cudaCreateSurfaceObject, cudaStreamCreate, cudaSignalExternalSemaphoresAsync, cudaUpdateVkImage, cudaCreateTextureObject, cudaGetDeviceCount, cudaExternalMemoryGetMappedMipmappedArray, cudaDestroyTextureObject, cudaVkImportImageMem, cudaDestroyExternalMemory, cudaVkSemaphoreWait, cudaImportExternalMemory, cudaDestroyExternalSemaphore, cudaFreeMipmappedArray, cudaFree, cudaMalloc, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [VULKAN](../../../README.md#vulkan)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/findvulkan.mk b/Samples/5_Domain_Specific/vulkanImageCUDA/findvulkan.mk
index 4d4e8aed7..004ab22b5 100644
--- a/Samples/5_Domain_Specific/vulkanImageCUDA/findvulkan.mk
+++ b/Samples/5_Domain_Specific/vulkanImageCUDA/findvulkan.mk
@@ -57,6 +57,7 @@ ifeq ("$(TARGET_OS)","linux")
     RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
     CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -103,6 +104,11 @@ ifeq ("$(TARGET_OS)","linux")
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
+    ifeq ("$(KYLIN)","0")
+      GLPATH    ?= /usr/lib64/nvidia
+      GLLINK    ?= -L/usr/lib64/nvidia
+      DFLT_PATH ?= /usr/lib64
+    endif
 
   VULKAN_SDK_PATH ?= ${VULKAN_SDK}
 
diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2017.vcxproj b/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2017.vcxproj
index 93db012f7..424170ab9 100644
--- a/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/vulkanImageCUDA.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -117,6 +117,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2019.vcxproj b/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2019.vcxproj
index 632afdc26..769231d57 100644
--- a/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/vulkanImageCUDA.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2022.vcxproj b/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2022.vcxproj
index 530a2f71f..997263975 100644
--- a/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/vulkanImageCUDA.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/6_Performance/UnifiedMemoryPerf/Makefile b/Samples/6_Performance/UnifiedMemoryPerf/Makefile
index 05eb45ee5..2f1a6188b 100644
--- a/Samples/6_Performance/UnifiedMemoryPerf/Makefile
+++ b/Samples/6_Performance/UnifiedMemoryPerf/Makefile
@@ -285,9 +285,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/6_Performance/UnifiedMemoryPerf/NsightEclipse.xml b/Samples/6_Performance/UnifiedMemoryPerf/NsightEclipse.xml
index a3b50f03f..6217899d4 100644
--- a/Samples/6_Performance/UnifiedMemoryPerf/NsightEclipse.xml
+++ b/Samples/6_Performance/UnifiedMemoryPerf/NsightEclipse.xml
@@ -3,19 +3,19 @@
 <entry>
   <name>UnifiedMemoryPerf</name>
   <cuda_api_list>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaStreamDestroy</toolkit>
+    <toolkit>cudaMemPrefetchAsync</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaMallocHost</toolkit>
     <toolkit>cudaMallocManaged</toolkit>
-    <toolkit>cudaMemPrefetchAsync</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
     <toolkit>cudaStreamAttachMemAsync</toolkit>
+    <toolkit>cudaHostGetDevicePointer</toolkit>
     <toolkit>cudaFreeHost</toolkit>
+    <toolkit>cudaStreamSynchronize</toolkit>
     <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaMemcpyAsync</toolkit>
-    <toolkit>cudaStreamSynchronize</toolkit>
-    <toolkit>cudaHostGetDevicePointer</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates the performance comparision using matrix multiplication kernel of Unified Memory with/without hints and other types of memory like zero copy buffers, pageable, pagelocked memory performing synchronous and Asynchronous transfers on a single GPU.]]></description>
@@ -66,6 +66,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/6_Performance/UnifiedMemoryPerf/README.md b/Samples/6_Performance/UnifiedMemoryPerf/README.md
index 575b7ee1c..6c1ce62c3 100644
--- a/Samples/6_Performance/UnifiedMemoryPerf/README.md
+++ b/Samples/6_Performance/UnifiedMemoryPerf/README.md
@@ -10,7 +10,7 @@ CUDA Systems Integration, Unified Memory, CUDA Streams and Events, Pinned System
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamDestroy, cudaFree, cudaMallocHost, cudaMallocManaged, cudaMemPrefetchAsync, cudaStreamCreate, cudaStreamAttachMemAsync, cudaFreeHost, cudaMalloc, cudaMemcpyAsync, cudaStreamSynchronize, cudaHostGetDevicePointer, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaStreamDestroy, cudaMemPrefetchAsync, cudaFree, cudaMallocHost, cudaMallocManaged, cudaStreamAttachMemAsync, cudaHostGetDevicePointer, cudaFreeHost, cudaStreamSynchronize, cudaMalloc, cudaMemcpyAsync, cudaStreamCreate, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [UVM](../../../README.md#uvm)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2017.vcxproj b/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2017.vcxproj
index 5b99766a2..7d59c0aec 100644
--- a/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2017.vcxproj
+++ b/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/UnifiedMemoryPerf.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -110,6 +110,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2019.vcxproj b/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2019.vcxproj
index 823351a1f..9b5e36571 100644
--- a/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2019.vcxproj
+++ b/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/UnifiedMemoryPerf.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -106,6 +106,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2022.vcxproj b/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2022.vcxproj
index 7dfac552a..536d6d5b8 100644
--- a/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2022.vcxproj
+++ b/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/UnifiedMemoryPerf.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -106,6 +106,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/6_Performance/alignedTypes/Makefile b/Samples/6_Performance/alignedTypes/Makefile
index 907e3f446..492ad1974 100644
--- a/Samples/6_Performance/alignedTypes/Makefile
+++ b/Samples/6_Performance/alignedTypes/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/6_Performance/alignedTypes/NsightEclipse.xml b/Samples/6_Performance/alignedTypes/NsightEclipse.xml
index 388b2897e..0b1d16af4 100644
--- a/Samples/6_Performance/alignedTypes/NsightEclipse.xml
+++ b/Samples/6_Performance/alignedTypes/NsightEclipse.xml
@@ -3,11 +3,11 @@
 <entry>
   <name>alignedTypes</name>
   <cuda_api_list>
-    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaMemset</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[A simple test, showing huge access speed gap between aligned and misaligned structures. It measures per-element copy throughput for aligned and misaligned structures on big chunks of data.]]></description>
@@ -46,6 +46,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/6_Performance/alignedTypes/README.md b/Samples/6_Performance/alignedTypes/README.md
index 85c8bd55e..56e5e265a 100644
--- a/Samples/6_Performance/alignedTypes/README.md
+++ b/Samples/6_Performance/alignedTypes/README.md
@@ -10,7 +10,7 @@ Performance Strategies
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaMemset, cudaMalloc, cudaGetDeviceProperties
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/6_Performance/alignedTypes/alignedTypes_vs2017.vcxproj b/Samples/6_Performance/alignedTypes/alignedTypes_vs2017.vcxproj
index 73e89699f..7e3439f69 100644
--- a/Samples/6_Performance/alignedTypes/alignedTypes_vs2017.vcxproj
+++ b/Samples/6_Performance/alignedTypes/alignedTypes_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/alignedTypes.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/6_Performance/alignedTypes/alignedTypes_vs2019.vcxproj b/Samples/6_Performance/alignedTypes/alignedTypes_vs2019.vcxproj
index da732d283..833b531c6 100644
--- a/Samples/6_Performance/alignedTypes/alignedTypes_vs2019.vcxproj
+++ b/Samples/6_Performance/alignedTypes/alignedTypes_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/alignedTypes.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/6_Performance/alignedTypes/alignedTypes_vs2022.vcxproj b/Samples/6_Performance/alignedTypes/alignedTypes_vs2022.vcxproj
index d24f27490..ea9110365 100644
--- a/Samples/6_Performance/alignedTypes/alignedTypes_vs2022.vcxproj
+++ b/Samples/6_Performance/alignedTypes/alignedTypes_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/alignedTypes.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/6_Performance/transpose/Makefile b/Samples/6_Performance/transpose/Makefile
index 50355e562..83909bce0 100644
--- a/Samples/6_Performance/transpose/Makefile
+++ b/Samples/6_Performance/transpose/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/6_Performance/transpose/NsightEclipse.xml b/Samples/6_Performance/transpose/NsightEclipse.xml
index a080824a9..58f448f6b 100644
--- a/Samples/6_Performance/transpose/NsightEclipse.xml
+++ b/Samples/6_Performance/transpose/NsightEclipse.xml
@@ -3,17 +3,17 @@
 <entry>
   <name>transpose</name>
   <cuda_api_list>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaGetLastError</toolkit>
     <toolkit>cudaEventSynchronize</toolkit>
-    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaGetDevice</toolkit>
     <toolkit>cudaEventDestroy</toolkit>
-    <toolkit>cudaGetLastError</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
-    <toolkit>cudaGetDevice</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates Matrix Transpose.  Different performance are shown to achieve high performance.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -55,6 +55,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/6_Performance/transpose/README.md b/Samples/6_Performance/transpose/README.md
index 2d7870689..0693888b0 100644
--- a/Samples/6_Performance/transpose/README.md
+++ b/Samples/6_Performance/transpose/README.md
@@ -10,7 +10,7 @@ Performance Strategies, Linear Algebra
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaMalloc, cudaEventDestroy, cudaGetLastError, cudaMemcpy, cudaGetDeviceProperties, cudaGetDevice
+cudaMemcpy, cudaMalloc, cudaFree, cudaGetLastError, cudaEventSynchronize, cudaEventRecord, cudaGetDevice, cudaEventDestroy, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/6_Performance/transpose/transpose_vs2017.vcxproj b/Samples/6_Performance/transpose/transpose_vs2017.vcxproj
index 0c04b15f2..a9f215afa 100644
--- a/Samples/6_Performance/transpose/transpose_vs2017.vcxproj
+++ b/Samples/6_Performance/transpose/transpose_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/transpose.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/6_Performance/transpose/transpose_vs2019.vcxproj b/Samples/6_Performance/transpose/transpose_vs2019.vcxproj
index 6cf4cb22a..e4721874d 100644
--- a/Samples/6_Performance/transpose/transpose_vs2019.vcxproj
+++ b/Samples/6_Performance/transpose/transpose_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/transpose.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/6_Performance/transpose/transpose_vs2022.vcxproj b/Samples/6_Performance/transpose/transpose_vs2022.vcxproj
index 3c7e497c0..1db2b8cda 100644
--- a/Samples/6_Performance/transpose/transpose_vs2022.vcxproj
+++ b/Samples/6_Performance/transpose/transpose_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/transpose.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>