Replace cudaMemset and cudaGetSymbolAddress calls with the portable gpuMemset and gpuGetSymbolAddress macros

brucefan1983 · Oct 23, 2024 · 3d3736a · 3d3736a
1 parent 707dd3b
commit 3d3736a
Show file tree

Hide file tree

Showing 5 changed files with 21 additions and 13 deletions.
diff --git a/src/force/neighbor.cu b/src/force/neighbor.cu
@@ -183,9 +183,9 @@ void find_cell_list(
     cell_count_sum.resize(N_cells);
   }
 
-  CHECK(cudaMemset(cell_count.data(), 0, sizeof(int) * N_cells));
-  CHECK(cudaMemset(cell_count_sum.data(), 0, sizeof(int) * N_cells));
-  CHECK(cudaMemset(cell_contents.data(), 0, sizeof(int) * N));
+  CHECK(gpuMemset(cell_count.data(), 0, sizeof(int) * N_cells));
+  CHECK(gpuMemset(cell_count_sum.data(), 0, sizeof(int) * N_cells));
+  CHECK(gpuMemset(cell_contents.data(), 0, sizeof(int) * N));
 
   find_cell_counts<<<grid_size, block_size>>>(
     box, N, cell_count.data(), x, y, z, num_bins[0], num_bins[1], num_bins[2], rc_inv);
@@ -194,7 +194,7 @@ void find_cell_list(
   thrust::exclusive_scan(
     thrust::device, cell_count.data(), cell_count.data() + N_cells, cell_count_sum.data());
 
-  CHECK(cudaMemset(cell_count.data(), 0, sizeof(int) * N_cells));
+  CHECK(gpuMemset(cell_count.data(), 0, sizeof(int) * N_cells));
 
   find_cell_contents<<<grid_size, block_size>>>(
     box,

diff --git a/src/main_gpumd/run.cu b/src/main_gpumd/run.cu
@@ -84,7 +84,7 @@ static void calculate_time_step(
   }
   const int N = velocity_per_atom.size() / 3;
   double* gpu_v2_max;
-  CHECK(cudaGetSymbolAddress((void**)&gpu_v2_max, device_v2_max));
+  CHECK(gpuGetSymbolAddress((void**)&gpu_v2_max, device_v2_max));
   gpu_find_largest_v2<<<1, 1024>>>(
     N,
     (N - 1) / 1024 + 1,

diff --git a/src/mc/mc_ensemble_canonical.cu b/src/mc/mc_ensemble_canonical.cu
@@ -243,7 +243,7 @@ void MC_Ensemble_Canonical::compute(
       type_j = atom.cpu_type[j];
     }
 
-    CHECK(cudaMemset(NN_ij.data(), 0, sizeof(int)));
+    CHECK(gpuMemset(NN_ij.data(), 0, sizeof(int)));
     get_neighbors_of_i_and_j<<<(atom.number_of_atoms - 1) / 64 + 1, 64>>>(
       atom.number_of_atoms,
       box,
@@ -280,8 +280,8 @@ void MC_Ensemble_Canonical::compute(
       local_type_after.data());
     CUDA_CHECK_KERNEL
 
-    CHECK(cudaMemset(NN_radial.data(), 0, sizeof(int) * NN_radial.size()));
-    CHECK(cudaMemset(NN_angular.data(), 0, sizeof(int) * NN_angular.size()));
+    CHECK(gpuMemset(NN_radial.data(), 0, sizeof(int) * NN_radial.size()));
+    CHECK(gpuMemset(NN_angular.data(), 0, sizeof(int) * NN_angular.size()));
     create_inputs_for_energy_calculator<<<(atom.number_of_atoms - 1) / 64 + 1, 64>>>(
       atom.number_of_atoms,
       NN_ij_cpu,

diff --git a/src/mc/mc_ensemble_sgc.cu b/src/mc/mc_ensemble_sgc.cu
@@ -359,7 +359,7 @@ void MC_Ensemble_SGC::compute(
       type_j = types[index_new_species];
     }
 
-    CHECK(cudaMemset(NN_ij.data(), 0, sizeof(int)));
+    CHECK(gpuMemset(NN_ij.data(), 0, sizeof(int)));
     get_neighbors_of_i<<<(atom.number_of_atoms - 1) / 64 + 1, 64>>>(
       atom.number_of_atoms,
       box,
@@ -388,8 +388,8 @@ void MC_Ensemble_SGC::compute(
       local_type_after.data());
     CUDA_CHECK_KERNEL
 
-    CHECK(cudaMemset(NN_radial.data(), 0, sizeof(int) * NN_radial.size()));
-    CHECK(cudaMemset(NN_angular.data(), 0, sizeof(int) * NN_angular.size()));
+    CHECK(gpuMemset(NN_radial.data(), 0, sizeof(int) * NN_radial.size()));
+    CHECK(gpuMemset(NN_angular.data(), 0, sizeof(int) * NN_angular.size()));
     create_inputs_for_energy_calculator<<<(atom.number_of_atoms - 1) / 64 + 1, 64>>>(
       atom.number_of_atoms,
       NN_ij_cpu,

diff --git a/src/utilities/gpu_macro.cuh b/src/utilities/gpu_macro.cuh
@@ -19,23 +19,27 @@
 
 #include "hip/hip_runtime.h"
 
+// memory manipulation
 #define gpuMalloc hipMalloc 
 #define gpuMallocManaged hipMallocManaged
 #define gpuFree hipFree
-
 #define gpuMemcpy hipMemcpy
 #define gpuMemcpyFromSymbol hipMemcpyFromSymbol
 #define gpuMemcpyToSymbol hipMemcpyToSymbol
+#define gpuGetSymbolAddress hipGetSymbolAddress
 #define gpuMemcpyHostToDevice hipMemcpyHostToDevice
 #define gpuMemcpyDeviceToHost hipMemcpyDeviceToHost
 #define gpuMemcpyHostToHost hipMemcpyHostToHost
 #define gpuMemcpyDeviceToDevice hipMemcpyDeviceToDevice
+#define gpuMemset hipMemset
 
+// error handling
 #define gpuError_t hipError_t 
 #define gpuSuccess hipSuccess
 #define gpuGetErrorString hipGetErrorString
 #define gpuGetLastError hipGetLastError
 
+// device manipulation
 #define gpuSetDevice hipSetDevice
 #define gpuGetDeviceCount hipGetDeviceCount
 #define gpuDeviceProp hipDeviceProp
@@ -46,23 +50,27 @@
 
 #else // CUDA for Nvidia card
 
+// memory manipulation
 #define gpuMalloc cudaMalloc
 #define gpuMallocManaged cudaMallocManaged
 #define gpuFree cudaFree
-
 #define gpuMemcpy cudaMemcpy
 #define gpuMemcpyFromSymbol cudaMemcpyFromSymbol
 #define gpuMemcpyToSymbol cudaMemcpyToSymbol
+#define gpuGetSymbolAddress cudaGetSymbolAddress
 #define gpuMemcpyHostToDevice cudaMemcpyHostToDevice
 #define gpuMemcpyDeviceToHost cudaMemcpyDeviceToHost
 #define gpuMemcpyHostToHost cudaMemcpyHostToHost
 #define gpuMemcpyDeviceToDevice cudaMemcpyDeviceToDevice
+#define gpuMemset cudaMemset
 
+// error handling
 #define gpuError_t cudaError_t 
 #define gpuSuccess cudaSuccess
 #define gpuGetErrorString cudaGetErrorString
 #define gpuGetLastError cudaGetLastError
 
+// device manipulation
 #define gpuSetDevice cudaSetDevice
 #define gpuGetDeviceCount cudaGetDeviceCount
 #define gpuDeviceProp cudaDeviceProp