Merge pull request #156 from brucefan1983/varying_time_step

Varying time step
brucefan1983 · Feb 16, 2022 · 86ad812 · 86ad812
2 parents a0024eb + 52a7c09
commit 86ad812
Show file tree

Hide file tree

Showing 6 changed files with 81 additions and 10 deletions.
diff --git a/src/force/nep4_small_box.cuh b/src/force/nep4_small_box.cuh
@@ -89,7 +89,6 @@ static __global__ void find_neighbor_list_small_box(
     double x1 = g_x[n1];
     double y1 = g_y[n1];
     double z1 = g_z[n1];
-    int count_radial = 0;
     int count_angular = 0;
     for (int n2 = N1; n2 < N2; ++n2) {
       for (int ia = 0; ia < ebox.num_cells[0]; ++ia) {

diff --git a/src/main_gpumd/run.cu b/src/main_gpumd/run.cu
@@ -33,6 +33,65 @@ Run simulation according to the inputs in the run.in file.
 #include "utilities/read_file.cuh"
 #include "velocity.cuh"
 
+static __global__ void gpu_find_largest_v2(
+  int N, int number_of_rounds, double* g_vx, double* g_vy, double* g_vz, double* g_v2_max)
+{
+  int tid = threadIdx.x;
+  __shared__ double s_data[1024];
+  s_data[tid] = 0.0;
+  for (int round = 0; round < number_of_rounds; ++round) {
+    int n = round * 1024 + tid;
+    if (n < N) {
+      double vx = g_vx[n];
+      double vy = g_vy[n];
+      double vz = g_vz[n];
+      double v2 = vx * vx + vy * vy + vz * vz;
+      if (s_data[tid] < v2) {
+        s_data[tid] = v2;
+      }
+    }
+  }
+  __syncthreads();
+
+  for (int offset = blockDim.x >> 1; offset > 0; offset >>= 1) {
+    if (tid < offset) {
+      if (s_data[tid] < s_data[tid + offset]) {
+        s_data[tid] = s_data[tid + offset];
+      }
+    }
+    __syncthreads();
+  }
+
+  if (tid == 0) {
+    g_v2_max[0] = s_data[0];
+  }
+}
+
+__device__ double device_v2_max[1];
+
+static void calculate_time_step(
+  double max_distance_per_step, GPU_Vector<double>& velocity_per_atom, double& time_step)
+{
+  if (max_distance_per_step <= 0.0) {
+    return;
+  }
+  const int N = velocity_per_atom.size() / 3;
+  double* gpu_v2_max;
+  CHECK(cudaGetSymbolAddress((void**)&gpu_v2_max, device_v2_max));
+  gpu_find_largest_v2<<<1, 1024>>>(
+    N, (N - 1) / 1024 + 1, velocity_per_atom.data(), velocity_per_atom.data() + N,
+    velocity_per_atom.data() + N * 2, gpu_v2_max);
+  CUDA_CHECK_KERNEL
+  double cpu_v2_max[1] = {0.0};
+  CHECK(cudaMemcpy(cpu_v2_max, gpu_v2_max, sizeof(double), cudaMemcpyDeviceToHost));
+  double cpu_v_max = sqrt(cpu_v2_max[0]);
+  double time_step_min = max_distance_per_step / cpu_v_max;
+
+  if (time_step_min < time_step) {
+    time_step = time_step_min;
+  }
+}
+
 Run::Run(char* input_dir)
 {
   print_line_1();
@@ -93,6 +152,8 @@ void Run::perform_a_run(char* input_dir)
   clock_t time_begin = clock();
 
   for (int step = 0; step < number_of_steps; ++step) {
+
+    calculate_time_step(max_distance_per_step, atom.velocity_per_atom, time_step);
     global_time += time_step;
 
 #ifndef USE_FCP // the FCP does not use a neighbor list at all
@@ -155,6 +216,7 @@ void Run::perform_a_run(char* input_dir)
   integrate.finalize();
   neighbor.finalize();
   velocity.finalize();
+  max_distance_per_step = 0.0;
 }
 
 void Run::parse_one_keyword(char** param, int num_param, char* input_dir)
@@ -276,14 +338,23 @@ void Run::parse_correct_velocity(char** param, int num_param)
 
 void Run::parse_time_step(char** param, int num_param)
 {
-  if (num_param != 2) {
-    PRINT_INPUT_ERROR("time_step should have 1 parameter.\n");
+  if (num_param != 2 && num_param != 3) {
+    PRINT_INPUT_ERROR("time_step should have 1 or 2 parameters.\n");
   }
   if (!is_valid_real(param[1], &time_step)) {
     PRINT_INPUT_ERROR("time_step should be a real number.\n");
   }
   printf("Time step for this run is %g fs.\n", time_step);
   time_step /= TIME_UNIT_CONVERSION;
+  if (num_param == 3) {
+    if (!is_valid_real(param[2], &max_distance_per_step)) {
+      PRINT_INPUT_ERROR("max distance per step should be a real number.\n");
+    }
+    if (max_distance_per_step <= 0.0) {
+      PRINT_INPUT_ERROR("max distance per step should > 0.\n");
+    }
+    printf("    max distance per step = %g A.\n", max_distance_per_step);
+  }
 }
 
 void Run::parse_run(char** param, int num_param, char* input_dir)

diff --git a/src/main_gpumd/run.cuh b/src/main_gpumd/run.cuh
@@ -55,6 +55,7 @@ private:
   double global_time = 0.0;   // run time of entire simulation (fs)
   double initial_temperature; // initial temperature for velocity
   double time_step = 1.0 / TIME_UNIT_CONVERSION;
+  double max_distance_per_step = -1.0;
   Atom atom;
   GPU_Vector<double> thermo; // some thermodynamic quantities
   Neighbor neighbor;

diff --git a/src/measure/dump_exyz.cu b/src/measure/dump_exyz.cu
@@ -84,7 +84,7 @@ void Dump_EXYZ::parse(char** param, int num_param)
   }
 }
 
-void Dump_EXYZ::preprocess(char* input_dir, const int number_of_atoms, const double time_step)
+void Dump_EXYZ::preprocess(char* input_dir, const int number_of_atoms)
 {
   if (dump_) {
     strcpy(filename_, input_dir);
@@ -95,7 +95,6 @@ void Dump_EXYZ::preprocess(char* input_dir, const int number_of_atoms, const dou
     if (has_force_) {
       cpu_force_per_atom_.resize(number_of_atoms * 3);
     }
-    time_step_ = time_step;
   }
 }
 
@@ -161,6 +160,7 @@ void Dump_EXYZ::output_line2(
 
 void Dump_EXYZ::process(
   const int step,
+  const double global_time,
   const Box& box,
   const std::vector<std::string>& cpu_atom_symbol,
   const std::vector<int>& cpu_type,
@@ -190,7 +190,7 @@ void Dump_EXYZ::process(
   fprintf(fid_, "%d\n", num_atoms_total);
 
   // line 2
-  output_line2((step + 1) * time_step_, box, virial_per_atom, gpu_thermo);
+  output_line2(global_time, box, virial_per_atom, gpu_thermo);
 
   // other lines
   for (int n = 0; n < num_atoms_total; n++) {

diff --git a/src/measure/dump_exyz.cuh b/src/measure/dump_exyz.cuh
@@ -24,9 +24,10 @@ class Dump_EXYZ
 {
 public:
   void parse(char** param, int num_param);
-  void preprocess(char* input_dir, const int number_of_atoms, const double time_step);
+  void preprocess(char* input_dir, const int number_of_atoms);
   void process(
     const int step,
+    const double global_time,
     const Box& box,
     const std::vector<std::string>& cpu_atom_symbol,
     const std::vector<int>& cpu_type,
@@ -44,7 +45,6 @@ private:
   int dump_interval_ = 1;
   int has_velocity_ = 0;
   int has_force_ = 0;
-  double time_step_;
   FILE* fid_;
   char filename_[200];
   void output_line2(

diff --git a/src/measure/measure.cu b/src/measure/measure.cu
@@ -50,7 +50,7 @@ void Measure::initialize(
   dump_restart.preprocess(input_dir);
   dump_thermo.preprocess(input_dir);
   dump_force.preprocess(input_dir, number_of_atoms, group);
-  dump_exyz.preprocess(input_dir, number_of_atoms, time_step);
+  dump_exyz.preprocess(input_dir, number_of_atoms);
 #ifdef USE_NETCDF
   dump_netcdf.preprocess(input_dir, number_of_atoms);
 #endif
@@ -114,7 +114,7 @@ void Measure::process(
     atom.cpu_velocity_per_atom);
   dump_force.process(step, group, atom.force_per_atom);
   dump_exyz.process(
-    step, box, atom.cpu_atom_symbol, atom.cpu_type, atom.position_per_atom,
+    step, global_time, box, atom.cpu_atom_symbol, atom.cpu_type, atom.position_per_atom,
     atom.cpu_position_per_atom, atom.velocity_per_atom, atom.cpu_velocity_per_atom,
     atom.force_per_atom, atom.virial_per_atom, thermo);