From f8b9adab9e359ac93691df274266c8f59d6bc9c3 Mon Sep 17 00:00:00 2001 From: Zheyong Fan Date: Tue, 15 Feb 2022 20:23:17 +0800 Subject: [PATCH 1/4] remove an unused variable --- src/force/nep4_small_box.cuh | 1 - 1 file changed, 1 deletion(-) diff --git a/src/force/nep4_small_box.cuh b/src/force/nep4_small_box.cuh index e4ffba4ec..463e9c801 100644 --- a/src/force/nep4_small_box.cuh +++ b/src/force/nep4_small_box.cuh @@ -89,7 +89,6 @@ static __global__ void find_neighbor_list_small_box( double x1 = g_x[n1]; double y1 = g_y[n1]; double z1 = g_z[n1]; - int count_radial = 0; int count_angular = 0; for (int n2 = N1; n2 < N2; ++n2) { for (int ia = 0; ia < ebox.num_cells[0]; ++ia) { From 85ffbe4cf0c619a5a99b116f2c54d4e164b8cec2 Mon Sep 17 00:00:00 2001 From: Zheyong Fan Date: Tue, 15 Feb 2022 20:50:37 +0800 Subject: [PATCH 2/4] correct the time reported in dump.xyz --- src/measure/dump_exyz.cu | 6 +++--- src/measure/dump_exyz.cuh | 4 ++-- src/measure/measure.cu | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/measure/dump_exyz.cu b/src/measure/dump_exyz.cu index a1e726b82..7b05a537a 100644 --- a/src/measure/dump_exyz.cu +++ b/src/measure/dump_exyz.cu @@ -84,7 +84,7 @@ void Dump_EXYZ::parse(char** param, int num_param) } } -void Dump_EXYZ::preprocess(char* input_dir, const int number_of_atoms, const double time_step) +void Dump_EXYZ::preprocess(char* input_dir, const int number_of_atoms) { if (dump_) { strcpy(filename_, input_dir); @@ -95,7 +95,6 @@ void Dump_EXYZ::preprocess(char* input_dir, const int number_of_atoms, const dou if (has_force_) { cpu_force_per_atom_.resize(number_of_atoms * 3); } - time_step_ = time_step; } } @@ -161,6 +160,7 @@ void Dump_EXYZ::output_line2( void Dump_EXYZ::process( const int step, + const double global_time, const Box& box, const std::vector& cpu_atom_symbol, const std::vector& cpu_type, @@ -190,7 +190,7 @@ void Dump_EXYZ::process( fprintf(fid_, "%d\n", num_atoms_total); // line 2 - output_line2((step + 1) * time_step_, box, virial_per_atom, gpu_thermo); + output_line2(global_time, box, virial_per_atom, gpu_thermo); // other lines for (int n = 0; n < num_atoms_total; n++) { diff --git a/src/measure/dump_exyz.cuh b/src/measure/dump_exyz.cuh index e5e7aae11..18b7c57fb 100644 --- a/src/measure/dump_exyz.cuh +++ b/src/measure/dump_exyz.cuh @@ -24,9 +24,10 @@ class Dump_EXYZ { public: void parse(char** param, int num_param); - void preprocess(char* input_dir, const int number_of_atoms, const double time_step); + void preprocess(char* input_dir, const int number_of_atoms); void process( const int step, + const double global_time, const Box& box, const std::vector& cpu_atom_symbol, const std::vector& cpu_type, @@ -44,7 +45,6 @@ private: int dump_interval_ = 1; int has_velocity_ = 0; int has_force_ = 0; - double time_step_; FILE* fid_; char filename_[200]; void output_line2( diff --git a/src/measure/measure.cu b/src/measure/measure.cu index 5d13060b9..e70039c12 100644 --- a/src/measure/measure.cu +++ b/src/measure/measure.cu @@ -50,7 +50,7 @@ void Measure::initialize( dump_restart.preprocess(input_dir); dump_thermo.preprocess(input_dir); dump_force.preprocess(input_dir, number_of_atoms, group); - dump_exyz.preprocess(input_dir, number_of_atoms, time_step); + dump_exyz.preprocess(input_dir, number_of_atoms); #ifdef USE_NETCDF dump_netcdf.preprocess(input_dir, number_of_atoms); #endif @@ -114,7 +114,7 @@ void Measure::process( atom.cpu_velocity_per_atom); dump_force.process(step, group, atom.force_per_atom); dump_exyz.process( - step, box, atom.cpu_atom_symbol, atom.cpu_type, atom.position_per_atom, + step, global_time, box, atom.cpu_atom_symbol, atom.cpu_type, atom.position_per_atom, atom.cpu_position_per_atom, atom.velocity_per_atom, atom.cpu_velocity_per_atom, atom.force_per_atom, atom.virial_per_atom, thermo); From da7e5e3f254aac002b4213a3d57ab75887cd9204 Mon Sep 17 00:00:00 2001 From: Zheyong Fan Date: Wed, 16 Feb 2022 17:33:33 +0800 Subject: [PATCH 3/4] varying time step finished --- src/main_gpumd/run.cu | 78 ++++++++++++++++++++++++++++++++++++++++-- src/main_gpumd/run.cuh | 1 + 2 files changed, 77 insertions(+), 2 deletions(-) diff --git a/src/main_gpumd/run.cu b/src/main_gpumd/run.cu index 70ac90467..36ea7ba45 100644 --- a/src/main_gpumd/run.cu +++ b/src/main_gpumd/run.cu @@ -33,6 +33,68 @@ Run simulation according to the inputs in the run.in file. #include "utilities/read_file.cuh" #include "velocity.cuh" +static __global__ void gpu_find_largest_v2( + int N, int number_of_rounds, double* g_vx, double* g_vy, double* g_vz, double* g_v2_max) +{ + int tid = threadIdx.x; + __shared__ double s_data[1024]; + s_data[tid] = 0.0; + for (int round = 0; round < number_of_rounds; ++round) { + int n = round * 1024 + tid; + if (n < N) { + double vx = g_vx[n]; + double vy = g_vy[n]; + double vz = g_vz[n]; + double v2 = vx * vx + vy * vy + vz * vz; + if (s_data[tid] < v2) { + s_data[tid] = v2; + } + } + } + __syncthreads(); + + for (int offset = blockDim.x >> 1; offset > 0; offset >>= 1) { + if (tid < offset) { + if (s_data[tid] < s_data[tid + offset]) { + s_data[tid] = s_data[tid + offset]; + } + } + __syncthreads(); + } + + if (tid == 0) { + g_v2_max[0] = s_data[0]; + } +} + +__device__ double device_v2_max[1]; + +static void calculate_time_step( + double max_distance_per_step, GPU_Vector& velocity_per_atom, double& time_step) +{ + if (max_distance_per_step <= 0.0) { + return; + } + const int N = velocity_per_atom.size() / 3; + double* gpu_v2_max; + CHECK(cudaGetSymbolAddress((void**)&gpu_v2_max, device_v2_max)); + gpu_find_largest_v2<<<1, 1024>>>( + N, (N - 1) / 1024 + 1, velocity_per_atom.data(), velocity_per_atom.data() + N, + velocity_per_atom.data() + N * 2, gpu_v2_max); + CUDA_CHECK_KERNEL + double cpu_v2_max[1] = {0.0}; + CHECK(cudaMemcpy(cpu_v2_max, gpu_v2_max, sizeof(double), cudaMemcpyDeviceToHost)); + double cpu_v_max = sqrt(cpu_v2_max[0]); + double time_step_min = max_distance_per_step / cpu_v_max; + + if (time_step_min < time_step) { + time_step = time_step_min; + } + printf( + "cpu_v_max=%f, time_step_min=%f, time_step=%f\n", cpu_v_max, time_step_min * 1.018051e+1, + time_step * 1.018051e+1); +} + Run::Run(char* input_dir) { print_line_1(); @@ -93,6 +155,8 @@ void Run::perform_a_run(char* input_dir) clock_t time_begin = clock(); for (int step = 0; step < number_of_steps; ++step) { + + calculate_time_step(max_distance_per_step, atom.velocity_per_atom, time_step); global_time += time_step; #ifndef USE_FCP // the FCP does not use a neighbor list at all @@ -155,6 +219,7 @@ void Run::perform_a_run(char* input_dir) integrate.finalize(); neighbor.finalize(); velocity.finalize(); + max_distance_per_step = 0.0; } void Run::parse_one_keyword(char** param, int num_param, char* input_dir) @@ -276,14 +341,23 @@ void Run::parse_correct_velocity(char** param, int num_param) void Run::parse_time_step(char** param, int num_param) { - if (num_param != 2) { - PRINT_INPUT_ERROR("time_step should have 1 parameter.\n"); + if (num_param != 2 && num_param != 3) { + PRINT_INPUT_ERROR("time_step should have 1 or 2 parameters.\n"); } if (!is_valid_real(param[1], &time_step)) { PRINT_INPUT_ERROR("time_step should be a real number.\n"); } printf("Time step for this run is %g fs.\n", time_step); time_step /= TIME_UNIT_CONVERSION; + if (num_param == 3) { + if (!is_valid_real(param[2], &max_distance_per_step)) { + PRINT_INPUT_ERROR("max distance per step should be a real number.\n"); + } + if (max_distance_per_step <= 0.0) { + PRINT_INPUT_ERROR("max distance per step should > 0.\n"); + } + printf(" max distance per step = %g A.\n", max_distance_per_step); + } } void Run::parse_run(char** param, int num_param, char* input_dir) diff --git a/src/main_gpumd/run.cuh b/src/main_gpumd/run.cuh index 7b9ecd847..60881910f 100644 --- a/src/main_gpumd/run.cuh +++ b/src/main_gpumd/run.cuh @@ -55,6 +55,7 @@ private: double global_time = 0.0; // run time of entire simulation (fs) double initial_temperature; // initial temperature for velocity double time_step = 1.0 / TIME_UNIT_CONVERSION; + double max_distance_per_step = -1.0; Atom atom; GPU_Vector thermo; // some thermodynamic quantities Neighbor neighbor; From 52a7c097b67d99e8096be9c13d563c0d14945757 Mon Sep 17 00:00:00 2001 From: Zheyong Fan Date: Wed, 16 Feb 2022 17:35:12 +0800 Subject: [PATCH 4/4] remove printf --- src/main_gpumd/run.cu | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/main_gpumd/run.cu b/src/main_gpumd/run.cu index 36ea7ba45..4e48be6fe 100644 --- a/src/main_gpumd/run.cu +++ b/src/main_gpumd/run.cu @@ -90,9 +90,6 @@ static void calculate_time_step( if (time_step_min < time_step) { time_step = time_step_min; } - printf( - "cpu_v_max=%f, time_step_min=%f, time_step=%f\n", cpu_v_max, time_step_min * 1.018051e+1, - time_step * 1.018051e+1); } Run::Run(char* input_dir)