Skip to content

Commit

Permalink
Merge pull request #156 from brucefan1983/varying_time_step
Browse files Browse the repository at this point in the history
Varying time step
  • Loading branch information
brucefan1983 authored Feb 16, 2022
2 parents a0024eb + 52a7c09 commit 86ad812
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 10 deletions.
1 change: 0 additions & 1 deletion src/force/nep4_small_box.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@ static __global__ void find_neighbor_list_small_box(
double x1 = g_x[n1];
double y1 = g_y[n1];
double z1 = g_z[n1];
int count_radial = 0;
int count_angular = 0;
for (int n2 = N1; n2 < N2; ++n2) {
for (int ia = 0; ia < ebox.num_cells[0]; ++ia) {
Expand Down
75 changes: 73 additions & 2 deletions src/main_gpumd/run.cu
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,65 @@ Run simulation according to the inputs in the run.in file.
#include "utilities/read_file.cuh"
#include "velocity.cuh"

static __global__ void gpu_find_largest_v2(
int N, int number_of_rounds, double* g_vx, double* g_vy, double* g_vz, double* g_v2_max)
{
int tid = threadIdx.x;
__shared__ double s_data[1024];
s_data[tid] = 0.0;
for (int round = 0; round < number_of_rounds; ++round) {
int n = round * 1024 + tid;
if (n < N) {
double vx = g_vx[n];
double vy = g_vy[n];
double vz = g_vz[n];
double v2 = vx * vx + vy * vy + vz * vz;
if (s_data[tid] < v2) {
s_data[tid] = v2;
}
}
}
__syncthreads();

for (int offset = blockDim.x >> 1; offset > 0; offset >>= 1) {
if (tid < offset) {
if (s_data[tid] < s_data[tid + offset]) {
s_data[tid] = s_data[tid + offset];
}
}
__syncthreads();
}

if (tid == 0) {
g_v2_max[0] = s_data[0];
}
}

__device__ double device_v2_max[1];

static void calculate_time_step(
double max_distance_per_step, GPU_Vector<double>& velocity_per_atom, double& time_step)
{
if (max_distance_per_step <= 0.0) {
return;
}
const int N = velocity_per_atom.size() / 3;
double* gpu_v2_max;
CHECK(cudaGetSymbolAddress((void**)&gpu_v2_max, device_v2_max));
gpu_find_largest_v2<<<1, 1024>>>(
N, (N - 1) / 1024 + 1, velocity_per_atom.data(), velocity_per_atom.data() + N,
velocity_per_atom.data() + N * 2, gpu_v2_max);
CUDA_CHECK_KERNEL
double cpu_v2_max[1] = {0.0};
CHECK(cudaMemcpy(cpu_v2_max, gpu_v2_max, sizeof(double), cudaMemcpyDeviceToHost));
double cpu_v_max = sqrt(cpu_v2_max[0]);
double time_step_min = max_distance_per_step / cpu_v_max;

if (time_step_min < time_step) {
time_step = time_step_min;
}
}

Run::Run(char* input_dir)
{
print_line_1();
Expand Down Expand Up @@ -93,6 +152,8 @@ void Run::perform_a_run(char* input_dir)
clock_t time_begin = clock();

for (int step = 0; step < number_of_steps; ++step) {

calculate_time_step(max_distance_per_step, atom.velocity_per_atom, time_step);
global_time += time_step;

#ifndef USE_FCP // the FCP does not use a neighbor list at all
Expand Down Expand Up @@ -155,6 +216,7 @@ void Run::perform_a_run(char* input_dir)
integrate.finalize();
neighbor.finalize();
velocity.finalize();
max_distance_per_step = 0.0;
}

void Run::parse_one_keyword(char** param, int num_param, char* input_dir)
Expand Down Expand Up @@ -276,14 +338,23 @@ void Run::parse_correct_velocity(char** param, int num_param)

void Run::parse_time_step(char** param, int num_param)
{
if (num_param != 2) {
PRINT_INPUT_ERROR("time_step should have 1 parameter.\n");
if (num_param != 2 && num_param != 3) {
PRINT_INPUT_ERROR("time_step should have 1 or 2 parameters.\n");
}
if (!is_valid_real(param[1], &time_step)) {
PRINT_INPUT_ERROR("time_step should be a real number.\n");
}
printf("Time step for this run is %g fs.\n", time_step);
time_step /= TIME_UNIT_CONVERSION;
if (num_param == 3) {
if (!is_valid_real(param[2], &max_distance_per_step)) {
PRINT_INPUT_ERROR("max distance per step should be a real number.\n");
}
if (max_distance_per_step <= 0.0) {
PRINT_INPUT_ERROR("max distance per step should > 0.\n");
}
printf(" max distance per step = %g A.\n", max_distance_per_step);
}
}

void Run::parse_run(char** param, int num_param, char* input_dir)
Expand Down
1 change: 1 addition & 0 deletions src/main_gpumd/run.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ private:
double global_time = 0.0; // run time of entire simulation (fs)
double initial_temperature; // initial temperature for velocity
double time_step = 1.0 / TIME_UNIT_CONVERSION;
double max_distance_per_step = -1.0;
Atom atom;
GPU_Vector<double> thermo; // some thermodynamic quantities
Neighbor neighbor;
Expand Down
6 changes: 3 additions & 3 deletions src/measure/dump_exyz.cu
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ void Dump_EXYZ::parse(char** param, int num_param)
}
}

void Dump_EXYZ::preprocess(char* input_dir, const int number_of_atoms, const double time_step)
void Dump_EXYZ::preprocess(char* input_dir, const int number_of_atoms)
{
if (dump_) {
strcpy(filename_, input_dir);
Expand All @@ -95,7 +95,6 @@ void Dump_EXYZ::preprocess(char* input_dir, const int number_of_atoms, const dou
if (has_force_) {
cpu_force_per_atom_.resize(number_of_atoms * 3);
}
time_step_ = time_step;
}
}

Expand Down Expand Up @@ -161,6 +160,7 @@ void Dump_EXYZ::output_line2(

void Dump_EXYZ::process(
const int step,
const double global_time,
const Box& box,
const std::vector<std::string>& cpu_atom_symbol,
const std::vector<int>& cpu_type,
Expand Down Expand Up @@ -190,7 +190,7 @@ void Dump_EXYZ::process(
fprintf(fid_, "%d\n", num_atoms_total);

// line 2
output_line2((step + 1) * time_step_, box, virial_per_atom, gpu_thermo);
output_line2(global_time, box, virial_per_atom, gpu_thermo);

// other lines
for (int n = 0; n < num_atoms_total; n++) {
Expand Down
4 changes: 2 additions & 2 deletions src/measure/dump_exyz.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,10 @@ class Dump_EXYZ
{
public:
void parse(char** param, int num_param);
void preprocess(char* input_dir, const int number_of_atoms, const double time_step);
void preprocess(char* input_dir, const int number_of_atoms);
void process(
const int step,
const double global_time,
const Box& box,
const std::vector<std::string>& cpu_atom_symbol,
const std::vector<int>& cpu_type,
Expand All @@ -44,7 +45,6 @@ private:
int dump_interval_ = 1;
int has_velocity_ = 0;
int has_force_ = 0;
double time_step_;
FILE* fid_;
char filename_[200];
void output_line2(
Expand Down
4 changes: 2 additions & 2 deletions src/measure/measure.cu
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ void Measure::initialize(
dump_restart.preprocess(input_dir);
dump_thermo.preprocess(input_dir);
dump_force.preprocess(input_dir, number_of_atoms, group);
dump_exyz.preprocess(input_dir, number_of_atoms, time_step);
dump_exyz.preprocess(input_dir, number_of_atoms);
#ifdef USE_NETCDF
dump_netcdf.preprocess(input_dir, number_of_atoms);
#endif
Expand Down Expand Up @@ -114,7 +114,7 @@ void Measure::process(
atom.cpu_velocity_per_atom);
dump_force.process(step, group, atom.force_per_atom);
dump_exyz.process(
step, box, atom.cpu_atom_symbol, atom.cpu_type, atom.position_per_atom,
step, global_time, box, atom.cpu_atom_symbol, atom.cpu_type, atom.position_per_atom,
atom.cpu_position_per_atom, atom.velocity_per_atom, atom.cpu_velocity_per_atom,
atom.force_per_atom, atom.virial_per_atom, thermo);

Expand Down

0 comments on commit 86ad812

Please sign in to comment.