From 6d94ca8ae6867338e65995ae8abcd390f902f74f Mon Sep 17 00:00:00 2001 From: brucefan1983 Date: Tue, 25 Jun 2024 18:22:21 +0800 Subject: [PATCH 1/4] starting point of add_force --- src/main_gpumd/add_force.cu | 266 +++++++++++++++++++++++++++++++++++ src/main_gpumd/add_force.cuh | 41 ++++++ 2 files changed, 307 insertions(+) create mode 100644 src/main_gpumd/add_force.cu create mode 100644 src/main_gpumd/add_force.cuh diff --git a/src/main_gpumd/add_force.cu b/src/main_gpumd/add_force.cu new file mode 100644 index 000000000..07ff71af9 --- /dev/null +++ b/src/main_gpumd/add_force.cu @@ -0,0 +1,266 @@ +/* + Copyright 2017 Zheyong Fan, Ville Vierimaa, Mikko Ervasti, and Ari Harju + This file is part of GPUMD. + GPUMD is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + GPUMD is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with GPUMD. If not, see . +*/ + +/*----------------------------------------------------------------------------80 +Apply electron stopping. +------------------------------------------------------------------------------*/ + +#include "add_force.cuh" +#include "model/atom.cuh" +#include "utilities/common.cuh" +#include "utilities/gpu_vector.cuh" +#include +#include + +static void __global__ find_stopping_force( + const int num_atoms, + const int num_points, + const double time_step, + const double energy_min, + const double energy_max, + const double energy_interval_inverse, + const double* g_stopping_power, + const int* g_type, + const double* g_mass, + const double* g_velocity, + double* g_force, + double* g_power_loss) +{ + const int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i < num_atoms) { + int type = g_type[i]; + double mass = g_mass[i]; + double vx = g_velocity[0 * num_atoms + i]; + double vy = g_velocity[1 * num_atoms + i]; + double vz = g_velocity[2 * num_atoms + i]; + double v2 = vx * vx + vy * vy + vz * vz; + double energy = 0.5 * mass * v2; + + if (energy < energy_min + 1.0e-6 || energy > energy_max - 1.0e-6) { + g_force[0 * num_atoms + i] = 0.0; + g_force[1 * num_atoms + i] = 0.0; + g_force[2 * num_atoms + i] = 0.0; + return; + } + + double fractional_energy = (energy - energy_min) * energy_interval_inverse; + int index_left = static_cast(fractional_energy); + int index_right = index_left + 1; + double weight_right = fractional_energy - index_left; + double weight_left = 1.0 - weight_right; + double stopping_power = g_stopping_power[type * num_points + index_left] * weight_left + + g_stopping_power[type * num_points + index_right] * weight_right; + + double factor = -stopping_power / sqrt(v2); + + g_force[0 * num_atoms + i] = vx * factor; + g_force[1 * num_atoms + i] = vy * factor; + g_force[2 * num_atoms + i] = vz * factor; + + g_power_loss[i] = stopping_power * sqrt(v2) * time_step; + } +} + +__device__ float device_force_average[3]; + +static __global__ void find_force_average(int num_atoms, double* g_force) +{ + //<<<3, 1024>>> + int tid = threadIdx.x; + int bid = blockIdx.x; + int number_of_batches = (num_atoms - 1) / 1024 + 1; + __shared__ double s_f[1024]; + double f = 0.0; + + for (int batch = 0; batch < number_of_batches; ++batch) { + int n = tid + batch * 1024; + if (n < num_atoms) { + f += g_force[n + bid * num_atoms]; + } + } + + s_f[tid] = f; + __syncthreads(); + + for (int offset = blockDim.x >> 1; offset > 32; offset >>= 1) { + if (tid < offset) { + s_f[tid] += s_f[tid + offset]; + } + __syncthreads(); + } + for (int offset = 32; offset > 0; offset >>= 1) { + if (tid < offset) { + s_f[tid] += s_f[tid + offset]; + } + __syncwarp(); + } + + if (tid == 0) { + device_force_average[bid] = s_f[0] / num_atoms; + } +} + +static void __global__ +apply_electron_stopping(const int num_atoms, const double* g_stopping_force, double* g_force) +{ + const int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i < num_atoms) { + for (int d = 0; d < 3; ++d) { + g_force[d * num_atoms + i] += g_stopping_force[d * num_atoms + i] - device_force_average[d]; + } + } +} + +__device__ double device_power_loss; + +static __global__ void find_power_loss(int num_atoms, double* g_power_loss) +{ + //<<<1, 1024>>> + int tid = threadIdx.x; + int block_size = blockDim.x; + + int number_of_batches = (num_atoms + block_size - 1) / block_size; + __shared__ double s_f[1024]; + double f = 0.0; + + for (int batch = 0; batch < number_of_batches; ++batch) { + int idx = tid + batch * block_size; + if (idx < num_atoms) { + f += g_power_loss[idx]; + } + } + + s_f[tid] = f; + __syncthreads(); + + for (int offset = blockDim.x >> 1; offset > 0; offset >>= 1) { + if (tid < offset) { + s_f[tid] += s_f[tid + offset]; + } + __syncthreads(); + } + + if (tid == 0) { + device_power_loss = s_f[0]; + } + +} + +void Add_Force::compute(double time_step, Atom& atom) +{ + if (!do_electron_stop) { + return; + } + + find_stopping_force<<<(atom.number_of_atoms - 1) / 64 + 1, 64>>>( + atom.number_of_atoms, + num_points, + time_step, + energy_min, + energy_max, + 1.0 / energy_interval, + stopping_power_gpu.data(), + atom.type.data(), + atom.mass.data(), + atom.velocity_per_atom.data(), + stopping_force.data(), + stopping_loss.data()); + + CUDA_CHECK_KERNEL + + find_force_average<<<3, 1024>>>(atom.number_of_atoms, stopping_force.data()); + CUDA_CHECK_KERNEL + + apply_electron_stopping<<<(atom.number_of_atoms - 1) / 64 + 1, 64>>>( + atom.number_of_atoms, stopping_force.data(), atom.force_per_atom.data()); + CUDA_CHECK_KERNEL + + find_power_loss<<<1, 1024>>>(atom.number_of_atoms, stopping_loss.data()); + CUDA_CHECK_KERNEL + + double power_loss_host; + CHECK(cudaMemcpyFromSymbol(&power_loss_host, device_power_loss, sizeof(double), 0, cudaMemcpyDeviceToHost)); + stopping_power_loss += power_loss_host; +} + +void Add_Force::parse( + const char** param, int num_param, const int num_atoms, const int num_types) +{ + printf("Apply electron stopping.\n"); + if (num_param != 2) { + PRINT_INPUT_ERROR("electron_stop should have 1 parameter.\n"); + } + printf(" using the stopping power data in %s.\n", param[1]); + + std::ifstream input(param[1]); + if (!input.is_open()) { + printf("Failed to open %s.\n", param[1]); + exit(1); + } + + std::vector tokens = get_tokens(input); + if (tokens.size() != 3) { + PRINT_INPUT_ERROR("The first line of the stopping power file should have 3 values."); + } + num_points = get_int_from_token(tokens[0], __FILE__, __LINE__); + if (num_points < 2) { + PRINT_INPUT_ERROR("Number of energy values should >= 2.\n"); + } else { + printf(" number of energy values = %d.\n", num_points); + } + + energy_min = get_double_from_token(tokens[1], __FILE__, __LINE__); + if (energy_min <= 0) { + PRINT_INPUT_ERROR("energy_min should > 0.\n"); + } else { + printf(" energy_min = %g eV.\n", energy_min); + } + + energy_max = get_double_from_token(tokens[2], __FILE__, __LINE__); + if (energy_max <= energy_min) { + PRINT_INPUT_ERROR("energy_max should > energy_min.\n"); + } else { + printf(" energy_max = %g eV.\n", energy_max); + } + + energy_interval = (energy_max - energy_min) / (num_points - 1); + printf(" energy interval = %g eV.\n", energy_interval); + + stopping_power_cpu.resize(num_points * num_types); + for (int n = 0; n < num_points; ++n) { + std::vector tokens = get_tokens(input); + if (tokens.size() != num_types) { + PRINT_INPUT_ERROR("Number of values does not match with the number of elements."); + } + for (int t = 0; t < num_types; ++t) { + stopping_power_cpu[t * num_points + n] = get_double_from_token(tokens[t], __FILE__, __LINE__); + } + } + + stopping_power_gpu.resize(num_points * num_types); + stopping_power_gpu.copy_from_host(stopping_power_cpu.data()); + stopping_force.resize(num_atoms * 3); + stopping_loss.resize(num_atoms); + do_electron_stop = true; +} + +void Add_Force::finalize() +{ + if (do_electron_stop) { + printf("Total electron stopping power loss = %g eV.\n", stopping_power_loss); + } + do_electron_stop = false; + stopping_power_loss = 0.0; +} diff --git a/src/main_gpumd/add_force.cuh b/src/main_gpumd/add_force.cuh new file mode 100644 index 000000000..2c9ed39f7 --- /dev/null +++ b/src/main_gpumd/add_force.cuh @@ -0,0 +1,41 @@ +/* + Copyright 2017 Zheyong Fan, Ville Vierimaa, Mikko Ervasti, and Ari Harju + This file is part of GPUMD. + GPUMD is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + GPUMD is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with GPUMD. If not, see . +*/ + +#pragma once + +#include "utilities/gpu_vector.cuh" +#include + +class Atom; + +class Add_Force +{ +public: + bool do_electron_stop = false; + double stopping_power_loss = 0.0; + void parse(const char** param, int num_param, const int num_atoms, const int num_types); + void compute(double time_step, Atom& atom); + void finalize(); + +private: + int num_points = 0; + double energy_min; + double energy_max; + double energy_interval; + std::vector stopping_power_cpu; + GPU_Vector stopping_power_gpu; + GPU_Vector stopping_force; + GPU_Vector stopping_loss; +}; From 764d32c74472265c54ac17b4b57ebda7435ee772 Mon Sep 17 00:00:00 2001 From: brucefan1983 Date: Fri, 28 Jun 2024 01:11:17 +0800 Subject: [PATCH 2/4] almost finished add_force --- src/main_gpumd/add_force.cu | 310 +++++++++++------------------------ src/main_gpumd/add_force.cuh | 23 ++- 2 files changed, 109 insertions(+), 224 deletions(-) diff --git a/src/main_gpumd/add_force.cu b/src/main_gpumd/add_force.cu index 07ff71af9..439ba6588 100644 --- a/src/main_gpumd/add_force.cu +++ b/src/main_gpumd/add_force.cu @@ -14,253 +14,141 @@ */ /*----------------------------------------------------------------------------80 -Apply electron stopping. +Add force to a group of atoms. ------------------------------------------------------------------------------*/ #include "add_force.cuh" #include "model/atom.cuh" -#include "utilities/common.cuh" -#include "utilities/gpu_vector.cuh" +#include "model/group.cuh" +#include "utilities/read_file.cuh" #include #include -static void __global__ find_stopping_force( - const int num_atoms, - const int num_points, - const double time_step, - const double energy_min, - const double energy_max, - const double energy_interval_inverse, - const double* g_stopping_power, - const int* g_type, - const double* g_mass, - const double* g_velocity, - double* g_force, - double* g_power_loss) +void __global__ +add_force( + const int group_size, + const int group_size_sum, + const int* g_group_contents, + const double added_fx, + const double added_fy, + const double added_fz, + double* g_fx, + double* g_fy, + double* g_fz) { - const int i = blockIdx.x * blockDim.x + threadIdx.x; - if (i < num_atoms) { - int type = g_type[i]; - double mass = g_mass[i]; - double vx = g_velocity[0 * num_atoms + i]; - double vy = g_velocity[1 * num_atoms + i]; - double vz = g_velocity[2 * num_atoms + i]; - double v2 = vx * vx + vy * vy + vz * vz; - double energy = 0.5 * mass * v2; - - if (energy < energy_min + 1.0e-6 || energy > energy_max - 1.0e-6) { - g_force[0 * num_atoms + i] = 0.0; - g_force[1 * num_atoms + i] = 0.0; - g_force[2 * num_atoms + i] = 0.0; - return; - } - - double fractional_energy = (energy - energy_min) * energy_interval_inverse; - int index_left = static_cast(fractional_energy); - int index_right = index_left + 1; - double weight_right = fractional_energy - index_left; - double weight_left = 1.0 - weight_right; - double stopping_power = g_stopping_power[type * num_points + index_left] * weight_left + - g_stopping_power[type * num_points + index_right] * weight_right; - - double factor = -stopping_power / sqrt(v2); - - g_force[0 * num_atoms + i] = vx * factor; - g_force[1 * num_atoms + i] = vy * factor; - g_force[2 * num_atoms + i] = vz * factor; - - g_power_loss[i] = stopping_power * sqrt(v2) * time_step; - } -} - -__device__ float device_force_average[3]; - -static __global__ void find_force_average(int num_atoms, double* g_force) -{ - //<<<3, 1024>>> - int tid = threadIdx.x; - int bid = blockIdx.x; - int number_of_batches = (num_atoms - 1) / 1024 + 1; - __shared__ double s_f[1024]; - double f = 0.0; - - for (int batch = 0; batch < number_of_batches; ++batch) { - int n = tid + batch * 1024; - if (n < num_atoms) { - f += g_force[n + bid * num_atoms]; - } - } - - s_f[tid] = f; - __syncthreads(); - - for (int offset = blockDim.x >> 1; offset > 32; offset >>= 1) { - if (tid < offset) { - s_f[tid] += s_f[tid + offset]; - } - __syncthreads(); - } - for (int offset = 32; offset > 0; offset >>= 1) { - if (tid < offset) { - s_f[tid] += s_f[tid + offset]; - } - __syncwarp(); - } - - if (tid == 0) { - device_force_average[bid] = s_f[0] / num_atoms; + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid < group_size) { + const int atom_id = g_group_contents[group_size_sum + tid]; + g_fx[atom_id] += added_fx; + g_fy[atom_id] += added_fy; + g_fz[atom_id] += added_fz; } } -static void __global__ -apply_electron_stopping(const int num_atoms, const double* g_stopping_force, double* g_force) +void Add_Force::compute(const int step, const std::vector& groups, Atom& atom) { - const int i = blockIdx.x * blockDim.x + threadIdx.x; - if (i < num_atoms) { - for (int d = 0; d < 3; ++d) { - g_force[d * num_atoms + i] += g_stopping_force[d * num_atoms + i] - device_force_average[d]; - } + for (int call = 0; call < num_calls_; ++call) { + const int step_mod_table_length = step % table_length_[call]; + const float added_fx = force_table_[call][0 * table_length_[call] + step_mod_table_length]; + const float added_fy = force_table_[call][1 * table_length_[call] + step_mod_table_length]; + const float added_fz = force_table_[call][2 * table_length_[call] + step_mod_table_length]; + const int num_atoms_total = atom.force_per_atom.size() / 3; + const int group_size = groups[grouping_method_[call]].cpu_size[group_id_[call]]; + const int group_size_sum = groups[grouping_method_[call]].cpu_size_sum[group_id_[call]]; + add_force<<<(atom.number_of_atoms - 1) / 64 + 1, 64>>>( + group_size, + group_size_sum, + groups[grouping_method_[call]].contents.data(), + added_fx, + added_fy, + added_fz, + atom.force_per_atom.data(), + atom.force_per_atom.data() + num_atoms_total, + atom.force_per_atom.data() + num_atoms_total * 2 + ); + CUDA_CHECK_KERNEL } } -__device__ double device_power_loss; - -static __global__ void find_power_loss(int num_atoms, double* g_power_loss) +void Add_Force::parse(const char** param, int num_param, const std::vector& group) { - //<<<1, 1024>>> - int tid = threadIdx.x; - int block_size = blockDim.x; + printf("Add force.\n"); - int number_of_batches = (num_atoms + block_size - 1) / block_size; - __shared__ double s_f[1024]; - double f = 0.0; - - for (int batch = 0; batch < number_of_batches; ++batch) { - int idx = tid + batch * block_size; - if (idx < num_atoms) { - f += g_power_loss[idx]; - } + // check the number of parameters + if (num_param != 6 && num_param != 4) { + PRINT_INPUT_ERROR("electron_stop should have 5 or 3 parameters.\n"); } - s_f[tid] = f; - __syncthreads(); - - for (int offset = blockDim.x >> 1; offset > 0; offset >>= 1) { - if (tid < offset) { - s_f[tid] += s_f[tid + offset]; - } - __syncthreads(); + // parse grouping method + if (!is_valid_int(param[1], &grouping_method_[num_calls_])) { + PRINT_INPUT_ERROR("grouping method should be an integer.\n"); } - - if (tid == 0) { - device_power_loss = s_f[0]; + if (grouping_method_[num_calls_] < 0) { + PRINT_INPUT_ERROR("grouping method should >= 0.\n"); } - -} - -void Add_Force::compute(double time_step, Atom& atom) -{ - if (!do_electron_stop) { - return; + if (grouping_method_[num_calls_] >= group.size()) { + PRINT_INPUT_ERROR("grouping method should < maximum number of grouping methods.\n"); } - find_stopping_force<<<(atom.number_of_atoms - 1) / 64 + 1, 64>>>( - atom.number_of_atoms, - num_points, - time_step, - energy_min, - energy_max, - 1.0 / energy_interval, - stopping_power_gpu.data(), - atom.type.data(), - atom.mass.data(), - atom.velocity_per_atom.data(), - stopping_force.data(), - stopping_loss.data()); - - CUDA_CHECK_KERNEL - - find_force_average<<<3, 1024>>>(atom.number_of_atoms, stopping_force.data()); - CUDA_CHECK_KERNEL - - apply_electron_stopping<<<(atom.number_of_atoms - 1) / 64 + 1, 64>>>( - atom.number_of_atoms, stopping_force.data(), atom.force_per_atom.data()); - CUDA_CHECK_KERNEL - - find_power_loss<<<1, 1024>>>(atom.number_of_atoms, stopping_loss.data()); - CUDA_CHECK_KERNEL - - double power_loss_host; - CHECK(cudaMemcpyFromSymbol(&power_loss_host, device_power_loss, sizeof(double), 0, cudaMemcpyDeviceToHost)); - stopping_power_loss += power_loss_host; -} - -void Add_Force::parse( - const char** param, int num_param, const int num_atoms, const int num_types) -{ - printf("Apply electron stopping.\n"); - if (num_param != 2) { - PRINT_INPUT_ERROR("electron_stop should have 1 parameter.\n"); + // parse group id + if (!is_valid_int(param[2], &group_id_[num_calls_])) { + PRINT_INPUT_ERROR("group id should be an integer.\n"); } - printf(" using the stopping power data in %s.\n", param[1]); - - std::ifstream input(param[1]); - if (!input.is_open()) { - printf("Failed to open %s.\n", param[1]); - exit(1); + if (group_id_[num_calls_] < 0) { + PRINT_INPUT_ERROR("group id should >= 0.\n"); } - - std::vector tokens = get_tokens(input); - if (tokens.size() != 3) { - PRINT_INPUT_ERROR("The first line of the stopping power file should have 3 values."); - } - num_points = get_int_from_token(tokens[0], __FILE__, __LINE__); - if (num_points < 2) { - PRINT_INPUT_ERROR("Number of energy values should >= 2.\n"); - } else { - printf(" number of energy values = %d.\n", num_points); + if (group_id_[num_calls_] >= group[grouping_method_[num_calls_]].number) { + PRINT_INPUT_ERROR("group id should < maximum number of groups in the grouping method.\n"); } - energy_min = get_double_from_token(tokens[1], __FILE__, __LINE__); - if (energy_min <= 0) { - PRINT_INPUT_ERROR("energy_min should > 0.\n"); - } else { - printf(" energy_min = %g eV.\n", energy_min); - } - - energy_max = get_double_from_token(tokens[2], __FILE__, __LINE__); - if (energy_max <= energy_min) { - PRINT_INPUT_ERROR("energy_max should > energy_min.\n"); + if (num_param == 6) { + table_length_[num_calls_] = 1; + force_table_[num_calls_].resize(table_length_[num_calls_] * 3); + if (!is_valid_real(param[3], &force_table_[num_calls_][0])) { + PRINT_INPUT_ERROR("fx should be a number.\n"); + } + if (!is_valid_real(param[4], &force_table_[num_calls_][1])) { + PRINT_INPUT_ERROR("fy should be a number.\n"); + } + if (!is_valid_real(param[5], &force_table_[num_calls_][2])) { + PRINT_INPUT_ERROR("fz should be a number.\n"); + } } else { - printf(" energy_max = %g eV.\n", energy_max); - } - - energy_interval = (energy_max - energy_min) / (num_points - 1); - printf(" energy interval = %g eV.\n", energy_interval); + std::ifstream input(param[3]); + if (!input.is_open()) { + printf("Failed to open %s.\n", param[3]); + exit(1); + } - stopping_power_cpu.resize(num_points * num_types); - for (int n = 0; n < num_points; ++n) { std::vector tokens = get_tokens(input); - if (tokens.size() != num_types) { - PRINT_INPUT_ERROR("Number of values does not match with the number of elements."); + if (tokens.size() != 1) { + PRINT_INPUT_ERROR("The first line of the add_force file should have 1 value."); } - for (int t = 0; t < num_types; ++t) { - stopping_power_cpu[t * num_points + n] = get_double_from_token(tokens[t], __FILE__, __LINE__); + table_length_[num_calls_] = get_int_from_token(tokens[0], __FILE__, __LINE__); + if (table_length_[num_calls_] < 2) { + PRINT_INPUT_ERROR("Number of steps in the add_force file should >= 2.\n"); + } else { + printf(" number of values in the add_force file = %d.\n", table_length_[num_calls_]); + } + + force_table_[num_calls_].resize(table_length_[num_calls_] * 3); + for (int n = 0; n < table_length_[num_calls_]; ++n) { + std::vector tokens = get_tokens(input); + if (tokens.size() != 3) { + PRINT_INPUT_ERROR("Number of force components at each step should be 3."); + } + for (int t = 0; t < 3; ++t) { + force_table_[num_calls_][t * table_length_[num_calls_] + n] = get_double_from_token(tokens[t], __FILE__, __LINE__); + } } } - stopping_power_gpu.resize(num_points * num_types); - stopping_power_gpu.copy_from_host(stopping_power_cpu.data()); - stopping_force.resize(num_atoms * 3); - stopping_loss.resize(num_atoms); - do_electron_stop = true; + ++num_calls_; + + exit(1); } void Add_Force::finalize() { - if (do_electron_stop) { - printf("Total electron stopping power loss = %g eV.\n", stopping_power_loss); - } - do_electron_stop = false; - stopping_power_loss = 0.0; + num_calls_ = 0; } diff --git a/src/main_gpumd/add_force.cuh b/src/main_gpumd/add_force.cuh index 2c9ed39f7..8770b9118 100644 --- a/src/main_gpumd/add_force.cuh +++ b/src/main_gpumd/add_force.cuh @@ -15,27 +15,24 @@ #pragma once -#include "utilities/gpu_vector.cuh" #include class Atom; +class Group; class Add_Force { public: - bool do_electron_stop = false; - double stopping_power_loss = 0.0; - void parse(const char** param, int num_param, const int num_atoms, const int num_types); - void compute(double time_step, Atom& atom); + + void parse(const char** param, int num_param, const std::vector& group); + void compute(const int step, const std::vector& groups, Atom& atom); void finalize(); private: - int num_points = 0; - double energy_min; - double energy_max; - double energy_interval; - std::vector stopping_power_cpu; - GPU_Vector stopping_power_gpu; - GPU_Vector stopping_force; - GPU_Vector stopping_loss; + + int num_calls_ = 0; + int table_length_[10]; + std::vector force_table_[10]; + int grouping_method_[10]; + int group_id_[10]; }; From af5920478a53d84bad100980639e11e92e0e574c Mon Sep 17 00:00:00 2001 From: brucefan1983 Date: Fri, 28 Jun 2024 01:37:19 +0800 Subject: [PATCH 3/4] call add_force --- src/main_gpumd/add_force.cu | 11 ++++++++++- src/main_gpumd/run.cu | 7 ++++++- src/main_gpumd/run.cuh | 2 ++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/main_gpumd/add_force.cu b/src/main_gpumd/add_force.cu index 439ba6588..cbf1a01b0 100644 --- a/src/main_gpumd/add_force.cu +++ b/src/main_gpumd/add_force.cu @@ -76,7 +76,7 @@ void Add_Force::parse(const char** param, int num_param, const std::vector& tokens) integrate.parse_move(param, num_param, group); } else if (strcmp(param[0], "electron_stop") == 0) { electron_stop.parse(param, num_param, atom.number_of_atoms, number_of_types); - } else if (strcmp(param[0], "mc") == 0) { + } else if (strcmp(param[0], "add_force") == 0) { + add_force.parse(param, num_param, group); + }else if (strcmp(param[0], "mc") == 0) { mc.parse_mc(param, num_param, group, atom); } else if (strcmp(param[0], "dftd3") == 0) { // nothing here; will be handled elsewhere diff --git a/src/main_gpumd/run.cuh b/src/main_gpumd/run.cuh index c2d319863..4670f04e1 100644 --- a/src/main_gpumd/run.cuh +++ b/src/main_gpumd/run.cuh @@ -19,6 +19,7 @@ class Force; class Integrate; class Measure; +#include "add_force.cuh" #include "electron_stop.cuh" #include "force/force.cuh" #include "integrate/integrate.cuh" @@ -68,4 +69,5 @@ private: MC mc; Measure measure; Electron_Stop electron_stop; + Add_Force add_force; }; From 34d7e27c6502af7afe936c41b85826f330d5cd85 Mon Sep 17 00:00:00 2001 From: brucefan1983 Date: Fri, 28 Jun 2024 01:53:01 +0800 Subject: [PATCH 4/4] check the number of calls to add_force --- src/main_gpumd/add_force.cu | 6 ++++-- src/main_gpumd/run.cu | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/main_gpumd/add_force.cu b/src/main_gpumd/add_force.cu index cbf1a01b0..03f942fe5 100644 --- a/src/main_gpumd/add_force.cu +++ b/src/main_gpumd/add_force.cu @@ -55,7 +55,7 @@ void Add_Force::compute(const int step, const std::vector& groups, Atom& const int num_atoms_total = atom.force_per_atom.size() / 3; const int group_size = groups[grouping_method_[call]].cpu_size[group_id_[call]]; const int group_size_sum = groups[grouping_method_[call]].cpu_size_sum[group_id_[call]]; - add_force<<<(atom.number_of_atoms - 1) / 64 + 1, 64>>>( + add_force<<<(group_size - 1) / 64 + 1, 64>>>( group_size, group_size_sum, groups[grouping_method_[call]].contents.data(), @@ -154,7 +154,9 @@ void Add_Force::parse(const char** param, int num_param, const std::vector 10) { + PRINT_INPUT_ERROR("add_force cannot be used more than 10 times in one run."); + } } void Add_Force::finalize() diff --git a/src/main_gpumd/run.cu b/src/main_gpumd/run.cu index 1928a023a..aea3b6c8b 100644 --- a/src/main_gpumd/run.cu +++ b/src/main_gpumd/run.cu @@ -466,7 +466,7 @@ void Run::parse_one_keyword(std::vector& tokens) electron_stop.parse(param, num_param, atom.number_of_atoms, number_of_types); } else if (strcmp(param[0], "add_force") == 0) { add_force.parse(param, num_param, group); - }else if (strcmp(param[0], "mc") == 0) { + } else if (strcmp(param[0], "mc") == 0) { mc.parse_mc(param, num_param, group, atom); } else if (strcmp(param[0], "dftd3") == 0) { // nothing here; will be handled elsewhere