From 6d94ca8ae6867338e65995ae8abcd390f902f74f Mon Sep 17 00:00:00 2001
From: brucefan1983 <brucenju@gmail.com>
Date: Tue, 25 Jun 2024 18:22:21 +0800
Subject: [PATCH 1/4] starting point of add_force

---
 src/main_gpumd/add_force.cu  | 266 +++++++++++++++++++++++++++++++++++
 src/main_gpumd/add_force.cuh |  41 ++++++
 2 files changed, 307 insertions(+)
 create mode 100644 src/main_gpumd/add_force.cu
 create mode 100644 src/main_gpumd/add_force.cuh
diff --git a/src/main_gpumd/add_force.cu b/src/main_gpumd/add_force.cu
new file mode 100644
index 000000000..07ff71af9
--- /dev/null
+++ b/src/main_gpumd/add_force.cu
@@ -0,0 +1,266 @@
+/*
+    Copyright 2017 Zheyong Fan, Ville Vierimaa, Mikko Ervasti, and Ari Harju
+    This file is part of GPUMD.
+    GPUMD is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+    GPUMD is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+    You should have received a copy of the GNU General Public License
+    along with GPUMD.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*----------------------------------------------------------------------------80
+Apply electron stopping.
+------------------------------------------------------------------------------*/
+
+#include "add_force.cuh"
+#include "model/atom.cuh"
+#include "utilities/common.cuh"
+#include "utilities/gpu_vector.cuh"
+#include <iostream>
+#include <vector>
+
+static void __global__ find_stopping_force(
+  const int num_atoms,
+  const int num_points,
+  const double time_step,
+  const double energy_min,
+  const double energy_max,
+  const double energy_interval_inverse,
+  const double* g_stopping_power,
+  const int* g_type,
+  const double* g_mass,
+  const double* g_velocity,
+  double* g_force,
+  double* g_power_loss)
+{
+  const int i = blockIdx.x * blockDim.x + threadIdx.x;
+  if (i < num_atoms) {
+    int type = g_type[i];
+    double mass = g_mass[i];
+    double vx = g_velocity[0 * num_atoms + i];
+    double vy = g_velocity[1 * num_atoms + i];
+    double vz = g_velocity[2 * num_atoms + i];
+    double v2 = vx * vx + vy * vy + vz * vz;
+    double energy = 0.5 * mass * v2;
+
+    if (energy < energy_min + 1.0e-6 || energy > energy_max - 1.0e-6) {
+      g_force[0 * num_atoms + i] = 0.0;
+      g_force[1 * num_atoms + i] = 0.0;
+      g_force[2 * num_atoms + i] = 0.0;
+      return;
+    }
+
+    double fractional_energy = (energy - energy_min) * energy_interval_inverse;
+    int index_left = static_cast<int>(fractional_energy);
+    int index_right = index_left + 1;
+    double weight_right = fractional_energy - index_left;
+    double weight_left = 1.0 - weight_right;
+    double stopping_power = g_stopping_power[type * num_points + index_left] * weight_left +
+                            g_stopping_power[type * num_points + index_right] * weight_right;
+
+    double factor = -stopping_power / sqrt(v2);
+
+    g_force[0 * num_atoms + i] = vx * factor;
+    g_force[1 * num_atoms + i] = vy * factor;
+    g_force[2 * num_atoms + i] = vz * factor;
+
+    g_power_loss[i] = stopping_power * sqrt(v2) * time_step; 
+  }
+}
+
+__device__ float device_force_average[3];
+
+static __global__ void find_force_average(int num_atoms, double* g_force)
+{
+  //<<<3, 1024>>>
+  int tid = threadIdx.x;
+  int bid = blockIdx.x;
+  int number_of_batches = (num_atoms - 1) / 1024 + 1;
+  __shared__ double s_f[1024];
+  double f = 0.0;
+
+  for (int batch = 0; batch < number_of_batches; ++batch) {
+    int n = tid + batch * 1024;
+    if (n < num_atoms) {
+      f += g_force[n + bid * num_atoms];
+    }
+  }
+
+  s_f[tid] = f;
+  __syncthreads();
+
+  for (int offset = blockDim.x >> 1; offset > 32; offset >>= 1) {
+    if (tid < offset) {
+      s_f[tid] += s_f[tid + offset];
+    }
+    __syncthreads();
+  }
+  for (int offset = 32; offset > 0; offset >>= 1) {
+    if (tid < offset) {
+      s_f[tid] += s_f[tid + offset];
+    }
+    __syncwarp();
+  }
+
+  if (tid == 0) {
+    device_force_average[bid] = s_f[0] / num_atoms;
+  }
+}
+
+static void __global__
+apply_electron_stopping(const int num_atoms, const double* g_stopping_force, double* g_force)
+{
+  const int i = blockIdx.x * blockDim.x + threadIdx.x;
+  if (i < num_atoms) {
+    for (int d = 0; d < 3; ++d) {
+      g_force[d * num_atoms + i] += g_stopping_force[d * num_atoms + i] - device_force_average[d];
+    }
+  }
+}
+
+__device__ double device_power_loss;
+
+static __global__ void find_power_loss(int num_atoms, double* g_power_loss)
+{
+  //<<<1, 1024>>>
+  int tid = threadIdx.x;
+  int block_size = blockDim.x;
+
+  int number_of_batches = (num_atoms + block_size - 1) / block_size;
+  __shared__ double s_f[1024];
+  double f = 0.0;
+
+  for (int batch = 0; batch < number_of_batches; ++batch) {
+      int idx = tid + batch * block_size;
+      if (idx < num_atoms) {
+          f += g_power_loss[idx];
+      }
+  }
+
+  s_f[tid] = f;
+  __syncthreads();
+
+  for (int offset = blockDim.x >> 1; offset > 0; offset >>= 1) {
+      if (tid < offset) {
+          s_f[tid] += s_f[tid + offset];
+      }
+      __syncthreads(); 
+  }
+
+  if (tid == 0) {
+      device_power_loss = s_f[0];
+  }
+
+}
+
+void Add_Force::compute(double time_step, Atom& atom)
+{
+  if (!do_electron_stop) {
+    return;
+  }
+
+  find_stopping_force<<<(atom.number_of_atoms - 1) / 64 + 1, 64>>>(
+    atom.number_of_atoms,
+    num_points,
+    time_step,
+    energy_min,
+    energy_max,
+    1.0 / energy_interval,
+    stopping_power_gpu.data(),
+    atom.type.data(),
+    atom.mass.data(),
+    atom.velocity_per_atom.data(),
+    stopping_force.data(),
+    stopping_loss.data());
+
+  CUDA_CHECK_KERNEL
+
+  find_force_average<<<3, 1024>>>(atom.number_of_atoms, stopping_force.data());
+  CUDA_CHECK_KERNEL
+
+  apply_electron_stopping<<<(atom.number_of_atoms - 1) / 64 + 1, 64>>>(
+    atom.number_of_atoms, stopping_force.data(), atom.force_per_atom.data());
+  CUDA_CHECK_KERNEL
+
+  find_power_loss<<<1, 1024>>>(atom.number_of_atoms, stopping_loss.data());
+  CUDA_CHECK_KERNEL
+
+  double power_loss_host;  
+  CHECK(cudaMemcpyFromSymbol(&power_loss_host, device_power_loss, sizeof(double), 0, cudaMemcpyDeviceToHost));
+  stopping_power_loss += power_loss_host;
+}
+
+void Add_Force::parse(
+  const char** param, int num_param, const int num_atoms, const int num_types)
+{
+  printf("Apply electron stopping.\n");
+  if (num_param != 2) {
+    PRINT_INPUT_ERROR("electron_stop should have 1 parameter.\n");
+  }
+  printf("    using the stopping power data in %s.\n", param[1]);
+
+  std::ifstream input(param[1]);
+  if (!input.is_open()) {
+    printf("Failed to open %s.\n", param[1]);
+    exit(1);
+  }
+
+  std::vector<std::string> tokens = get_tokens(input);
+  if (tokens.size() != 3) {
+    PRINT_INPUT_ERROR("The first line of the stopping power file should have 3 values.");
+  }
+  num_points = get_int_from_token(tokens[0], __FILE__, __LINE__);
+  if (num_points < 2) {
+    PRINT_INPUT_ERROR("Number of energy values should >= 2.\n");
+  } else {
+    printf("    number of energy values = %d.\n", num_points);
+  }
+
+  energy_min = get_double_from_token(tokens[1], __FILE__, __LINE__);
+  if (energy_min <= 0) {
+    PRINT_INPUT_ERROR("energy_min should > 0.\n");
+  } else {
+    printf("    energy_min = %g eV.\n", energy_min);
+  }
+
+  energy_max = get_double_from_token(tokens[2], __FILE__, __LINE__);
+  if (energy_max <= energy_min) {
+    PRINT_INPUT_ERROR("energy_max should > energy_min.\n");
+  } else {
+    printf("    energy_max = %g eV.\n", energy_max);
+  }
+
+  energy_interval = (energy_max - energy_min) / (num_points - 1);
+  printf("    energy interval = %g eV.\n", energy_interval);
+
+  stopping_power_cpu.resize(num_points * num_types);
+  for (int n = 0; n < num_points; ++n) {
+    std::vector<std::string> tokens = get_tokens(input);
+    if (tokens.size() != num_types) {
+      PRINT_INPUT_ERROR("Number of values does not match with the number of elements.");
+    }
+    for (int t = 0; t < num_types; ++t) {
+      stopping_power_cpu[t * num_points + n] = get_double_from_token(tokens[t], __FILE__, __LINE__);
+    }
+  }
+
+  stopping_power_gpu.resize(num_points * num_types);
+  stopping_power_gpu.copy_from_host(stopping_power_cpu.data());
+  stopping_force.resize(num_atoms * 3);
+  stopping_loss.resize(num_atoms);
+  do_electron_stop = true;
+}
+
+void Add_Force::finalize() 
+{ 
+  if (do_electron_stop) { 
+    printf("Total electron stopping power loss = %g eV.\n", stopping_power_loss);
+  }
+  do_electron_stop = false; 
+  stopping_power_loss = 0.0;
+}
diff --git a/src/main_gpumd/add_force.cuh b/src/main_gpumd/add_force.cuh
new file mode 100644
index 000000000..2c9ed39f7
--- /dev/null
+++ b/src/main_gpumd/add_force.cuh
@@ -0,0 +1,41 @@
+/*
+    Copyright 2017 Zheyong Fan, Ville Vierimaa, Mikko Ervasti, and Ari Harju
+    This file is part of GPUMD.
+    GPUMD is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+    GPUMD is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+    You should have received a copy of the GNU General Public License
+    along with GPUMD.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#pragma once
+
+#include "utilities/gpu_vector.cuh"
+#include <vector>
+
+class Atom;
+
+class Add_Force
+{
+public:
+  bool do_electron_stop = false;
+  double stopping_power_loss = 0.0;
+  void parse(const char** param, int num_param, const int num_atoms, const int num_types);
+  void compute(double time_step, Atom& atom);
+  void finalize();
+
+private:
+  int num_points = 0;
+  double energy_min;
+  double energy_max;
+  double energy_interval;
+  std::vector<double> stopping_power_cpu;
+  GPU_Vector<double> stopping_power_gpu;
+  GPU_Vector<double> stopping_force;
+  GPU_Vector<double> stopping_loss;
+};

From 764d32c74472265c54ac17b4b57ebda7435ee772 Mon Sep 17 00:00:00 2001
From: brucefan1983 <brucenju@gmail.com>
Date: Fri, 28 Jun 2024 01:11:17 +0800
Subject: [PATCH 2/4] almost finished add_force

---
 src/main_gpumd/add_force.cu  | 310 +++++++++++------------------------
 src/main_gpumd/add_force.cuh |  23 ++-
 2 files changed, 109 insertions(+), 224 deletions(-)

diff --git a/src/main_gpumd/add_force.cu b/src/main_gpumd/add_force.cu
index 07ff71af9..439ba6588 100644
--- a/src/main_gpumd/add_force.cu
+++ b/src/main_gpumd/add_force.cu
@@ -14,253 +14,141 @@
 */
 
 /*----------------------------------------------------------------------------80
-Apply electron stopping.
+Add force to a group of atoms.
 ------------------------------------------------------------------------------*/
 
 #include "add_force.cuh"
 #include "model/atom.cuh"
-#include "utilities/common.cuh"
-#include "utilities/gpu_vector.cuh"
+#include "model/group.cuh"
+#include "utilities/read_file.cuh"
 #include <iostream>
 #include <vector>
 
-static void __global__ find_stopping_force(
-  const int num_atoms,
-  const int num_points,
-  const double time_step,
-  const double energy_min,
-  const double energy_max,
-  const double energy_interval_inverse,
-  const double* g_stopping_power,
-  const int* g_type,
-  const double* g_mass,
-  const double* g_velocity,
-  double* g_force,
-  double* g_power_loss)
+void __global__
+add_force(
+  const int group_size,
+  const int group_size_sum,
+  const int* g_group_contents,
+  const double added_fx, 
+  const double added_fy,
+  const double added_fz,
+  double* g_fx,
+  double* g_fy,
+  double* g_fz)
 {
-  const int i = blockIdx.x * blockDim.x + threadIdx.x;
-  if (i < num_atoms) {
-    int type = g_type[i];
-    double mass = g_mass[i];
-    double vx = g_velocity[0 * num_atoms + i];
-    double vy = g_velocity[1 * num_atoms + i];
-    double vz = g_velocity[2 * num_atoms + i];
-    double v2 = vx * vx + vy * vy + vz * vz;
-    double energy = 0.5 * mass * v2;
-
-    if (energy < energy_min + 1.0e-6 || energy > energy_max - 1.0e-6) {
-      g_force[0 * num_atoms + i] = 0.0;
-      g_force[1 * num_atoms + i] = 0.0;
-      g_force[2 * num_atoms + i] = 0.0;
-      return;
-    }
-
-    double fractional_energy = (energy - energy_min) * energy_interval_inverse;
-    int index_left = static_cast<int>(fractional_energy);
-    int index_right = index_left + 1;
-    double weight_right = fractional_energy - index_left;
-    double weight_left = 1.0 - weight_right;
-    double stopping_power = g_stopping_power[type * num_points + index_left] * weight_left +
-                            g_stopping_power[type * num_points + index_right] * weight_right;
-
-    double factor = -stopping_power / sqrt(v2);
-
-    g_force[0 * num_atoms + i] = vx * factor;
-    g_force[1 * num_atoms + i] = vy * factor;
-    g_force[2 * num_atoms + i] = vz * factor;
-
-    g_power_loss[i] = stopping_power * sqrt(v2) * time_step; 
-  }
-}
-
-__device__ float device_force_average[3];
-
-static __global__ void find_force_average(int num_atoms, double* g_force)
-{
-  //<<<3, 1024>>>
-  int tid = threadIdx.x;
-  int bid = blockIdx.x;
-  int number_of_batches = (num_atoms - 1) / 1024 + 1;
-  __shared__ double s_f[1024];
-  double f = 0.0;
-
-  for (int batch = 0; batch < number_of_batches; ++batch) {
-    int n = tid + batch * 1024;
-    if (n < num_atoms) {
-      f += g_force[n + bid * num_atoms];
-    }
-  }
-
-  s_f[tid] = f;
-  __syncthreads();
-
-  for (int offset = blockDim.x >> 1; offset > 32; offset >>= 1) {
-    if (tid < offset) {
-      s_f[tid] += s_f[tid + offset];
-    }
-    __syncthreads();
-  }
-  for (int offset = 32; offset > 0; offset >>= 1) {
-    if (tid < offset) {
-      s_f[tid] += s_f[tid + offset];
-    }
-    __syncwarp();
-  }
-
-  if (tid == 0) {
-    device_force_average[bid] = s_f[0] / num_atoms;
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+  if (tid < group_size) {
+    const int atom_id = g_group_contents[group_size_sum + tid];
+    g_fx[atom_id] += added_fx;
+    g_fy[atom_id] += added_fy;
+    g_fz[atom_id] += added_fz;
   }
 }
 
-static void __global__
-apply_electron_stopping(const int num_atoms, const double* g_stopping_force, double* g_force)
+void Add_Force::compute(const int step, const std::vector<Group>& groups, Atom& atom)
 {
-  const int i = blockIdx.x * blockDim.x + threadIdx.x;
-  if (i < num_atoms) {
-    for (int d = 0; d < 3; ++d) {
-      g_force[d * num_atoms + i] += g_stopping_force[d * num_atoms + i] - device_force_average[d];
-    }
+  for (int call = 0; call < num_calls_; ++call) { // one kernel launch per parsed add_force keyword
+    const int step_mod_table_length = step % table_length_[call]; // the table repeats periodically
+    const double added_fx = force_table_[call][0 * table_length_[call] + step_mod_table_length];
+    const double added_fy = force_table_[call][1 * table_length_[call] + step_mod_table_length];
+    const double added_fz = force_table_[call][2 * table_length_[call] + step_mod_table_length];
+    const int num_atoms_total = atom.force_per_atom.size() / 3;
+    const int group_size = groups[grouping_method_[call]].cpu_size[group_id_[call]];
+    const int group_size_sum = groups[grouping_method_[call]].cpu_size_sum[group_id_[call]];
+    add_force<<<(atom.number_of_atoms - 1) / 64 + 1, 64>>>(
+      group_size,
+      group_size_sum,
+      groups[grouping_method_[call]].contents.data(),
+      added_fx,
+      added_fy,
+      added_fz,
+      atom.force_per_atom.data(),
+      atom.force_per_atom.data() + num_atoms_total,
+      atom.force_per_atom.data() + num_atoms_total * 2
+    );
+    CUDA_CHECK_KERNEL
   }
 }
 
-__device__ double device_power_loss;
-
-static __global__ void find_power_loss(int num_atoms, double* g_power_loss)
+void Add_Force::parse(const char** param, int num_param, const std::vector<Group>& group)
 {
-  //<<<1, 1024>>>
-  int tid = threadIdx.x;
-  int block_size = blockDim.x;
+  printf("Add force.\n");
 
-  int number_of_batches = (num_atoms + block_size - 1) / block_size;
-  __shared__ double s_f[1024];
-  double f = 0.0;
-
-  for (int batch = 0; batch < number_of_batches; ++batch) {
-      int idx = tid + batch * block_size;
-      if (idx < num_atoms) {
-          f += g_power_loss[idx];
-      }
+  // check the number of parameters
+  if (num_param != 6 && num_param != 4) {
+    PRINT_INPUT_ERROR("electron_stop should have 5 or 3 parameters.\n");
   }
 
-  s_f[tid] = f;
-  __syncthreads();
-
-  for (int offset = blockDim.x >> 1; offset > 0; offset >>= 1) {
-      if (tid < offset) {
-          s_f[tid] += s_f[tid + offset];
-      }
-      __syncthreads(); 
+  // parse grouping method
+  if (!is_valid_int(param[1], &grouping_method_[num_calls_])) {
+    PRINT_INPUT_ERROR("grouping method should be an integer.\n");
   }
-
-  if (tid == 0) {
-      device_power_loss = s_f[0];
+  if (grouping_method_[num_calls_] < 0) {
+    PRINT_INPUT_ERROR("grouping method should >= 0.\n");
   }
-
-}
-
-void Add_Force::compute(double time_step, Atom& atom)
-{
-  if (!do_electron_stop) {
-    return;
+  if (grouping_method_[num_calls_] >= group.size()) {
+    PRINT_INPUT_ERROR("grouping method should < maximum number of grouping methods.\n");
   }
 
-  find_stopping_force<<<(atom.number_of_atoms - 1) / 64 + 1, 64>>>(
-    atom.number_of_atoms,
-    num_points,
-    time_step,
-    energy_min,
-    energy_max,
-    1.0 / energy_interval,
-    stopping_power_gpu.data(),
-    atom.type.data(),
-    atom.mass.data(),
-    atom.velocity_per_atom.data(),
-    stopping_force.data(),
-    stopping_loss.data());
-
-  CUDA_CHECK_KERNEL
-
-  find_force_average<<<3, 1024>>>(atom.number_of_atoms, stopping_force.data());
-  CUDA_CHECK_KERNEL
-
-  apply_electron_stopping<<<(atom.number_of_atoms - 1) / 64 + 1, 64>>>(
-    atom.number_of_atoms, stopping_force.data(), atom.force_per_atom.data());
-  CUDA_CHECK_KERNEL
-
-  find_power_loss<<<1, 1024>>>(atom.number_of_atoms, stopping_loss.data());
-  CUDA_CHECK_KERNEL
-
-  double power_loss_host;  
-  CHECK(cudaMemcpyFromSymbol(&power_loss_host, device_power_loss, sizeof(double), 0, cudaMemcpyDeviceToHost));
-  stopping_power_loss += power_loss_host;
-}
-
-void Add_Force::parse(
-  const char** param, int num_param, const int num_atoms, const int num_types)
-{
-  printf("Apply electron stopping.\n");
-  if (num_param != 2) {
-    PRINT_INPUT_ERROR("electron_stop should have 1 parameter.\n");
+  // parse group id
+  if (!is_valid_int(param[2], &group_id_[num_calls_])) {
+    PRINT_INPUT_ERROR("group id should be an integer.\n");
   }
-  printf("    using the stopping power data in %s.\n", param[1]);
-
-  std::ifstream input(param[1]);
-  if (!input.is_open()) {
-    printf("Failed to open %s.\n", param[1]);
-    exit(1);
+  if (group_id_[num_calls_] < 0) {
+    PRINT_INPUT_ERROR("group id should >= 0.\n");
   }
-
-  std::vector<std::string> tokens = get_tokens(input);
-  if (tokens.size() != 3) {
-    PRINT_INPUT_ERROR("The first line of the stopping power file should have 3 values.");
-  }
-  num_points = get_int_from_token(tokens[0], __FILE__, __LINE__);
-  if (num_points < 2) {
-    PRINT_INPUT_ERROR("Number of energy values should >= 2.\n");
-  } else {
-    printf("    number of energy values = %d.\n", num_points);
+  if (group_id_[num_calls_] >= group[grouping_method_[num_calls_]].number) {
+    PRINT_INPUT_ERROR("group id should < maximum number of groups in the grouping method.\n");
   }
 
-  energy_min = get_double_from_token(tokens[1], __FILE__, __LINE__);
-  if (energy_min <= 0) {
-    PRINT_INPUT_ERROR("energy_min should > 0.\n");
-  } else {
-    printf("    energy_min = %g eV.\n", energy_min);
-  }
-
-  energy_max = get_double_from_token(tokens[2], __FILE__, __LINE__);
-  if (energy_max <= energy_min) {
-    PRINT_INPUT_ERROR("energy_max should > energy_min.\n");
+  if (num_param == 6) {
+    table_length_[num_calls_] = 1;
+    force_table_[num_calls_].resize(table_length_[num_calls_] * 3);
+    if (!is_valid_real(param[3], &force_table_[num_calls_][0])) {
+      PRINT_INPUT_ERROR("fx should be a number.\n");
+    }
+    if (!is_valid_real(param[4], &force_table_[num_calls_][1])) {
+      PRINT_INPUT_ERROR("fy should be a number.\n");
+    }
+    if (!is_valid_real(param[5], &force_table_[num_calls_][2])) {
+      PRINT_INPUT_ERROR("fz should be a number.\n");
+    }
   } else {
-    printf("    energy_max = %g eV.\n", energy_max);
-  }
-
-  energy_interval = (energy_max - energy_min) / (num_points - 1);
-  printf("    energy interval = %g eV.\n", energy_interval);
+    std::ifstream input(param[3]);
+    if (!input.is_open()) {
+      printf("Failed to open %s.\n", param[3]);
+      exit(1);
+    }
 
-  stopping_power_cpu.resize(num_points * num_types);
-  for (int n = 0; n < num_points; ++n) {
     std::vector<std::string> tokens = get_tokens(input);
-    if (tokens.size() != num_types) {
-      PRINT_INPUT_ERROR("Number of values does not match with the number of elements.");
+    if (tokens.size() != 1) {
+      PRINT_INPUT_ERROR("The first line of the add_force file should have 1 value.");
     }
-    for (int t = 0; t < num_types; ++t) {
-      stopping_power_cpu[t * num_points + n] = get_double_from_token(tokens[t], __FILE__, __LINE__);
+    table_length_[num_calls_] = get_int_from_token(tokens[0], __FILE__, __LINE__);
+    if (table_length_[num_calls_] < 2) {
+      PRINT_INPUT_ERROR("Number of steps in the add_force file should >= 2.\n");
+    } else {
+      printf("    number of values in the add_force file = %d.\n", table_length_[num_calls_]);
+    }
+
+    force_table_[num_calls_].resize(table_length_[num_calls_] * 3);
+    for (int n = 0; n < table_length_[num_calls_]; ++n) {
+      std::vector<std::string> tokens = get_tokens(input);
+      if (tokens.size() != 3) {
+        PRINT_INPUT_ERROR("Number of force components at each step should be 3.");
+      }
+      for (int t = 0; t < 3; ++t) {
+        force_table_[num_calls_][t * table_length_[num_calls_] + n] = get_double_from_token(tokens[t], __FILE__, __LINE__);
+      }
     }
   }
 
-  stopping_power_gpu.resize(num_points * num_types);
-  stopping_power_gpu.copy_from_host(stopping_power_cpu.data());
-  stopping_force.resize(num_atoms * 3);
-  stopping_loss.resize(num_atoms);
-  do_electron_stop = true;
+  ++num_calls_;
+
+  exit(1);
 }
 
 void Add_Force::finalize() 
 { 
-  if (do_electron_stop) { 
-    printf("Total electron stopping power loss = %g eV.\n", stopping_power_loss);
-  }
-  do_electron_stop = false; 
-  stopping_power_loss = 0.0;
+  num_calls_ = 0;
 }
diff --git a/src/main_gpumd/add_force.cuh b/src/main_gpumd/add_force.cuh
index 2c9ed39f7..8770b9118 100644
--- a/src/main_gpumd/add_force.cuh
+++ b/src/main_gpumd/add_force.cuh
@@ -15,27 +15,24 @@
 
 #pragma once
 
-#include "utilities/gpu_vector.cuh"
 #include <vector>
 
 class Atom;
+class Group;
 
 class Add_Force
 {
 public:
-  bool do_electron_stop = false;
-  double stopping_power_loss = 0.0;
-  void parse(const char** param, int num_param, const int num_atoms, const int num_types);
-  void compute(double time_step, Atom& atom);
+
+  void parse(const char** param, int num_param, const std::vector<Group>& group);
+  void compute(const int step, const std::vector<Group>& groups, Atom& atom);
   void finalize();
 
 private:
-  int num_points = 0;
-  double energy_min;
-  double energy_max;
-  double energy_interval;
-  std::vector<double> stopping_power_cpu;
-  GPU_Vector<double> stopping_power_gpu;
-  GPU_Vector<double> stopping_force;
-  GPU_Vector<double> stopping_loss;
+
+  int num_calls_ = 0; // number of add_force keywords parsed so far; reset to 0 by finalize()
+  int table_length_[10]; // per call: rows in the force table (1 when fx, fy, fz are given inline)
+  std::vector<double> force_table_[10]; // per call: all fx values, then all fy, then all fz
+  int grouping_method_[10]; // per call: index into the vector of grouping methods
+  int group_id_[10]; // per call: group index within the selected grouping method
 };

From af5920478a53d84bad100980639e11e92e0e574c Mon Sep 17 00:00:00 2001
From: brucefan1983 <brucenju@gmail.com>
Date: Fri, 28 Jun 2024 01:37:19 +0800
Subject: [PATCH 3/4] call add_force

---
 src/main_gpumd/add_force.cu | 11 ++++++++++-
 src/main_gpumd/run.cu       |  7 ++++++-
 src/main_gpumd/run.cuh      |  2 ++
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/src/main_gpumd/add_force.cu b/src/main_gpumd/add_force.cu
index 439ba6588..cbf1a01b0 100644
--- a/src/main_gpumd/add_force.cu
+++ b/src/main_gpumd/add_force.cu
@@ -76,7 +76,7 @@ void Add_Force::parse(const char** param, int num_param, const std::vector<Group
 
   // check the number of parameters
   if (num_param != 6 && num_param != 4) {
-    PRINT_INPUT_ERROR("electron_stop should have 5 or 3 parameters.\n");
+    PRINT_INPUT_ERROR("add_force should have 5 or 3 parameters.\n");
   }
 
   // parse grouping method
@@ -101,6 +101,12 @@ void Add_Force::parse(const char** param, int num_param, const std::vector<Group
     PRINT_INPUT_ERROR("group id should < maximum number of groups in the grouping method.\n");
   }
 
+  printf(
+    "    for atoms in group %d of grouping method %d.\n", 
+    group_id_[num_calls_], 
+    grouping_method_[num_calls_]
+  );
+
   if (num_param == 6) {
     table_length_[num_calls_] = 1;
     force_table_[num_calls_].resize(table_length_[num_calls_] * 3);
@@ -113,6 +119,9 @@ void Add_Force::parse(const char** param, int num_param, const std::vector<Group
     if (!is_valid_real(param[5], &force_table_[num_calls_][2])) {
       PRINT_INPUT_ERROR("fz should be a number.\n");
     }
+    printf("    fx = %g eV/A.\n", force_table_[num_calls_][0]);
+    printf("    fy = %g eV/A.\n", force_table_[num_calls_][1]);
+    printf("    fz = %g eV/A.\n", force_table_[num_calls_][2]);
   } else {
     std::ifstream input(param[3]);
     if (!input.is_open()) {
diff --git a/src/main_gpumd/run.cu b/src/main_gpumd/run.cu
index c406d2d17..1928a023a 100644
--- a/src/main_gpumd/run.cu
+++ b/src/main_gpumd/run.cu
@@ -17,6 +17,7 @@
 Run simulation according to the inputs in the run.in file.
 ------------------------------------------------------------------------------*/
 
+#include "add_force.cuh"
 #include "cohesive.cuh"
 #include "electron_stop.cuh"
 #include "force/force.cuh"
@@ -257,6 +258,7 @@ void Run::perform_a_run()
 #endif
 
     electron_stop.compute(time_step, atom);
+    add_force.compute(step, group, atom);
 
     integrate.compute2(time_step, double(step) / number_of_steps, group, box, atom, thermo);
 
@@ -311,6 +313,7 @@ void Run::perform_a_run()
     atom.number_of_beads);
 
   electron_stop.finalize();
+  add_force.finalize();
   integrate.finalize();
   mc.finalize();
   velocity.finalize();
@@ -461,7 +464,9 @@ void Run::parse_one_keyword(std::vector<std::string>& tokens)
     integrate.parse_move(param, num_param, group);
   } else if (strcmp(param[0], "electron_stop") == 0) {
     electron_stop.parse(param, num_param, atom.number_of_atoms, number_of_types);
-  } else if (strcmp(param[0], "mc") == 0) {
+  } else if (strcmp(param[0], "add_force") == 0) {
+    add_force.parse(param, num_param, group);
+  }else if (strcmp(param[0], "mc") == 0) {
     mc.parse_mc(param, num_param, group, atom);
   } else if (strcmp(param[0], "dftd3") == 0) {
     // nothing here; will be handled elsewhere
diff --git a/src/main_gpumd/run.cuh b/src/main_gpumd/run.cuh
index c2d319863..4670f04e1 100644
--- a/src/main_gpumd/run.cuh
+++ b/src/main_gpumd/run.cuh
@@ -19,6 +19,7 @@ class Force;
 class Integrate;
 class Measure;
 
+#include "add_force.cuh"
 #include "electron_stop.cuh"
 #include "force/force.cuh"
 #include "integrate/integrate.cuh"
@@ -68,4 +69,5 @@ private:
   MC mc;
   Measure measure;
   Electron_Stop electron_stop;
+  Add_Force add_force;
 };

From 34d7e27c6502af7afe936c41b85826f330d5cd85 Mon Sep 17 00:00:00 2001
From: brucefan1983 <brucenju@gmail.com>
Date: Fri, 28 Jun 2024 01:53:01 +0800
Subject: [PATCH 4/4] check the number of calls to add_force

---
 src/main_gpumd/add_force.cu | 9 ++++++---
 src/main_gpumd/run.cu       | 2 +-
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/main_gpumd/add_force.cu b/src/main_gpumd/add_force.cu
index cbf1a01b0..03f942fe5 100644
--- a/src/main_gpumd/add_force.cu
+++ b/src/main_gpumd/add_force.cu
@@ -55,7 +55,7 @@ void Add_Force::compute(const int step, const std::vector<Group>& groups, Atom&
     const int num_atoms_total = atom.force_per_atom.size() / 3;
     const int group_size = groups[grouping_method_[call]].cpu_size[group_id_[call]];
     const int group_size_sum = groups[grouping_method_[call]].cpu_size_sum[group_id_[call]];
-    add_force<<<(atom.number_of_atoms - 1) / 64 + 1, 64>>>(
+    add_force<<<(group_size - 1) / 64 + 1, 64>>>(
       group_size,
       group_size_sum,
       groups[grouping_method_[call]].contents.data(),
@@ -74,6 +74,11 @@ void Add_Force::parse(const char** param, int num_param, const std::vector<Group
 {
   printf("Add force.\n");
 
+  // Reject the call before any write to the 10-entry per-call arrays.
+  if (num_calls_ >= 10) {
+    PRINT_INPUT_ERROR("add_force cannot be used more than 10 times in one run.");
+  }
+
   // check the number of parameters
   if (num_param != 6 && num_param != 4) {
     PRINT_INPUT_ERROR("add_force should have 5 or 3 parameters.\n");
@@ -154,7 +159,5 @@ void Add_Force::parse(const char** param, int num_param, const std::vector<Group
 
   ++num_calls_;
-
-  exit(1);
 }
 
 void Add_Force::finalize() 
diff --git a/src/main_gpumd/run.cu b/src/main_gpumd/run.cu
index 1928a023a..aea3b6c8b 100644
--- a/src/main_gpumd/run.cu
+++ b/src/main_gpumd/run.cu
@@ -466,7 +466,7 @@ void Run::parse_one_keyword(std::vector<std::string>& tokens)
     electron_stop.parse(param, num_param, atom.number_of_atoms, number_of_types);
   } else if (strcmp(param[0], "add_force") == 0) {
     add_force.parse(param, num_param, group);
-  }else if (strcmp(param[0], "mc") == 0) {
+  } else if (strcmp(param[0], "mc") == 0) {
     mc.parse_mc(param, num_param, group, atom);
   } else if (strcmp(param[0], "dftd3") == 0) {
     // nothing here; will be handled elsewhere