From bab17c56904d666b91473c68d8228e0f2e2807d2 Mon Sep 17 00:00:00 2001 From: Callum Date: Tue, 15 Aug 2023 08:31:30 +0100 Subject: [PATCH 1/5] Changed Eigen path --- .gitignore | 4 ++++ Makefile | 2 +- env.sh | 0 error.txt | 23 +++++++++++++++++++++++ job.pbs | 12 ++++++++++++ output.txt | 1 + 6 files changed, 41 insertions(+), 1 deletion(-) mode change 100644 => 100755 env.sh create mode 100644 error.txt create mode 100644 job.pbs create mode 100644 output.txt diff --git a/.gitignore b/.gitignore index d2da5df..3223329 100644 --- a/.gitignore +++ b/.gitignore @@ -44,3 +44,7 @@ bin/* # VSCode .vscode + + output.txt + error.txt + job.pbs diff --git a/Makefile b/Makefile index 20332f5..b77f906 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ CFLAGS := -g -Wall -Wextra -std=c++20 -O3 -march=native -Wno-unknown-pragmas -W #CFLAGS := -g -Wall -Wextra -std=c++17 -O3 -Wno-unknown-pragmas #CFLAGS := -g -Wall -std=c++17 -Ofast -xHost -xHost -qopt-report-phase=vec,loop -qopt-report=5 LIB := -Lbuild/ -EIGEN=-I/home/br-hwaugh/repos/eigen/ +EIGEN=-I/lustre/home/br-cward/repos/eigen INC := -Iinclude/ $(EIGEN) diff --git a/env.sh b/env.sh old mode 100644 new mode 100755 diff --git a/error.txt b/error.txt new file mode 100644 index 0000000..1c582a3 --- /dev/null +++ b/error.txt @@ -0,0 +1,23 @@ + +Lmod is automatically replacing "PrgEnv-cray/8.3.3" with "PrgEnv-gnu/8.3.3". + + +Lmod is automatically replacing "cce/15.0.1" with "gcc/12.2.0". + + +Due to MODULEPATH changes, the following have been reloaded: + 1) cray-mpich/8.1.25 + +Lmod has detected the following error: The following module(s) are unknown: +"IntelOneApi/modulefiles/tbb/latest" + +Please check the spelling or version number. Also try "module spider ..." 
+It is also possible your cache file is out-of-date; it may help to try: + $ module --ignore_cache load "IntelOneApi/modulefiles/tbb/latest" + +Also make sure that all modulefiles written in TCL start with the string +#%Module + + + +/var/spool/pbs/mom_priv/jobs/28225.p3-pbs.SC: line 12: 307032 Segmentation fault (core dumped) ./bin/minicombust diff --git a/job.pbs b/job.pbs new file mode 100644 index 0000000..10730e3 --- /dev/null +++ b/job.pbs @@ -0,0 +1,12 @@ +#!/bin/bash +#PBS -N minicombust +#PBS -o output.txt +#PBS -l select=1:ncpus=1:ngpus=1 +#PBS -e error.txt +#PBS -q ampereq +#PBS -l walltime=0:15:00 + +echo $PBS_O_WORKDIR +cd $PBS_O_WORKDIR + +./bin/minicombust # emits 10 particles per timestep by default diff --git a/output.txt b/output.txt new file mode 100644 index 0000000..005e579 --- /dev/null +++ b/output.txt @@ -0,0 +1 @@ +/lustre/home/br-cward/minicombust_app From 30ca7d34c1eae7651f80aba3e4aa4a4505ea7b78 Mon Sep 17 00:00:00 2001 From: Callum Date: Tue, 15 Aug 2023 22:10:42 +0100 Subject: [PATCH 2/5] Function calls: exchange_phi_halos, calculate_uvw - commented out of include/flow/FlowSolver.inl:timestep() --- .gitignore | 7 ++++--- error.txt | 23 ----------------------- include/flow/FlowSolver.inl | 4 ++-- output.txt | 1 - 4 files changed, 6 insertions(+), 29 deletions(-) delete mode 100644 error.txt delete mode 100644 output.txt diff --git a/.gitignore b/.gitignore index 3223329..9864d5f 100644 --- a/.gitignore +++ b/.gitignore @@ -45,6 +45,7 @@ bin/* # VSCode .vscode - output.txt - error.txt - job.pbs +# Job files +output.txt +error.txt +*.pbs diff --git a/error.txt b/error.txt deleted file mode 100644 index 1c582a3..0000000 --- a/error.txt +++ /dev/null @@ -1,23 +0,0 @@ - -Lmod is automatically replacing "PrgEnv-cray/8.3.3" with "PrgEnv-gnu/8.3.3". - - -Lmod is automatically replacing "cce/15.0.1" with "gcc/12.2.0". 
- - -Due to MODULEPATH changes, the following have been reloaded: - 1) cray-mpich/8.1.25 - -Lmod has detected the following error: The following module(s) are unknown: -"IntelOneApi/modulefiles/tbb/latest" - -Please check the spelling or version number. Also try "module spider ..." -It is also possible your cache file is out-of-date; it may help to try: - $ module --ignore_cache load "IntelOneApi/modulefiles/tbb/latest" - -Also make sure that all modulefiles written in TCL start with the string -#%Module - - - -/var/spool/pbs/mom_priv/jobs/28225.p3-pbs.SC: line 12: 307032 Segmentation fault (core dumped) ./bin/minicombust diff --git a/include/flow/FlowSolver.inl b/include/flow/FlowSolver.inl index db10bc5..3c90cfd 100644 --- a/include/flow/FlowSolver.inl +++ b/include/flow/FlowSolver.inl @@ -2057,7 +2057,7 @@ namespace minicombust::flow grad_time += MPI_Wtime(); halo_time -= MPI_Wtime(); - exchange_phi_halos(); + //exchange_phi_halos(); halo_time += MPI_Wtime(); if ((timestep_count % comms_timestep) == 0) @@ -2083,7 +2083,7 @@ namespace minicombust::flow } } - calculate_UVW(); + //calculate_UVW(); // calculate_pressure(); diff --git a/output.txt b/output.txt deleted file mode 100644 index 005e579..0000000 --- a/output.txt +++ /dev/null @@ -1 +0,0 @@ -/lustre/home/br-cward/minicombust_app From 7c77eeb9a06406ca6daf9c3303293d8f009d5381 Mon Sep 17 00:00:00 2001 From: Callum-Ward Date: Thu, 17 Aug 2023 14:02:42 +0100 Subject: [PATCH 3/5] Removed comments for debugging seg fault --- include/particles/ParticleSolver.inl | 2 ++ job.pbs | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/include/particles/ParticleSolver.inl b/include/particles/ParticleSolver.inl index 6ff6028..c6ad696 100644 --- a/include/particles/ParticleSolver.inl +++ b/include/particles/ParticleSolver.inl @@ -234,6 +234,8 @@ namespace minicombust::particles // {printf("ERROR RECV VALS : Rank %d Block %lu will write fields to unallocated memory required size %d max %lu\n", 
mpi_config->rank, bi, neighbours_size[bi], node_index_array_sizes[bi] / sizeof(uint64_t)); exit(1);} // } + //TODO callum rewrite with c++ parallel algos + #pragma ivdep for (int i = 0; i < neighbours_size[bi]; i++) { diff --git a/job.pbs b/job.pbs index 10730e3..0f81de6 100644 --- a/job.pbs +++ b/job.pbs @@ -1,12 +1,12 @@ #!/bin/bash #PBS -N minicombust #PBS -o output.txt -#PBS -l select=1:ncpus=1:ngpus=1 +#PBS -l select=1:ncpus=10 #PBS -e error.txt -#PBS -q ampereq +#PBS -q romeq #PBS -l walltime=0:15:00 echo $PBS_O_WORKDIR cd $PBS_O_WORKDIR -./bin/minicombust # emits 10 particles per timestep by default +mpirun -np 10 ./bin/minicombust 5 # emits 10 particles per timestep by default From 8d02ff25800f814abcf0ea7d5902ee2f3ca0af6a Mon Sep 17 00:00:00 2001 From: Callum-Ward Date: Tue, 22 Aug 2023 20:17:56 +0100 Subject: [PATCH 4/5] Added mpi world size print statements and job for Cray --- Makefile | 7 ++++--- job.pbs | 14 +++++++++++--- src/minicombust.cpp | 6 ++++++ 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index b77f906..02a5ceb 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,8 @@ ## Compilers and Flags CC := CC -#CC := mpic++ -CFLAGS := -g -Wall -Wextra -std=c++20 -O3 -march=native -Wno-unknown-pragmas -Wno-deprecated-enum-enum-conversion +#CC := mpic++ +#CFLAGS := -g -Wall -Wextra -std=c++20 -O3 -march=native -Wno-unknown-pragmas -Wno-deprecated-enum-enum-conversion +CFLAGS := -g -std=c++20 -O0 -l -Wno-unknown-pragmas -Wno-deprecated-enum-enum-conversion -fno-inline -pg #profiling #CFLAGS := -g -Wall -Wextra -std=c++17 -O3 -Wno-unknown-pragmas #CFLAGS := -g -Wall -std=c++17 -Ofast -xHost -xHost -qopt-report-phase=vec,loop -qopt-report=5 LIB := -Lbuild/ @@ -51,6 +52,6 @@ clean: @echo "Cleaning..." 
rm -rf build/* $(EXE) @echo "" - + rm output.txt error.txt callgrind.out.* .PHONY: clean diff --git a/job.pbs b/job.pbs index 0f81de6..5160a6e 100644 --- a/job.pbs +++ b/job.pbs @@ -1,12 +1,20 @@ #!/bin/bash #PBS -N minicombust #PBS -o output.txt -#PBS -l select=1:ncpus=10 +#PBS -l select=1:ncpus=10:mpiprocs=10 #PBS -e error.txt -#PBS -q romeq +#PBS -q milanq #PBS -l walltime=0:15:00 echo $PBS_O_WORKDIR cd $PBS_O_WORKDIR -mpirun -np 10 ./bin/minicombust 5 # emits 10 particles per timestep by default +echo $PATH + +#aprun -n 10 ./bin/minicombust 7 100 20 100 + + +export GMON_OUT_PREFIX=gmon.out- +#aprun -n 10 ./bin/minicombust 7 +mpirun -np 10 ./bin/minicombust 7 + diff --git a/src/minicombust.cpp b/src/minicombust.cpp index 6d8d44b..582dab7 100644 --- a/src/minicombust.cpp +++ b/src/minicombust.cpp @@ -28,6 +28,12 @@ int main (int argc, char ** argv) MPI_Comm_rank(mpi_config.world, &mpi_config.rank); MPI_Comm_size(mpi_config.world, &mpi_config.world_size); + std::cout << "Rank: " << mpi_config.rank << " World size: " << mpi_config.world_size < Date: Sun, 10 Sep 2023 15:08:21 +0100 Subject: [PATCH 5/5] Added support for ranges, execution and mdspan libraries in Makefile incase Nvidia compiler still doesn't support C++20 --- Makefile | 25 ++- include/particles/ParticleSolver.inl | 258 ++++++++++++++++++++------- job.pbs | 13 +- 3 files changed, 227 insertions(+), 69 deletions(-) diff --git a/Makefile b/Makefile index 02a5ceb..20c78eb 100644 --- a/Makefile +++ b/Makefile @@ -1,15 +1,31 @@ ## Compilers and Flags CC := CC #CC := mpic++ -#CFLAGS := -g -Wall -Wextra -std=c++20 -O3 -march=native -Wno-unknown-pragmas -Wno-deprecated-enum-enum-conversion -CFLAGS := -g -std=c++20 -O0 -l -Wno-unknown-pragmas -Wno-deprecated-enum-enum-conversion -fno-inline -pg #profiling +CFLAGS := -g -w -std=c++20 -O3 -march=native -Wno-unknown-pragmas -Wno-deprecated-enum-enum-conversion +#CFLAGS := -g -w -std=c++20 -O3 -march=native #nvc++ +#CFLAGS := -g -std=c++20 -O0 -l 
-Wno-unknown-pragmas -Wno-deprecated-enum-enum-conversion -fno-inline -pg #profiling #CFLAGS := -g -Wall -Wextra -std=c++17 -O3 -Wno-unknown-pragmas #CFLAGS := -g -Wall -std=c++17 -Ofast -xHost -xHost -qopt-report-phase=vec,loop -qopt-report=5 -LIB := -Lbuild/ +LIB := -Lbuild/ -ltbb EIGEN=-I/lustre/home/br-cward/repos/eigen INC := -Iinclude/ $(EIGEN) +# Nvidia compiler 22.9 only supports up to C++17 - range support was added in C++20 +# std::execution port: https://github.com/nvidia/stdexec +# std::mdspan port: https://github.com/kokkos/mdspan +# std::ranges port: https://github.com/ericniebler/range-v3.git + +# Set the include directories +INCLUDE_DIRS := $(HOME)/repos/range-v3/ + +# Find all subdirectories of the include directories +SUBDIRS := $(shell find $(INCLUDE_DIRS) -type d) + +# Generate the necessary -I flags for all the subdirectories +INCLUDES += $(addprefix -I,$(SUBDIRS)) + + ## Directories SRC := src @@ -54,4 +70,7 @@ clean: @echo "" rm output.txt error.txt callgrind.out.* +cleant: + rm output.txt error.txt callgrind.out.* + .PHONY: clean diff --git a/include/particles/ParticleSolver.inl b/include/particles/ParticleSolver.inl index c6ad696..a70cd77 100644 --- a/include/particles/ParticleSolver.inl +++ b/include/particles/ParticleSolver.inl @@ -1,6 +1,9 @@ #include #include "particles/ParticleSolver.hpp" #include "visit/VisitWriter.hpp" +#include +#include +#include @@ -236,14 +239,21 @@ namespace minicombust::particles //TODO callum rewrite with c++ parallel algos - #pragma ivdep - for (int i = 0; i < neighbours_size[bi]; i++) - { - node_to_field_address_map[all_interp_node_indexes[bi][i]] = &all_interp_node_flow_fields[bi][i]; + // #pragma ivdep + // for (int i = 0; i < neighbours_size[bi]; i++) + // { + // node_to_field_address_map[all_interp_node_indexes[bi][i]] = &all_interp_node_flow_fields[bi][i]; + + // // if (PARTICLE_SOLVER_DEBUG && all_interp_node_indexes[bi][i] > mesh->points_size ) + // // {printf("ERROR RECV VALS : Rank %d Flow block %lu 
Value %lu out of range at %d\n", mpi_config->rank, bi, all_interp_node_indexes[bi][i], i); exit(1);} + // } + + auto ids = std::views::common(std::views::iota(0, (int)neighbours_size[bi])); + std::for_each(std::execution::par_unseq, ids.begin(), ids.end() , [=](int idx) + { + node_to_field_address_map[all_interp_node_indexes[bi][idx]] = &all_interp_node_flow_fields[bi][idx]; + }); - // if (PARTICLE_SOLVER_DEBUG && all_interp_node_indexes[bi][i] > mesh->points_size ) - // {printf("ERROR RECV VALS : Rank %d Flow block %lu Value %lu out of range at %d\n", mpi_config->rank, bi, all_interp_node_indexes[bi][i], i); exit(1);} - } // if (PARTICLE_SOLVER_DEBUG && size_before != node_to_field_address_map.size()) // {printf("\tRank %d: Recieving wrong amount of data(+%lu). Block %lu Node map size %ld sent size %d.\n", mpi_config->rank, node_to_field_address_map.size() - size_before, bi, node_to_field_address_map.size(), neighbours_size[bi] ); exit(1);}; @@ -291,55 +301,116 @@ namespace minicombust::particles performance_logger.my_papi_start(); - // Solve spray equations - #pragma ivdep - for (uint64_t p = 0; p < particles_size; p++) + //Solve spray equations + // #pragma ivdep + // for (uint64_t p = 0; p < particles_size; p++) + // { + // vec total_vector_weight = {0.0, 0.0, 0.0}; + // T total_scalar_weight = 0.0; + + // vec interp_gas_vel = {0.0, 0.0, 0.0}; + // T interp_gas_pre = 0.0; + // T interp_gas_tem = 0.0; + + + // #pragma ivdep + // for (uint64_t n = 0; n < cell_size; n++) + // { + // if (PARTICLE_SOLVER_DEBUG && (particles[p].cell >= mesh->mesh_size)) + // {printf("ERROR::: RANK %d Cell %lu out of range\n", mpi_config->rank, particles[p].cell); exit(1); + // } + + // uint64_t node = mesh->cells[(particles[p].cell - mesh->shmem_cell_disp) * cell_size + n]; + // const uint64_t block_id = mesh->get_block_id(particles[p].cell); + + + // if (PARTICLE_SOLVER_DEBUG && (node >= mesh->points_size)) + // {printf("ERROR::: RANK %d Node %lu out of range\n", mpi_config->rank, 
node); exit(1); + // } + // if (PARTICLE_SOLVER_DEBUG && (node_to_field_address_map[node] < (flow_aos *)5)) + // {printf("Rank %d Block %lu cell %lu node %lu flow_pointer %p block_flow_pointer %p size %lu\n", mpi_config->rank, block_id, particles[p].cell, node, node_to_field_address_map[node], all_interp_node_flow_fields[block_id], node_flow_array_sizes[block_id] ); exit(1); + // }; + + // const vec node_to_particle = particles[p].x1 - mesh->points[node - mesh->shmem_point_disp]; + + // vec weight = 1.0 / ((node_to_particle * node_to_particle) + vec {__DBL_MIN__, __DBL_MIN__, __DBL_MIN__}); + // T weight_magnitude = magnitude(weight); + + // total_vector_weight += weight; + // total_scalar_weight += weight_magnitude; + + // // if (PARTICLE_SOLVER_DEBUG) check_flow_field_exit ( "SOLVE SPRAY: Node value", node_to_field_address_map[node], &mesh->dummy_flow_field, node ); + + // interp_gas_vel += weight * node_to_field_address_map[node]->vel; + // interp_gas_pre += weight_magnitude * node_to_field_address_map[node]->pressure; + // interp_gas_tem += weight_magnitude * node_to_field_address_map[node]->temp; + // } + + // particles[p].local_flow_value.vel = interp_gas_vel / total_vector_weight; + // particles[p].local_flow_value.pressure = interp_gas_pre / total_scalar_weight; + // particles[p].local_flow_value.temp = interp_gas_tem / total_scalar_weight; + + // // if (PARTICLE_SOLVER_DEBUG) check_flow_field_exit ( "SOLVE SPRAY: Interpolated particle value ", &particles[p].local_flow_value, &mesh->dummy_flow_field, p ); + // } + + // Solve spray equations practice + auto ids = std::views::common(std::views::iota(0, (int)particles_size)); + + std::for_each(std::execution::par_unseq,ids.begin(), ids.end(), [=](int p) { - vec total_vector_weight = {0.0, 0.0, 0.0}; - T total_scalar_weight = 0.0; + vec total_vector_weight = {0.0, 0.0, 0.0}; + T total_scalar_weight = 0.0; vec interp_gas_vel = {0.0, 0.0, 0.0}; - T interp_gas_pre = 0.0; - T interp_gas_tem = 0.0; - + T 
interp_gas_pre = 0.0; + T interp_gas_tem = 0.0; - #pragma ivdep - for (uint64_t n = 0; n < cell_size; n++) + auto ids2 = std::views::common(std::views::iota(0, (int)cell_size)); + std::for_each(ids2.begin(), ids2.end(), [=,&total_vector_weight, &total_scalar_weight, &interp_gas_pre, &interp_gas_pre ,&interp_gas_tem, &interp_gas_vel](int n) { if (PARTICLE_SOLVER_DEBUG && (particles[p].cell >= mesh->mesh_size)) - {printf("ERROR::: RANK %d Cell %lu out of range\n", mpi_config->rank, particles[p].cell); exit(1);} - + { + printf("ERROR::: RANK %d Cell %lu out of range\n", mpi_config->rank, particles[p].cell); + exit(1); + } + uint64_t node = mesh->cells[(particles[p].cell - mesh->shmem_cell_disp) * cell_size + n]; const uint64_t block_id = mesh->get_block_id(particles[p].cell); - if (PARTICLE_SOLVER_DEBUG && (node >= mesh->points_size)) - {printf("ERROR::: RANK %d Node %lu out of range\n", mpi_config->rank, node); exit(1);} + { + printf("ERROR::: RANK %d Node %lu out of range\n", mpi_config->rank, node); + exit(1); + } if (PARTICLE_SOLVER_DEBUG && (node_to_field_address_map[node] < (flow_aos *)5)) - {printf("Rank %d Block %lu cell %lu node %lu flow_pointer %p block_flow_pointer %p size %lu\n", mpi_config->rank, block_id, particles[p].cell, node, node_to_field_address_map[node], all_interp_node_flow_fields[block_id], node_flow_array_sizes[block_id] ); exit(1);}; - + { + printf("Rank %d Block %lu cell %lu node %lu flow_pointer %p block_flow_pointer %p size %lu\n", mpi_config->rank, block_id, particles[p].cell, node, node_to_field_address_map[node], all_interp_node_flow_fields[block_id], node_flow_array_sizes[block_id]); + exit(1); + }; const vec node_to_particle = particles[p].x1 - mesh->points[node - mesh->shmem_point_disp]; - vec weight = 1.0 / ((node_to_particle * node_to_particle) + vec {__DBL_MIN__, __DBL_MIN__, __DBL_MIN__}); + vec weight = 1.0 / ((node_to_particle * node_to_particle) + vec{__DBL_MIN__, __DBL_MIN__, __DBL_MIN__}); T weight_magnitude = 
magnitude(weight); - total_vector_weight += weight; - total_scalar_weight += weight_magnitude; + total_vector_weight += weight; + total_scalar_weight += weight_magnitude; // if (PARTICLE_SOLVER_DEBUG) check_flow_field_exit ( "SOLVE SPRAY: Node value", node_to_field_address_map[node], &mesh->dummy_flow_field, node ); - interp_gas_vel += weight * node_to_field_address_map[node]->vel; - interp_gas_pre += weight_magnitude * node_to_field_address_map[node]->pressure; - interp_gas_tem += weight_magnitude * node_to_field_address_map[node]->temp; - } + interp_gas_vel += weight * node_to_field_address_map[node]->vel; + interp_gas_pre += weight_magnitude * node_to_field_address_map[node]->pressure; + interp_gas_tem += weight_magnitude * node_to_field_address_map[node]->temp; + }); - particles[p].local_flow_value.vel = interp_gas_vel / total_vector_weight; - particles[p].local_flow_value.pressure = interp_gas_pre / total_scalar_weight; - particles[p].local_flow_value.temp = interp_gas_tem / total_scalar_weight; + particles[p].local_flow_value.vel = interp_gas_vel / total_vector_weight; + particles[p].local_flow_value.pressure = interp_gas_pre / total_scalar_weight; + particles[p].local_flow_value.temp = interp_gas_tem / total_scalar_weight; // if (PARTICLE_SOLVER_DEBUG) check_flow_field_exit ( "SOLVE SPRAY: Interpolated particle value ", &particles[p].local_flow_value, &mesh->dummy_flow_field, p ); - } + }); + + if (PARTICLE_SOLVER_DEBUG && mpi_config->rank == mpi_config->particle_flow_rank ) printf("\tRank %d: Finished interpolation. 
Starting spray computation.\n", mpi_config->rank); @@ -356,18 +427,25 @@ namespace minicombust::particles #pragma ivdep for (uint64_t p = 0; p < particles_size; p++) { - particles[p].solve_spray( delta, &logger, particles ); + particles[p].solve_spray(delta, &logger, particles); - if (particles[p].decayed) decayed_particles.push_back(p); + if (particles[p].decayed) decayed_particles.push_back(p); } + // auto ids = std::views::common(std::views::iota(0, (int)particles_size)); + // std::for_each(std::execution::par_unseq, ids.begin(), ids.end(), [=](int p) + // { + // particles[p].solve_spray(delta, &logger, particles); + // if (particles[p].decayed) decayed_particles.push_back(p); + // }); + const uint64_t decayed_particles_size = decayed_particles.size(); #pragma ivdep for (int128_t i = decayed_particles_size - 1; i >= 0; i--) { particles[decayed_particles[i]] = particles.back(); particles.pop_back(); - } + } performance_logger.my_papi_stop(performance_logger.spray_kernel_event_counts, &performance_logger.spray_time); } @@ -381,47 +459,100 @@ namespace minicombust::particles const uint64_t particles_size = particles.size(); uint64_t elements [mesh->num_blocks]; - for (uint64_t i = 0; i < mesh->num_blocks; i++) - elements[i] = 0; + //for (uint64_t i = 0; i < mesh->num_blocks; i++) elements[i] = 0; + + std::fill(std::execution::par_unseq, &elements[0], &elements[0] + mesh->num_blocks, 0); // Update particle positions vector decayed_particles; - #pragma ivdep - for (uint64_t p = 0; p < particles_size; p++) - { + // #pragma ivdep + // for (uint64_t p = 0; p < particles_size; p++) + // { + // // Check if particle is in the current cell. Tetras = Volume/Area comparison method. https://www.peertechzpublications.com/articles/TCSIT-6-132.php. 
+ // particles[p].update_cell(mesh, &logger); + + // if (particles[p].decayed) decayed_particles.push_back(p); + // else + // { + // const uint64_t cell = particles[p].cell; + // const uint64_t block_id = mesh->get_block_id(particles[p].cell); + + // if ( cell_particle_field_map[block_id].count(cell) ) + // { + // const uint64_t index = cell_particle_field_map[block_id][cell]; + + // cell_particle_aos[block_id][index].momentum += particles[p].particle_cell_fields.momentum; + // cell_particle_aos[block_id][index].energy += particles[p].particle_cell_fields.energy; + // cell_particle_aos[block_id][index].fuel += particles[p].particle_cell_fields.fuel; + // } + // else + // { + + // const uint64_t index = cell_particle_field_map[block_id].size(); + // elements[block_id] = cell_particle_field_map[block_id].size() + 1; + + // resize_cell_particle(elements, NULL, NULL); + + // cell_particle_indexes[block_id][index] = cell; + // cell_particle_aos[block_id][index] = particles[p].particle_cell_fields; + + // cell_particle_field_map[block_id][cell] = index; + + + // #pragma ivdep + // for (uint64_t n = 0; n < mesh->cell_size; n++) + // { + // const uint64_t node_id = mesh->cells[(cell - mesh->shmem_cell_disp) * mesh->cell_size + n]; + + // if (!node_to_field_address_map.count(node_id)) + // { + // node_to_field_address_map[node_id] = (flow_aos *)2; + // } + // } + + + // } + // } + + // } + + //capturing by reference is not recommended for parallel execution - current error "array of runtime bound cannot be captured by copy, only by reference" + //this is just an example of how the for_each loop can be used + auto ids = std::views::common(std::views::iota(0, (int)particles_size)); + std::for_each(std::execution::par_unseq, ids.begin(), ids.end(), [&](uint64_t p) + { // Check if particle is in the current cell. Tetras = Volume/Area comparison method. https://www.peertechzpublications.com/articles/TCSIT-6-132.php. 
particles[p].update_cell(mesh, &logger); if (particles[p].decayed) decayed_particles.push_back(p); else { - const uint64_t cell = particles[p].cell; + const uint64_t cell = particles[p].cell; const uint64_t block_id = mesh->get_block_id(particles[p].cell); - if ( cell_particle_field_map[block_id].count(cell) ) + if (cell_particle_field_map[block_id].count(cell)) { const uint64_t index = cell_particle_field_map[block_id][cell]; cell_particle_aos[block_id][index].momentum += particles[p].particle_cell_fields.momentum; - cell_particle_aos[block_id][index].energy += particles[p].particle_cell_fields.energy; - cell_particle_aos[block_id][index].fuel += particles[p].particle_cell_fields.fuel; + cell_particle_aos[block_id][index].energy += particles[p].particle_cell_fields.energy; + cell_particle_aos[block_id][index].fuel += particles[p].particle_cell_fields.fuel; } else { const uint64_t index = cell_particle_field_map[block_id].size(); - elements[block_id] = cell_particle_field_map[block_id].size() + 1; + elements[block_id] = cell_particle_field_map[block_id].size() + 1; resize_cell_particle(elements, NULL, NULL); - cell_particle_indexes[block_id][index] = cell; - cell_particle_aos[block_id][index] = particles[p].particle_cell_fields; - - cell_particle_field_map[block_id][cell] = index; + cell_particle_indexes[block_id][index] = cell; + cell_particle_aos[block_id][index] = particles[p].particle_cell_fields; - - #pragma ivdep - for (uint64_t n = 0; n < mesh->cell_size; n++) + cell_particle_field_map[block_id][cell] = index; + //do the same with the innner loop using for_each + auto ids2 = std::views::common(std::views::iota(0, (int)mesh->cell_size)); + std::for_each(std::execution::par_unseq, ids2.begin(), ids2.end(), [=](uint64_t n) { const uint64_t node_id = mesh->cells[(cell - mesh->shmem_cell_disp) * mesh->cell_size + n]; @@ -429,20 +560,26 @@ namespace minicombust::particles { node_to_field_address_map[node_id] = (flow_aos *)2; } - } - + }); } } + }); - } const 
uint64_t decayed_particles_size = decayed_particles.size(); - #pragma ivdep - for (int128_t i = decayed_particles_size - 1; i >= 0; i--) + // #pragma ivdep + // for (int128_t i = decayed_particles_size - 1; i >= 0; i--) + // { + // particles[decayed_particles[i]] = particles.back(); + // particles.pop_back(); + // } + //replace previous loop with for_each + ids = std::views::common(std::views::iota(0, (int)decayed_particles_size)); + std::for_each(std::execution::par_unseq, ids.begin(), ids.end(), [=](int128_t i) { particles[decayed_particles[i]] = particles.back(); particles.pop_back(); - } + }); performance_logger.my_papi_stop(performance_logger.position_kernel_event_counts, &performance_logger.position_time); } @@ -504,7 +641,6 @@ namespace minicombust::particles } } - particle_release(); if (mpi_config->world_size != 1 && (count % comms_timestep) == 0) diff --git a/job.pbs b/job.pbs index 5160a6e..91c22bf 100644 --- a/job.pbs +++ b/job.pbs @@ -9,12 +9,15 @@ echo $PBS_O_WORKDIR cd $PBS_O_WORKDIR -echo $PATH +echo runnnig mpirun +module load IntelOneApi/modulefiles/tbb/2021.10.0 -#aprun -n 10 ./bin/minicombust 7 100 20 100 +aprun -n 10 ./bin/minicombust 5 100 20 100 +#aprun -n 10 ./bin/minicombust 5 +#mpirun -np 10 ./bin/minicombust 5 100 20 100 +#mpirun -np 10 ./bin/minicombust 5 - -export GMON_OUT_PREFIX=gmon.out- +#export GMON_OUT_PREFIX=gmon.out- #aprun -n 10 ./bin/minicombust 7 -mpirun -np 10 ./bin/minicombust 7 +