From 00b87803ca7e9f5ba88a3409a142aaef8aa7ebd6 Mon Sep 17 00:00:00 2001 From: Sebastien Lemaire Date: Mon, 24 Jun 2024 14:04:42 +0100 Subject: [PATCH 1/8] initial tests with 2decomp&fft --- src/CMakeLists.txt | 3 +++ src/xcompact.f90 | 1 + 2 files changed, 4 insertions(+) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b242ad216..0176ca245 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -51,6 +51,9 @@ if(${CMAKE_Fortran_COMPILER_ID} STREQUAL "PGI" OR target_compile_options(xcompact PRIVATE "-DCUDA") elseif(${CMAKE_Fortran_COMPILER_ID} STREQUAL "GNU") + find_package(decomp2d REQUIRED) + include_directories(${decomp2d_INCLUDE_DIRS}) + target_link_libraries(decomp2d) set(CMAKE_Fortran_FLAGS "-cpp -std=f2018") set(CMAKE_Fortran_FLAGS_DEBUG "-g -Og -Wall -Wpedantic -Werror -Wimplicit-interface -Wimplicit-procedure -Wno-unused-dummy-argument") set(CMAKE_Fortran_FLAGS_RELEASE "-O3 -ffast-math") diff --git a/src/xcompact.f90 b/src/xcompact.f90 index f31fb2bf3..3043cbbf2 100644 --- a/src/xcompact.f90 +++ b/src/xcompact.f90 @@ -1,6 +1,7 @@ program xcompact use mpi + use decomp_2d_fft use m_allocator use m_base_backend use m_common, only: pi, globs_t, POISSON_SOLVER_FFT, POISSON_SOLVER_CG, & From afcd22ca92a8b49866f7c0459e6586f69d858ba4 Mon Sep 17 00:00:00 2001 From: Sebastien Lemaire Date: Thu, 11 Jul 2024 16:57:45 +0100 Subject: [PATCH 2/8] use 2decomp for mesh parallel decomposition --- src/mesh.f90 | 7 ++-- src/omp/mesh.f90 | 82 +++++++++++++++++++++++++++++++++++++++++ src/omp/poisson_fft.f90 | 10 +++++ 3 files changed, 96 insertions(+), 3 deletions(-) create mode 100644 src/omp/mesh.f90 diff --git a/src/mesh.f90 b/src/mesh.f90 index 8fe9e0199..53400eca9 100644 --- a/src/mesh.f90 +++ b/src/mesh.f90 @@ -42,6 +42,7 @@ module m_mesh type(geo_t), allocatable :: geo ! object containing geometry information type(parallel_t), allocatable :: par ! object containing parallel domain decomposition information contains + procedure :: domain_decomposition => domain_decomposition_generic procedure :: get_SZ procedure :: get_dims @@ -119,7 +120,7 @@ function mesh_init(dims_global, nproc_dir, L_global, & mesh%par%nproc = product(nproc_dir(:)) call MPI_Comm_rank(MPI_COMM_WORLD, mesh%par%nrank, ierr) call MPI_Comm_size(MPI_COMM_WORLD, mesh%par%nproc, ierr) - call domain_decomposition(mesh) + call mesh%domain_decomposition() ! Define default values mesh%vert_dims_padded = mesh%vert_dims @@ -143,7 +144,7 @@ subroutine set_sz(self, sz) end subroutine - subroutine domain_decomposition(mesh) + subroutine domain_decomposition_generic(mesh) !! Supports 1D, 2D, and 3D domain decomposition. !! !! Current implementation allows only constant sub-domain size across a @@ -203,7 +204,7 @@ subroutine domain_decomposition(mesh) subd_pos_next(3)) end do - end subroutine domain_decomposition + end subroutine domain_decomposition_generic pure function get_sz(self) result(sz) !! Getter for parameter SZ diff --git a/src/omp/mesh.f90 b/src/omp/mesh.f90 new file mode 100644 index 000000000..ac839bcc5 --- /dev/null +++ b/src/omp/mesh.f90 @@ -0,0 +1,82 @@ +module m_omp_mesh + use decomp_2d_fft, only: decomp_2d_init + + type, extends(mesh_t) :: omp_mesh_t + contains + procedure :: domain_decomposition => domain_decomposition_2decompfft + end type omp_mesh_t + + contains + + + subroutine domain_decomposition_2decompfft(mesh) + !! Supports 1D, 2D, and 3D domain decomposition. + !! + !! Current implementation allows only constant sub-domain size across a + !! given direction. 
+ class(mesh_t), intent(inout) :: mesh + + nx = mesh%global_vert_dims(1) + ny = mesh%global_vert_dims(2) + nz = mesh%global_vert_dims(3) + + p_row = mesh%par%nproc_dir(2) + p_col = mesh%par%nproc_dir(3) + periodic_bc(:) = mesh%periodic_BC(:) + call decomp_2d_init(nx, ny, nz, p_row, p_col, periodc_bc) + + mesh%vert_dims(:) = xsize(:) + mesh%par%n_offset(:) = xstart(:) + + ! Get global_ranks + allocate(global_ranks(1, p_row, p_col)) + allocate(global_ranks_lin(p_row*p_col)) + global_ranks_lin(:) = 0 + + call MPI_Comm_rank(DECOMP_2D_COMM_CART_X, cart_rank, ierr) + call MPI_Cart_coords(DECOMP_2D_COMM_CART_X, cart_rank, 2, coords, ierr) + + global_ranks_lin(coords(1) + p_row*(coords(2)-1)) = mesh%par%nrank + + call MPI_Allreduce(MPI_IN_PLACE, global_ranks_lin, p_row*p_col, MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD, ierr) + + global_ranks = reshape(global_ranks_lin, shape=[1, p_row, p_col]) + + ! TODO: refactirise + ! subdomain position in the global domain + subd_pos = findloc(global_ranks, mesh%par%nrank) + + ! local/directional position of the subdomain + mesh%par%nrank_dir(:) = subd_pos(:) - 1 + + do dir = 1, 3 + is_last_domain = (mesh%par%nrank_dir(dir) + 1 == mesh%par%nproc_dir(dir)) + if (is_last_domain .and. (.not. mesh%periodic_BC(dir))) then + mesh%cell_dims(dir) = mesh%vert_dims(dir) - 1 + else + mesh%cell_dims(dir) = mesh%vert_dims(dir) + end if + end do + + mesh%par%n_offset(:) = mesh%vert_dims(:)*mesh%par%nrank_dir(:) + + do dir = 1, 3 + nproc = mesh%par%nproc_dir(dir) + subd_pos_prev(:) = subd_pos(:) + subd_pos_prev(dir) = modulo(subd_pos(dir) - 2, nproc) + 1 + mesh%par%pprev(dir) = global_ranks(subd_pos_prev(1), & + subd_pos_prev(2), & + subd_pos_prev(3)) + + subd_pos_next(:) = subd_pos(:) + subd_pos_next(dir) = modulo(subd_pos(dir) - nproc, nproc) + 1 + mesh%par%pnext(dir) = global_ranks(subd_pos_next(1), & + subd_pos_next(2), & + subd_pos_next(3)) + end do + + + end subroutine domain_decomposition_2decompfft + + +end module m_omp_mesh \ No newline at end of file diff --git a/src/omp/poisson_fft.f90 b/src/omp/poisson_fft.f90 index 87105738b..9f6920069 100644 --- a/src/omp/poisson_fft.f90 +++ b/src/omp/poisson_fft.f90 @@ -35,6 +35,8 @@ function init(mesh, xdirps, ydirps, zdirps) result(poisson_fft) call poisson_fft%base_init(mesh, xdirps, ydirps, zdirps) + call decomp_2d_fft_init(PHYSICAL_IN_X) + end function init subroutine fft_forward_omp(self, f_in) @@ -42,6 +44,9 @@ subroutine fft_forward_omp(self, f_in) class(omp_poisson_fft_t) :: self class(field_t), intent(in) :: f_in + + call decomp_2d_fft_3d(f_in%data, self%waves) + end subroutine fft_forward_omp subroutine fft_backward_omp(self, f_out) @@ -49,12 +54,17 @@ subroutine fft_backward_omp(self, f_out) class(omp_poisson_fft_t) :: self class(field_t), intent(inout) :: f_out + + call decomp_2d_fft_3d(self%waves, f_out%data) + end subroutine fft_backward_omp subroutine fft_postprocess_omp(self) implicit none class(omp_poisson_fft_t) :: self + !TODO + end subroutine fft_postprocess_omp end module m_omp_poisson_fft From 8a513a3dc28072a3be6cb7a1a68a94c738fc6f10 Mon Sep 17 00:00:00 2001 From: Sebastien Lemaire Date: Thu, 11 Jul 2024 16:59:54 +0100 Subject: [PATCH 3/8] add omp/mesh.f90 to cmakelists.txt --- src/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0176ca245..7f1a8cb96 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -10,6 +10,7 @@ set(SRC mesh.f90 field.f90 omp/backend.f90 + omp/mesh.f90 omp/common.f90 omp/kernels/distributed.f90 omp/poisson_fft.f90 From 
c0fb05a72a3e2cc0124f79b6658ec434ad2b0345 Mon Sep 17 00:00:00 2001 From: Sebastien Lemaire Date: Tue, 16 Jul 2024 13:58:07 +0100 Subject: [PATCH 4/8] cleanup and fix compilation --- src/CMakeLists.txt | 1 + src/allocator.f90 | 2 +- src/mesh.f90 | 20 +++++++++--------- src/omp/mesh.f90 | 46 ++++++++++++++++++++++++++++++++++++++--- src/omp/poisson_fft.f90 | 3 +++ src/solver.f90 | 2 +- src/xcompact.f90 | 8 ++++++- 7 files changed, 66 insertions(+), 16 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7f1a8cb96..4e2983152 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -55,6 +55,7 @@ elseif(${CMAKE_Fortran_COMPILER_ID} STREQUAL "GNU") find_package(decomp2d REQUIRED) include_directories(${decomp2d_INCLUDE_DIRS}) target_link_libraries(decomp2d) + target_link_libraries(x3d2 PRIVATE decomp2d) set(CMAKE_Fortran_FLAGS "-cpp -std=f2018") set(CMAKE_Fortran_FLAGS_DEBUG "-g -Og -Wall -Wpedantic -Werror -Wimplicit-interface -Wimplicit-procedure -Wno-unused-dummy-argument") set(CMAKE_Fortran_FLAGS_RELEASE "-O3 -ffast-math") diff --git a/src/allocator.f90 b/src/allocator.f90 index 607a54c9b..d81ccd0a0 100644 --- a/src/allocator.f90 +++ b/src/allocator.f90 @@ -64,7 +64,7 @@ module m_allocator contains function allocator_init(mesh, sz) result(allocator) - type(mesh_t), target, intent(inout) :: mesh + class(mesh_t), target, intent(inout) :: mesh integer, intent(in) :: sz type(allocator_t) :: allocator diff --git a/src/mesh.f90 b/src/mesh.f90 index 53400eca9..1ee0eb084 100644 --- a/src/mesh.f90 +++ b/src/mesh.f90 @@ -31,13 +31,13 @@ module m_mesh ! The mesh class stores all the information about the global and local (due to domain decomposition) mesh ! It also includes getter functions to access some of its parameters type :: mesh_t - integer, dimension(3), private :: global_vert_dims ! global number of vertices in each direction without padding (cartesian structure) - integer, dimension(3), private :: global_cell_dims ! global number of cells in each direction without padding (cartesian structure) + integer, dimension(3) :: global_vert_dims ! global number of vertices in each direction without padding (cartesian structure) + integer, dimension(3) :: global_cell_dims ! global number of cells in each direction without padding (cartesian structure) - integer, dimension(3), private :: vert_dims_padded ! local domain size including padding (cartesian structure) - integer, dimension(3), private :: vert_dims ! local number of vertices in each direction without padding (cartesian structure) - integer, dimension(3), private :: cell_dims ! local number of cells in each direction without padding (cartesian structure) - logical, dimension(3), private :: periodic_BC ! Whether or not a direction has a periodic BC + integer, dimension(3) :: vert_dims_padded ! local domain size including padding (cartesian structure) + integer, dimension(3) :: vert_dims ! local number of vertices in each direction without padding (cartesian structure) + integer, dimension(3) :: cell_dims ! local number of cells in each direction without padding (cartesian structure) + logical, dimension(3) :: periodic_BC ! Whether or not a direction has a periodic BC integer, private :: sz type(geo_t), allocatable :: geo ! object containing geometry information type(parallel_t), allocatable :: par ! object containing parallel domain decomposition information @@ -87,7 +87,7 @@ function mesh_init(dims_global, nproc_dir, L_global, & integer, dimension(3), intent(in) :: nproc_dir ! 
Number of proc in each direction real(dp), dimension(3), intent(in) :: L_global logical, dimension(3), optional, intent(in) :: periodic_BC - type(mesh_t) :: mesh + class(mesh_t), allocatable :: mesh integer :: dir integer :: ierr @@ -162,6 +162,9 @@ subroutine domain_decomposition_generic(mesh) nproc_y = mesh%par%nproc_dir(2) nproc_z = mesh%par%nproc_dir(3) + ! Define number of cells and vertices in each direction + mesh%vert_dims = mesh%global_vert_dims/mesh%par%nproc_dir + ! A 3D array corresponding to each region in the global domain allocate (global_ranks(nproc_x, nproc_y, nproc_z)) @@ -175,9 +178,6 @@ subroutine domain_decomposition_generic(mesh) ! local/directional position of the subdomain mesh%par%nrank_dir(:) = subd_pos(:) - 1 - ! Define number of cells and vertices in each direction - mesh%vert_dims = mesh%global_vert_dims/mesh%par%nproc_dir - do dir = 1, 3 is_last_domain = (mesh%par%nrank_dir(dir) + 1 == mesh%par%nproc_dir(dir)) if (is_last_domain .and. (.not. mesh%periodic_BC(dir))) then diff --git a/src/omp/mesh.f90 b/src/omp/mesh.f90 index ac839bcc5..8f0ed2a47 100644 --- a/src/omp/mesh.f90 +++ b/src/omp/mesh.f90 @@ -1,20 +1,60 @@ module m_omp_mesh - use decomp_2d_fft, only: decomp_2d_init + use mpi + use decomp_2d, only: decomp_2d_init, DECOMP_2D_COMM_CART_X, xsize, xstart + use m_mesh, only: mesh_t + use m_common, only: dp, DIR_X, DIR_Y, DIR_Z, DIR_C, & + CELL, VERT, none, X_FACE, Y_FACE, Z_FACE, & + X_EDGE, Y_EDGE, Z_EDGE + use m_field, only: field_t + type, extends(mesh_t) :: omp_mesh_t contains procedure :: domain_decomposition => domain_decomposition_2decompfft end type omp_mesh_t + interface omp_mesh_t + module procedure init + end interface omp_mesh_t + + private init + contains + function init(dims_global, nproc_dir, L_global, & + periodic_BC) result(mesh) + integer, dimension(3), intent(in) :: dims_global + integer, dimension(3), intent(in) :: nproc_dir ! Number of proc in each direction + real(dp), dimension(3), intent(in) :: L_global + logical, dimension(3), optional, intent(in) :: periodic_BC + + type(omp_mesh_t) :: mesh + + mesh%mesh_t = mesh_t(dims_global, nproc_dir, L_global, & + periodic_BC) + + end function + + subroutine domain_decomposition_2decompfft(mesh) !! Supports 1D, 2D, and 3D domain decomposition. !! !! Current implementation allows only constant sub-domain size across a !! given direction. 
- class(mesh_t), intent(inout) :: mesh + class(omp_mesh_t), intent(inout) :: mesh + integer :: p_col, p_row + integer, allocatable, dimension(:, :, :) :: global_ranks + integer, allocatable, dimension(:) :: global_ranks_lin + integer :: nproc + integer, dimension(3) :: subd_pos, subd_pos_prev, subd_pos_next + logical, dimension(3) :: periodic_bc + integer :: dir + logical :: is_last_domain + integer :: nx, ny, nz + integer :: ierr + integer :: cart_rank + integer, dimension(2) :: coords nx = mesh%global_vert_dims(1) ny = mesh%global_vert_dims(2) @@ -23,7 +63,7 @@ subroutine domain_decomposition_2decompfft(mesh) p_row = mesh%par%nproc_dir(2) p_col = mesh%par%nproc_dir(3) periodic_bc(:) = mesh%periodic_BC(:) - call decomp_2d_init(nx, ny, nz, p_row, p_col, periodc_bc) + call decomp_2d_init(nx, ny, nz, p_row, p_col, periodic_bc) mesh%vert_dims(:) = xsize(:) mesh%par%n_offset(:) = xstart(:) diff --git a/src/omp/poisson_fft.f90 b/src/omp/poisson_fft.f90 index 9f6920069..86c139313 100644 --- a/src/omp/poisson_fft.f90 +++ b/src/omp/poisson_fft.f90 @@ -1,4 +1,7 @@ module m_omp_poisson_fft + + use decomp_2d_constants, only: PHYSICAL_IN_X + use decomp_2d_fft, only: decomp_2d_fft_init, decomp_2d_fft_3d use m_allocator, only: field_t use m_common, only: dp use m_poisson_fft, only: poisson_fft_t diff --git a/src/solver.f90 b/src/solver.f90 index 0ce9a46a4..c78816179 100644 --- a/src/solver.f90 +++ b/src/solver.f90 @@ -86,7 +86,7 @@ function init(backend, mesh, time_integrator, host_allocator, & implicit none class(base_backend_t), target, intent(inout) :: backend - type(mesh_t), target, intent(inout) :: mesh + class(mesh_t), target, intent(inout) :: mesh class(time_intg_t), target, intent(inout) :: time_integrator type(allocator_t), target, intent(inout) :: host_allocator class(dirps_t), target, intent(inout) :: xdirps, ydirps, zdirps diff --git a/src/xcompact.f90 b/src/xcompact.f90 index 6d4a9bcb1..af37fb309 100644 --- a/src/xcompact.f90 +++ b/src/xcompact.f90 @@ -19,6 +19,7 @@ program xcompact #else use m_omp_backend use m_omp_common, only: SZ + use m_omp_mesh #endif implicit none @@ -26,7 +27,6 @@ program xcompact type(globs_t) :: globs class(base_backend_t), pointer :: backend class(allocator_t), pointer :: allocator - type(mesh_t) :: mesh type(allocator_t), pointer :: host_allocator type(solver_t) :: solver type(time_intg_t) :: time_integrator @@ -35,9 +35,11 @@ program xcompact #ifdef CUDA type(cuda_backend_t), target :: cuda_backend type(cuda_allocator_t), target :: cuda_allocator + type(mesh_t), target :: mesh integer :: ndevs, devnum #else type(omp_backend_t), target :: omp_backend + type(omp_mesh_t), target :: mesh #endif type(allocator_t), target :: omp_allocator @@ -69,7 +71,11 @@ program xcompact ! 
Domain decomposition in each direction nproc_dir = [1, 1, nproc] +#ifdef CUDA mesh = mesh_t(dims_global, nproc_dir, L_global) +#else + mesh = omp_mesh_t(dims_global, nproc_dir, L_global) +#endif globs%dt = 0.001_dp globs%nu = 1._dp/1600._dp From 9c113a4fa00de460759a572ed4bd64a683f481fe Mon Sep 17 00:00:00 2001 From: Sebastien Lemaire Date: Wed, 17 Jul 2024 16:25:08 +0100 Subject: [PATCH 5/8] use number of cells as 2decomp input --- src/mesh.f90 | 5 +++++ src/omp/mesh.f90 | 34 ++++++++++++++++++++-------------- src/poisson_fft.f90 | 2 +- 3 files changed, 26 insertions(+), 15 deletions(-) diff --git a/src/mesh.f90 b/src/mesh.f90 index 1ee0eb084..30f5fb423 100644 --- a/src/mesh.f90 +++ b/src/mesh.f90 @@ -92,6 +92,7 @@ function mesh_init(dims_global, nproc_dir, L_global, & integer :: dir integer :: ierr + allocate(mesh) allocate (mesh%geo) allocate (mesh%par) mesh%global_vert_dims(:) = dims_global @@ -157,6 +158,10 @@ subroutine domain_decomposition_generic(mesh) integer :: dir logical :: is_last_domain + if (mesh%par%is_root()) then + print*, "Generic domain decomposition" + end if + ! Number of processes on a direction basis nproc_x = mesh%par%nproc_dir(1) nproc_y = mesh%par%nproc_dir(2) diff --git a/src/omp/mesh.f90 b/src/omp/mesh.f90 index 8f0ed2a47..f72808b1d 100644 --- a/src/omp/mesh.f90 +++ b/src/omp/mesh.f90 @@ -29,11 +29,14 @@ function init(dims_global, nproc_dir, L_global, & real(dp), dimension(3), intent(in) :: L_global logical, dimension(3), optional, intent(in) :: periodic_BC - type(omp_mesh_t) :: mesh + type(omp_mesh_t), allocatable :: mesh + allocate(omp_mesh_t :: mesh) mesh%mesh_t = mesh_t(dims_global, nproc_dir, L_global, & periodic_BC) + call domain_decomposition_2decompfft(mesh) + end function @@ -56,18 +59,19 @@ subroutine domain_decomposition_2decompfft(mesh) integer :: cart_rank integer, dimension(2) :: coords - nx = mesh%global_vert_dims(1) - ny = mesh%global_vert_dims(2) - nz = mesh%global_vert_dims(3) + if (mesh%par%is_root()) then + print*, "Domain decomposition by 2decomp&fft" + end if + + nx = mesh%global_cell_dims(1) + ny = mesh%global_cell_dims(2) + nz = mesh%global_cell_dims(3) p_row = mesh%par%nproc_dir(2) p_col = mesh%par%nproc_dir(3) periodic_bc(:) = mesh%periodic_BC(:) call decomp_2d_init(nx, ny, nz, p_row, p_col, periodic_bc) - mesh%vert_dims(:) = xsize(:) - mesh%par%n_offset(:) = xstart(:) - ! Get global_ranks allocate(global_ranks(1, p_row, p_col)) allocate(global_ranks_lin(p_row*p_col)) @@ -76,30 +80,33 @@ subroutine domain_decomposition_2decompfft(mesh) call MPI_Comm_rank(DECOMP_2D_COMM_CART_X, cart_rank, ierr) call MPI_Cart_coords(DECOMP_2D_COMM_CART_X, cart_rank, 2, coords, ierr) - global_ranks_lin(coords(1) + p_row*(coords(2)-1)) = mesh%par%nrank + global_ranks_lin(coords(1)+1 + p_row*(coords(2))) = mesh%par%nrank call MPI_Allreduce(MPI_IN_PLACE, global_ranks_lin, p_row*p_col, MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD, ierr) global_ranks = reshape(global_ranks_lin, shape=[1, p_row, p_col]) - ! TODO: refactirise ! subdomain position in the global domain subd_pos = findloc(global_ranks, mesh%par%nrank) ! local/directional position of the subdomain mesh%par%nrank_dir(:) = subd_pos(:) - 1 + ! Get local domain size and offset from 2decomp + mesh%cell_dims(:) = xsize(:) + mesh%par%n_offset(:) = xstart(:) + + ! compute vert_dims from cell_dims do dir = 1, 3 is_last_domain = (mesh%par%nrank_dir(dir) + 1 == mesh%par%nproc_dir(dir)) if (is_last_domain .and. (.not. 
mesh%periodic_BC(dir))) then - mesh%cell_dims(dir) = mesh%vert_dims(dir) - 1 + mesh%vert_dims(dir) = mesh%cell_dims(dir) +1 else - mesh%cell_dims(dir) = mesh%vert_dims(dir) + mesh%vert_dims(dir) = mesh%cell_dims(dir) end if end do - mesh%par%n_offset(:) = mesh%vert_dims(:)*mesh%par%nrank_dir(:) - + ! Get neighbour ranks do dir = 1, 3 nproc = mesh%par%nproc_dir(dir) subd_pos_prev(:) = subd_pos(:) @@ -115,7 +122,6 @@ subroutine domain_decomposition_2decompfft(mesh) subd_pos_next(3)) end do - end subroutine domain_decomposition_2decompfft diff --git a/src/poisson_fft.f90 b/src/poisson_fft.f90 index 915953bad..f6bb06e54 100644 --- a/src/poisson_fft.f90 +++ b/src/poisson_fft.f90 @@ -89,7 +89,7 @@ subroutine base_init(self, mesh, xdirps, ydirps, zdirps) allocate (self%ay(self%ny_glob), self%by(self%ny_glob)) allocate (self%az(self%nz_glob), self%bz(self%nz_glob)) - ! cuFFT 3D transform halves the first index. + ! FFT 3D transform halves the first index. allocate (self%waves(self%nx_spec, self%ny_spec, self%nz_spec)) ! waves_set requires some of the preprocessed tdsops variables. From 2e6f86c6153f65605886daaef643e33cd8007f1c Mon Sep 17 00:00:00 2001 From: Sebastien Lemaire Date: Wed, 17 Jul 2024 17:29:57 +0100 Subject: [PATCH 6/8] fix size of field issue in vecadd_omp --- src/omp/backend.f90 | 4 +++- src/omp/poisson_fft.f90 | 8 ++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/omp/backend.f90 b/src/omp/backend.f90 index ae91b3030..3aa8edab0 100644 --- a/src/omp/backend.f90 +++ b/src/omp/backend.f90 @@ -421,7 +421,9 @@ subroutine vecadd_omp(self, a, x, b, y) error stop "Called vector add with incompatible fields" end if - dims = size(x%data) + !dims = size(x%data) + ! Fix for size being stored wrongly into dims + dims = self%mesh%get_padded_dims(x) nvec = dims(1)/SZ remstart = nvec*SZ + 1 diff --git a/src/omp/poisson_fft.f90 b/src/omp/poisson_fft.f90 index 86c139313..0ed51df38 100644 --- a/src/omp/poisson_fft.f90 +++ b/src/omp/poisson_fft.f90 @@ -38,8 +38,16 @@ function init(mesh, xdirps, ydirps, zdirps) result(poisson_fft) call poisson_fft%base_init(mesh, xdirps, ydirps, zdirps) + if (mesh%par%is_root()) then + print*, "Initialising 2decomp&fft" + end if + call decomp_2d_fft_init(PHYSICAL_IN_X) + if (mesh%par%is_root()) then + print*, "Initialising done 2decomp&fft" + end if + end function init subroutine fft_forward_omp(self, f_in) From 8d1b52841881b2b5d839c6968bb7ab1797689da9 Mon Sep 17 00:00:00 2001 From: Sebastien Lemaire Date: Thu, 18 Jul 2024 12:38:17 +0100 Subject: [PATCH 7/8] add spectral processing to omp poisson solver --- src/CMakeLists.txt | 1 + src/omp/kernels/spectral_processing.f90 | 99 +++++++++++++++++++++++++ src/omp/poisson_fft.f90 | 15 ++-- src/poisson_fft.f90 | 2 +- 4 files changed, 109 insertions(+), 8 deletions(-) create mode 100644 src/omp/kernels/spectral_processing.f90 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4e2983152..187816d5f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -13,6 +13,7 @@ set(SRC omp/mesh.f90 omp/common.f90 omp/kernels/distributed.f90 + omp/kernels/spectral_processing.f90 omp/poisson_fft.f90 omp/sendrecv.f90 omp/exec_dist.f90 diff --git a/src/omp/kernels/spectral_processing.f90 b/src/omp/kernels/spectral_processing.f90 new file mode 100644 index 000000000..00fdceb7b --- /dev/null +++ b/src/omp/kernels/spectral_processing.f90 @@ -0,0 +1,99 @@ +module m_omp_spectral + + use m_common, only: dp + implicit none + + contains + subroutine process_spectral_div_u(div_u, waves, nx_spec, ny_spec, nz_spec, & 
+ y_sp_st, nx, ny, nz, ax, bx, ay, by, az, bz) + + complex(dp), intent(inout), dimension(:, :, :) :: div_u + complex(dp), intent(in), dimension(:, :, :) :: waves + real(dp), intent(in), dimension(:) :: ax, bx, ay, by, az, bz + integer, intent(in) :: nx_spec, ny_spec, nz_spec + integer, intent(in) :: y_sp_st + integer, intent(in) :: nx, ny, nz + + integer :: i, j, k, ix, iy, iz + real(dp) :: tmp_r, tmp_c, div_r, div_c + + + do k=1, nz_spec + do j=1, ny_spec + do i=1, nx_spec + ! normalisation + div_r = real(div_u(i, j, k), kind=dp)/(nx*ny*nz) + div_c = aimag(div_u(i, j, k))/(nx*ny*nz) + + ix = i + iy = j + y_sp_st + iz = k + + ! post-process forward + ! post-process in z + tmp_r = div_r + tmp_c = div_c + div_r = tmp_r*bz(iz) + tmp_c*az(iz) + div_c = tmp_c*bz(iz) - tmp_r*az(iz) + if (iz > nz/2 + 1) div_r = -div_r + if (iz > nz/2 + 1) div_c = -div_c + + ! post-process in y + tmp_r = div_r + tmp_c = div_c + div_r = tmp_r*by(iy) + tmp_c*ay(iy) + div_c = tmp_c*by(iy) - tmp_r*ay(iy) + if (iy > ny/2 + 1) div_r = -div_r + if (iy > ny/2 + 1) div_c = -div_c + + ! post-process in x + tmp_r = div_r + tmp_c = div_c + div_r = tmp_r*bx(ix) + tmp_c*ax(ix) + div_c = tmp_c*bx(ix) - tmp_r*ax(ix) + + ! Solve Poisson + tmp_r = real(waves(i, j, k), kind=dp) + tmp_c = aimag(waves(i, j, k)) + if ((tmp_r < 1.e-16_dp) .or. (tmp_c < 1.e-16_dp)) then + div_r = 0._dp; div_c = 0._dp + else + div_r = -div_r/tmp_r + div_c = -div_c/tmp_c + end if + + ! post-process backward + ! post-process in z + tmp_r = div_r + tmp_c = div_c + div_r = tmp_r*bz(iz) - tmp_c*az(iz) + div_c = -tmp_c*bz(iz) - tmp_r*az(iz) + if (iz > nz/2 + 1) div_r = -div_r + if (iz > nz/2 + 1) div_c = -div_c + + ! post-process in y + tmp_r = div_r + tmp_c = div_c + div_r = tmp_r*by(iy) + tmp_c*ay(iy) + div_c = tmp_c*by(iy) - tmp_r*ay(iy) + if (iy > ny/2 + 1) div_r = -div_r + if (iy > ny/2 + 1) div_c = -div_c + + ! post-process in x + tmp_r = div_r + tmp_c = div_c + div_r = tmp_r*bx(ix) + tmp_c*ax(ix) + div_c = -tmp_c*bx(ix) + tmp_r*ax(ix) + + ! 
update the entry + div_u(i, j, k) = cmplx(div_r, div_c, kind=dp) + + end do + end do + end do + + + + end subroutine + +end module m_omp_spectral \ No newline at end of file diff --git a/src/omp/poisson_fft.f90 b/src/omp/poisson_fft.f90 index 0ed51df38..8626f04e8 100644 --- a/src/omp/poisson_fft.f90 +++ b/src/omp/poisson_fft.f90 @@ -7,6 +7,7 @@ module m_omp_poisson_fft use m_poisson_fft, only: poisson_fft_t use m_tdsops, only: dirps_t use m_mesh, only: mesh_t + use m_omp_spectral, only: process_spectral_div_u implicit none @@ -43,10 +44,7 @@ function init(mesh, xdirps, ydirps, zdirps) result(poisson_fft) end if call decomp_2d_fft_init(PHYSICAL_IN_X) - - if (mesh%par%is_root()) then - print*, "Initialising done 2decomp&fft" - end if + allocate(poisson_fft%c_x(poisson_fft%nx_spec, poisson_fft%ny_spec, poisson_fft%nz_spec)) end function init @@ -56,7 +54,7 @@ subroutine fft_forward_omp(self, f_in) class(omp_poisson_fft_t) :: self class(field_t), intent(in) :: f_in - call decomp_2d_fft_3d(f_in%data, self%waves) + call decomp_2d_fft_3d(f_in%data, self%c_x) end subroutine fft_forward_omp @@ -66,7 +64,7 @@ subroutine fft_backward_omp(self, f_out) class(omp_poisson_fft_t) :: self class(field_t), intent(inout) :: f_out - call decomp_2d_fft_3d(self%waves, f_out%data) + call decomp_2d_fft_3d(self%c_x, f_out%data) end subroutine fft_backward_omp @@ -74,7 +72,10 @@ subroutine fft_postprocess_omp(self) implicit none class(omp_poisson_fft_t) :: self - !TODO + + call process_spectral_div_u(self%c_x, self%waves, self%nx_spec, self%ny_spec, self%nz_spec, & + self%y_sp_st, self%nx_glob, self%ny_glob, self%nz_glob, & + self%ax, self%bx, self%ay, self%by, self%az, self%bz) end subroutine fft_postprocess_omp diff --git a/src/poisson_fft.f90 b/src/poisson_fft.f90 index f6bb06e54..fbb7af0ae 100644 --- a/src/poisson_fft.f90 +++ b/src/poisson_fft.f90 @@ -21,7 +21,7 @@ module m_poisson_fft !> Local domain sized array storing the spectral equivalence constants complex(dp), allocatable, dimension(:, :, :) :: waves !> Wave numbers in x, y, and z - complex(dp), allocatable, dimension(:) :: ax, bx, ay, by, az, bz + real(dp), allocatable, dimension(:) :: ax, bx, ay, by, az, bz contains procedure(fft_forward), deferred :: fft_forward procedure(fft_backward), deferred :: fft_backward From eb3883bcdef39287a33b59cbf55dccc00dbdcf17 Mon Sep 17 00:00:00 2001 From: Sebastien Lemaire Date: Thu, 17 Oct 2024 18:24:48 +0100 Subject: [PATCH 8/8] restructure code to have 2decomp&fft as optional dependency --- CMakeLists.txt | 13 +++ src/2decompfft/decomp.f90 | 105 +++++++++++++++++ src/{ => 2decompfft}/omp/poisson_fft.f90 | 0 src/CMakeLists.txt | 37 ++++-- src/decomp.f90 | 26 +++++ src/decomp_generic.f90 | 80 +++++++++++++ src/mesh.f90 | 143 +++++------------------ src/omp/mesh.f90 | 89 +------------- src/par_grid.f90 | 44 +++++++ src/xcompact.f90 | 13 +-- tests/CMakeLists.txt | 3 +- 11 files changed, 325 insertions(+), 228 deletions(-) create mode 100644 src/2decompfft/decomp.f90 rename src/{ => 2decompfft}/omp/poisson_fft.f90 (100%) create mode 100644 src/decomp.f90 create mode 100644 src/decomp_generic.f90 create mode 100644 src/par_grid.f90 diff --git a/CMakeLists.txt b/CMakeLists.txt index 8ee17c0e6..edb811370 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,5 +2,18 @@ cmake_minimum_required(VERSION 3.10) project(x3d2 LANGUAGES Fortran) enable_testing() +# +# Set the Poisson solver choice +# +set(POISSON_SOLVER FFT CACHE STRING + "Select the Poisson solver: FFT or ITER") + +if(${CMAKE_Fortran_COMPILER_ID} STREQUAL "PGI" OR + 
${CMAKE_Fortran_COMPILER_ID} STREQUAL "NVHPC") + set(BACKEND CUDA) +else() + set(BACKEND OMP) +endif() + add_subdirectory(src) add_subdirectory(tests) diff --git a/src/2decompfft/decomp.f90 b/src/2decompfft/decomp.f90 new file mode 100644 index 000000000..03c5d2186 --- /dev/null +++ b/src/2decompfft/decomp.f90 @@ -0,0 +1,105 @@ +submodule(m_decomp) m_decomp_2decompfft + + use mpi + use m_decomp, only: decomp_t + implicit none + + type, extends(decomp_t) :: decomp_2decompfft_t + contains + procedure :: decomposition => decomposition_2decompfft + end type + + contains + + module subroutine decomposition_2decompfft(self, grid, par) + !! Supports 1D, 2D, and 3D domain decomposition. + !! + !! Current implementation allows only constant sub-domain size across a + !! given direction. + use m_grid, only: grid_t + use m_par, only: par_t + use decomp_2d, only: decomp_2d_init, DECOMP_2D_COMM_CART_X, xsize, xstart + + class(decomp_2decompfft_t) :: self + class(grid_t), intent(inout) :: grid + class(par_t), intent(inout) :: par + integer :: p_col, p_row + integer, allocatable, dimension(:, :, :) :: global_ranks + integer, allocatable, dimension(:) :: global_ranks_lin + integer :: nproc + integer, dimension(3) :: subd_pos, subd_pos_prev, subd_pos_next + logical, dimension(3) :: periodic_bc + integer :: dir + logical :: is_last_domain + integer :: nx, ny, nz + integer :: ierr + integer :: cart_rank + integer, dimension(2) :: coords + + if (par%is_root()) then + print*, "Domain decomposition by 2decomp&fft" + end if + + nx = grid%global_cell_dims(1) + ny = grid%global_cell_dims(2) + nz = grid%global_cell_dims(3) + + p_row = par%nproc_dir(2) + p_col = par%nproc_dir(3) + periodic_bc(:) = grid%periodic_BC(:) + call decomp_2d_init(nx, ny, nz, p_row, p_col, periodic_bc) + + ! Get global_ranks + allocate(global_ranks(1, p_row, p_col)) + allocate(global_ranks_lin(p_row*p_col)) + global_ranks_lin(:) = 0 + + call MPI_Comm_rank(DECOMP_2D_COMM_CART_X, cart_rank, ierr) + call MPI_Cart_coords(DECOMP_2D_COMM_CART_X, cart_rank, 2, coords, ierr) + + global_ranks_lin(coords(1)+1 + p_row*(coords(2))) = par%nrank + + call MPI_Allreduce(MPI_IN_PLACE, global_ranks_lin, p_row*p_col, MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD, ierr) + + global_ranks = reshape(global_ranks_lin, shape=[1, p_row, p_col]) + + ! subdomain position in the global domain + subd_pos = findloc(global_ranks, par%nrank) + + ! local/directional position of the subdomain + par%nrank_dir(:) = subd_pos(:) - 1 + + ! Get local domain size and offset from 2decomp + grid%cell_dims(:) = xsize(:) + par%n_offset(:) = xstart(:) + + ! compute vert_dims from cell_dims + do dir = 1, 3 + is_last_domain = (par%nrank_dir(dir) + 1 == par%nproc_dir(dir)) + if (is_last_domain .and. (.not. grid%periodic_BC(dir))) then + grid%vert_dims(dir) = grid%cell_dims(dir) +1 + else + grid%vert_dims(dir) = grid%cell_dims(dir) + end if + end do + + ! 
Get neighbour ranks + do dir = 1, 3 + nproc = par%nproc_dir(dir) + subd_pos_prev(:) = subd_pos(:) + subd_pos_prev(dir) = modulo(subd_pos(dir) - 2, nproc) + 1 + par%pprev(dir) = global_ranks(subd_pos_prev(1), & + subd_pos_prev(2), & + subd_pos_prev(3)) + + subd_pos_next(:) = subd_pos(:) + subd_pos_next(dir) = modulo(subd_pos(dir) - nproc, nproc) + 1 + par%pnext(dir) = global_ranks(subd_pos_next(1), & + subd_pos_next(2), & + subd_pos_next(3)) + end do + + end subroutine decomposition_2decompfft + + +end submodule \ No newline at end of file diff --git a/src/omp/poisson_fft.f90 b/src/2decompfft/omp/poisson_fft.f90 similarity index 100% rename from src/omp/poisson_fft.f90 rename to src/2decompfft/omp/poisson_fft.f90 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 187816d5f..d34fdb797 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -8,13 +8,13 @@ set(SRC time_integrator.f90 ordering.f90 mesh.f90 + decomp.f90 field.f90 + par_grid.f90 omp/backend.f90 - omp/mesh.f90 omp/common.f90 omp/kernels/distributed.f90 omp/kernels/spectral_processing.f90 - omp/poisson_fft.f90 omp/sendrecv.f90 omp/exec_dist.f90 ) @@ -30,21 +30,31 @@ set(CUDASRC cuda/sendrecv.f90 cuda/tdsops.f90 ) +set(2DECOMPFFTSRC + 2decompfft/omp/poisson_fft.f90 + 2decompfft/decomp.f90 +) +set(GENERICDECOMPSRC + decomp_generic.f90 +) -if(${CMAKE_Fortran_COMPILER_ID} STREQUAL "PGI" OR - ${CMAKE_Fortran_COMPILER_ID} STREQUAL "NVHPC") +if(${BACKEND} STREQUAL "CUDA") list(APPEND SRC ${CUDASRC}) endif() +if (${POISSON_SOLVER} STREQUAL "FFT" AND ${BACKEND} STREQUAL "OMP") + list(APPEND SRC ${2DECOMPFFTSRC}) +else() + list(APPEND SRC ${GENERICDECOMPSRC}) +endif() + add_library(x3d2 STATIC ${SRC}) target_include_directories(x3d2 INTERFACE ${CMAKE_CURRENT_BINARY_DIR}) add_executable(xcompact xcompact.f90) target_link_libraries(xcompact PRIVATE x3d2) -if(${CMAKE_Fortran_COMPILER_ID} STREQUAL "PGI" OR - ${CMAKE_Fortran_COMPILER_ID} STREQUAL "NVHPC") - +if(${BACKEND} STREQUAL "CUDA") set(CMAKE_Fortran_FLAGS "-cpp -cuda") set(CMAKE_Fortran_FLAGS_DEBUG "-g -O0 -traceback -Mbounds -Mchkptr -Ktrap=fp") set(CMAKE_Fortran_FLAGS_RELEASE "-O3 -fast") @@ -53,13 +63,17 @@ if(${CMAKE_Fortran_COMPILER_ID} STREQUAL "PGI" OR target_compile_options(xcompact PRIVATE "-DCUDA") elseif(${CMAKE_Fortran_COMPILER_ID} STREQUAL "GNU") + set(CMAKE_Fortran_FLAGS "-cpp -std=f2018") + set(CMAKE_Fortran_FLAGS_DEBUG "-g -Og -Wall -Wpedantic -Werror -Wimplicit-interface -Wimplicit-procedure -Wno-unused-dummy-argument") + set(CMAKE_Fortran_FLAGS_RELEASE "-O3 -ffast-math") +endif() + +if (${POISSON_SOLVER} STREQUAL "FFT" AND ${BACKEND} STREQUAL "OMP") + message(STATUS "Using the FFT poisson solver with 2decomp&fft") find_package(decomp2d REQUIRED) include_directories(${decomp2d_INCLUDE_DIRS}) target_link_libraries(decomp2d) target_link_libraries(x3d2 PRIVATE decomp2d) - set(CMAKE_Fortran_FLAGS "-cpp -std=f2018") - set(CMAKE_Fortran_FLAGS_DEBUG "-g -Og -Wall -Wpedantic -Werror -Wimplicit-interface -Wimplicit-procedure -Wno-unused-dummy-argument") - set(CMAKE_Fortran_FLAGS_RELEASE "-O3 -ffast-math") endif() find_package(OpenMP REQUIRED) @@ -68,5 +82,4 @@ target_link_libraries(xcompact PRIVATE OpenMP::OpenMP_Fortran) find_package(MPI REQUIRED) target_link_libraries(x3d2 PRIVATE MPI::MPI_Fortran) -target_link_libraries(xcompact PRIVATE MPI::MPI_Fortran) - +target_link_libraries(xcompact PRIVATE MPI::MPI_Fortran) \ No newline at end of file diff --git a/src/decomp.f90 b/src/decomp.f90 new file mode 100644 index 000000000..69b68ab03 --- /dev/null +++ b/src/decomp.f90 
@@ -0,0 +1,26 @@ +module m_decomp +implicit none + + type, abstract :: decomp_t + contains + procedure(decomposition), public, deferred :: decomposition + end type decomp_t + + interface + subroutine decomposition(self, grid, par) + use m_grid, only: grid_t + use m_par, only: par_t + import :: decomp_t + class(decomp_t) :: self + class(grid_t), intent(inout) :: grid + class(par_t), intent(inout) :: par + end subroutine + end interface + + contains + + module subroutine test() + print *, "test" + end subroutine + +end module m_decomp \ No newline at end of file diff --git a/src/decomp_generic.f90 b/src/decomp_generic.f90 new file mode 100644 index 000000000..4fe301faf --- /dev/null +++ b/src/decomp_generic.f90 @@ -0,0 +1,80 @@ +submodule(m_decomp) m_decomp_generic + + use m_decomp, only: decomp_t + implicit none + + type, extends(decomp_t) :: decomp_generic_t + contains + procedure :: decomposition => decomposition_generic + end type + + contains + + module subroutine decomposition_generic(self, grid, par) + use m_grid, only: grid_t + use m_par, only: par_t + + class(decomp_generic_t) :: self + class(grid_t), intent(inout) :: grid + class(par_t), intent(inout) :: par + integer, allocatable, dimension(:, :, :) :: global_ranks + integer :: i, nproc_x, nproc_y, nproc_z, nproc + integer, dimension(3) :: subd_pos, subd_pos_prev, subd_pos_next + integer :: dir + logical :: is_last_domain + + if (par%is_root()) then + print*, "Domain decomposition by x3d2 (generic)" + end if + + ! Number of processes on a direction basis + nproc_x = par%nproc_dir(1) + nproc_y = par%nproc_dir(2) + nproc_z = par%nproc_dir(3) + + ! Define number of cells and vertices in each direction + grid%vert_dims = grid%global_vert_dims/par%nproc_dir + + ! A 3D array corresponding to each region in the global domain + allocate (global_ranks(nproc_x, nproc_y, nproc_z)) + + ! set the corresponding global rank for each sub-domain + global_ranks = reshape([(i, i=0, par%nproc - 1)], & + shape=[nproc_x, nproc_y, nproc_z]) + + ! subdomain position in the global domain + subd_pos = findloc(global_ranks, par%nrank) + + ! local/directional position of the subdomain + par%nrank_dir(:) = subd_pos(:) - 1 + + do dir = 1, 3 + is_last_domain = (par%nrank_dir(dir) + 1 == par%nproc_dir(dir)) + if (is_last_domain .and. (.not. grid%periodic_BC(dir))) then + grid%cell_dims(dir) = grid%vert_dims(dir) - 1 + else + grid%cell_dims(dir) = grid%vert_dims(dir) + end if + end do + + par%n_offset(:) = grid%vert_dims(:)*par%nrank_dir(:) + + do dir = 1, 3 + nproc = par%nproc_dir(dir) + subd_pos_prev(:) = subd_pos(:) + subd_pos_prev(dir) = modulo(subd_pos(dir) - 2, nproc) + 1 + par%pprev(dir) = global_ranks(subd_pos_prev(1), & + subd_pos_prev(2), & + subd_pos_prev(3)) + + subd_pos_next(:) = subd_pos(:) + subd_pos_next(dir) = modulo(subd_pos(dir) - nproc, nproc) + 1 + par%pnext(dir) = global_ranks(subd_pos_next(1), & + subd_pos_next(2), & + subd_pos_next(3)) + end do + + end subroutine decomposition_generic + + +end submodule \ No newline at end of file diff --git a/src/mesh.f90 b/src/mesh.f90 index 30f5fb423..515317f87 100644 --- a/src/mesh.f90 +++ b/src/mesh.f90 @@ -6,6 +6,8 @@ module m_mesh CELL, VERT, none, X_FACE, Y_FACE, Z_FACE, & X_EDGE, Y_EDGE, Z_EDGE use m_field, only: field_t + use m_par + use m_grid implicit none @@ -15,34 +17,14 @@ module m_mesh real(dp), dimension(3) :: L ! Global dimensions of the domain in each direction end type - ! Stores parallel domain related information - type :: parallel_t - integer :: nrank ! 
local rank ID - integer :: nproc ! total number of ranks/proc participating in the domain decomposition - integer, dimension(3) :: nrank_dir ! local rank ID in each direction - integer, dimension(3) :: nproc_dir ! total number of proc in each direction - integer, dimension(3) :: n_offset ! number of cells offset in each direction due to domain decomposition - integer, dimension(3) :: pnext ! rank ID of the previous rank in each direction - integer, dimension(3) :: pprev ! rank ID of the next rank in each direction - contains - procedure :: is_root ! returns if the current rank is the root rank - end type - ! The mesh class stores all the information about the global and local (due to domain decomposition) mesh ! It also includes getter functions to access some of its parameters type :: mesh_t - integer, dimension(3) :: global_vert_dims ! global number of vertices in each direction without padding (cartesian structure) - integer, dimension(3) :: global_cell_dims ! global number of cells in each direction without padding (cartesian structure) - - integer, dimension(3) :: vert_dims_padded ! local domain size including padding (cartesian structure) - integer, dimension(3) :: vert_dims ! local number of vertices in each direction without padding (cartesian structure) - integer, dimension(3) :: cell_dims ! local number of cells in each direction without padding (cartesian structure) - logical, dimension(3) :: periodic_BC ! Whether or not a direction has a periodic BC integer, private :: sz type(geo_t), allocatable :: geo ! object containing geometry information - type(parallel_t), allocatable :: par ! object containing parallel domain decomposition information + class(grid_t), allocatable :: grid ! object containing grid information + class(par_t), allocatable :: par ! object containing parallel domain decomposition information contains - procedure :: domain_decomposition => domain_decomposition_generic procedure :: get_SZ procedure :: get_dims @@ -80,6 +62,8 @@ module m_mesh function mesh_init(dims_global, nproc_dir, L_global, & periodic_BC) result(mesh) + + use m_decomp, only: decomp_t !! Completely initialise the mesh object. !! Upon initialisation the mesh object can be read-only and shouldn't be edited !! Takes as argument global information about the mesh like its length, number of cells and decomposition in each direction @@ -88,6 +72,7 @@ function mesh_init(dims_global, nproc_dir, L_global, & real(dp), dimension(3), intent(in) :: L_global logical, dimension(3), optional, intent(in) :: periodic_BC class(mesh_t), allocatable :: mesh + class(decomp_t), allocatable :: decomp integer :: dir integer :: ierr @@ -95,36 +80,37 @@ function mesh_init(dims_global, nproc_dir, L_global, & allocate(mesh) allocate (mesh%geo) allocate (mesh%par) - mesh%global_vert_dims(:) = dims_global + mesh%grid%global_vert_dims(:) = dims_global if (present(periodic_BC)) then - mesh%periodic_BC(:) = periodic_BC + mesh%grid%periodic_BC(:) = periodic_BC else ! Default to periodic BC - mesh%periodic_BC(:) = .true. + mesh%grid%periodic_BC(:) = .true. end if do dir = 1, 3 - if (mesh%periodic_BC(dir)) then - mesh%global_cell_dims(dir) = mesh%global_vert_dims(dir) + if (mesh%grid%periodic_BC(dir)) then + mesh%grid%global_cell_dims(dir) = mesh%grid%global_vert_dims(dir) else - mesh%global_cell_dims(dir) = mesh%global_vert_dims(dir) - 1 + mesh%grid%global_cell_dims(dir) = mesh%grid%global_vert_dims(dir) - 1 end if end do ! 
Geometry mesh%geo%L = L_global - mesh%geo%d = mesh%geo%L/mesh%global_cell_dims + mesh%geo%d = mesh%geo%L/mesh%grid%global_cell_dims ! Parallel domain decomposition mesh%par%nproc_dir(:) = nproc_dir mesh%par%nproc = product(nproc_dir(:)) call MPI_Comm_rank(MPI_COMM_WORLD, mesh%par%nrank, ierr) call MPI_Comm_size(MPI_COMM_WORLD, mesh%par%nproc, ierr) - call mesh%domain_decomposition() + + call decomp%decomposition(mesh%grid, mesh%par) ! Define default values - mesh%vert_dims_padded = mesh%vert_dims + mesh%grid%vert_dims_padded = mesh%grid%vert_dims mesh%sz = 1 end function mesh_init @@ -133,7 +119,7 @@ subroutine set_padded_dims(self, vert_dims) class(mesh_t), intent(inout) :: self integer, dimension(3), intent(in) :: vert_dims - self%vert_dims_padded = vert_dims + self%grid%vert_dims_padded = vert_dims end subroutine @@ -145,72 +131,6 @@ subroutine set_sz(self, sz) end subroutine - subroutine domain_decomposition_generic(mesh) - !! Supports 1D, 2D, and 3D domain decomposition. - !! - !! Current implementation allows only constant sub-domain size across a - !! given direction. - class(mesh_t), intent(inout) :: mesh - - integer, allocatable, dimension(:, :, :) :: global_ranks - integer :: i, nproc_x, nproc_y, nproc_z, nproc - integer, dimension(3) :: subd_pos, subd_pos_prev, subd_pos_next - integer :: dir - logical :: is_last_domain - - if (mesh%par%is_root()) then - print*, "Generic domain decomposition" - end if - - ! Number of processes on a direction basis - nproc_x = mesh%par%nproc_dir(1) - nproc_y = mesh%par%nproc_dir(2) - nproc_z = mesh%par%nproc_dir(3) - - ! Define number of cells and vertices in each direction - mesh%vert_dims = mesh%global_vert_dims/mesh%par%nproc_dir - - ! A 3D array corresponding to each region in the global domain - allocate (global_ranks(nproc_x, nproc_y, nproc_z)) - - ! set the corresponding global rank for each sub-domain - global_ranks = reshape([(i, i=0, mesh%par%nproc - 1)], & - shape=[nproc_x, nproc_y, nproc_z]) - - ! subdomain position in the global domain - subd_pos = findloc(global_ranks, mesh%par%nrank) - - ! local/directional position of the subdomain - mesh%par%nrank_dir(:) = subd_pos(:) - 1 - - do dir = 1, 3 - is_last_domain = (mesh%par%nrank_dir(dir) + 1 == mesh%par%nproc_dir(dir)) - if (is_last_domain .and. (.not. mesh%periodic_BC(dir))) then - mesh%cell_dims(dir) = mesh%vert_dims(dir) - 1 - else - mesh%cell_dims(dir) = mesh%vert_dims(dir) - end if - end do - - mesh%par%n_offset(:) = mesh%vert_dims(:)*mesh%par%nrank_dir(:) - - do dir = 1, 3 - nproc = mesh%par%nproc_dir(dir) - subd_pos_prev(:) = subd_pos(:) - subd_pos_prev(dir) = modulo(subd_pos(dir) - 2, nproc) + 1 - mesh%par%pprev(dir) = global_ranks(subd_pos_prev(1), & - subd_pos_prev(2), & - subd_pos_prev(3)) - - subd_pos_next(:) = subd_pos(:) - subd_pos_next(dir) = modulo(subd_pos(dir) - nproc, nproc) + 1 - mesh%par%pnext(dir) = global_ranks(subd_pos_next(1), & - subd_pos_next(2), & - subd_pos_next(3)) - end do - - end subroutine domain_decomposition_generic - pure function get_sz(self) result(sz) !! 
Getter for parameter SZ class(mesh_t), intent(in) :: self @@ -226,7 +146,7 @@ pure function get_dims(self, data_loc) result(dims) integer, intent(in) :: data_loc integer, dimension(3) :: dims - dims = get_dims_dataloc(data_loc, self%vert_dims, self%cell_dims) + dims = get_dims_dataloc(data_loc, self%grid%vert_dims, self%grid%cell_dims) end function pure function get_global_dims(self, data_loc) result(dims) @@ -235,8 +155,8 @@ pure function get_global_dims(self, data_loc) result(dims) integer, intent(in) :: data_loc integer, dimension(3) :: dims - dims = get_dims_dataloc(data_loc, self%global_vert_dims, & - self%global_cell_dims) + dims = get_dims_dataloc(data_loc, self%grid%global_vert_dims, & + self%grid%global_cell_dims) end function pure function get_dims_dataloc(data_loc, vert_dims, cell_dims) result(dims) @@ -282,10 +202,10 @@ pure function get_padded_dims_dir(self, dir) result(dims_padded) integer, dimension(3) :: dims_padded if (dir == DIR_C) then - dims_padded = self%vert_dims_padded + dims_padded = self%grid%vert_dims_padded else dims_padded(1) = self%sz - dims_padded(2) = self%vert_dims_padded(dir) + dims_padded(2) = self%grid%vert_dims_padded(dir) dims_padded(3) = self%get_n_groups(dir) end if @@ -308,8 +228,8 @@ pure function get_n_groups_dir(self, dir) result(n_groups) integer, intent(in) :: dir integer :: n_groups - n_groups = (product(self%vert_dims_padded(:))/ & - self%vert_dims_padded(dir))/self%sz + n_groups = (product(self%grid%vert_dims_padded(:))/ & + self%grid%vert_dims_padded(dir))/self%sz end function @@ -380,8 +300,8 @@ pure function get_n_dir(self, dir, data_loc) result(n) integer, intent(in) :: data_loc integer :: n, n_cell, n_vert - n_cell = self%cell_dims(dir) - n_vert = self%vert_dims(dir) + n_cell = self%grid%cell_dims(dir) + n_vert = self%grid%vert_dims(dir) ! default to n_vert n = n_vert @@ -431,13 +351,4 @@ pure function get_coordinates(self, i, j, k) result(xloc) xloc(3) = (k - 1 + self%par%n_offset(3))*self%geo%d(3) end function - pure function is_root(self) result(is_root_rank) - !! Returns whether or not the current rank is the root rank - class(parallel_t), intent(in) :: self - logical :: is_root_rank - - is_root_rank = (self%nrank == 0) - - end function - end module m_mesh diff --git a/src/omp/mesh.f90 b/src/omp/mesh.f90 index f72808b1d..cfaeb44a3 100644 --- a/src/omp/mesh.f90 +++ b/src/omp/mesh.f90 @@ -21,7 +21,6 @@ module m_omp_mesh contains - function init(dims_global, nproc_dir, L_global, & periodic_BC) result(mesh) integer, dimension(3), intent(in) :: dims_global @@ -35,94 +34,8 @@ function init(dims_global, nproc_dir, L_global, & mesh%mesh_t = mesh_t(dims_global, nproc_dir, L_global, & periodic_BC) - call domain_decomposition_2decompfft(mesh) + call domain_decomposition_2decompfft(mesh%grid, mesh%par) end function - - subroutine domain_decomposition_2decompfft(mesh) - !! Supports 1D, 2D, and 3D domain decomposition. - !! - !! Current implementation allows only constant sub-domain size across a - !! given direction. 
- class(omp_mesh_t), intent(inout) :: mesh - integer :: p_col, p_row - integer, allocatable, dimension(:, :, :) :: global_ranks - integer, allocatable, dimension(:) :: global_ranks_lin - integer :: nproc - integer, dimension(3) :: subd_pos, subd_pos_prev, subd_pos_next - logical, dimension(3) :: periodic_bc - integer :: dir - logical :: is_last_domain - integer :: nx, ny, nz - integer :: ierr - integer :: cart_rank - integer, dimension(2) :: coords - - if (mesh%par%is_root()) then - print*, "Domain decomposition by 2decomp&fft" - end if - - nx = mesh%global_cell_dims(1) - ny = mesh%global_cell_dims(2) - nz = mesh%global_cell_dims(3) - - p_row = mesh%par%nproc_dir(2) - p_col = mesh%par%nproc_dir(3) - periodic_bc(:) = mesh%periodic_BC(:) - call decomp_2d_init(nx, ny, nz, p_row, p_col, periodic_bc) - - ! Get global_ranks - allocate(global_ranks(1, p_row, p_col)) - allocate(global_ranks_lin(p_row*p_col)) - global_ranks_lin(:) = 0 - - call MPI_Comm_rank(DECOMP_2D_COMM_CART_X, cart_rank, ierr) - call MPI_Cart_coords(DECOMP_2D_COMM_CART_X, cart_rank, 2, coords, ierr) - - global_ranks_lin(coords(1)+1 + p_row*(coords(2))) = mesh%par%nrank - - call MPI_Allreduce(MPI_IN_PLACE, global_ranks_lin, p_row*p_col, MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD, ierr) - - global_ranks = reshape(global_ranks_lin, shape=[1, p_row, p_col]) - - ! subdomain position in the global domain - subd_pos = findloc(global_ranks, mesh%par%nrank) - - ! local/directional position of the subdomain - mesh%par%nrank_dir(:) = subd_pos(:) - 1 - - ! Get local domain size and offset from 2decomp - mesh%cell_dims(:) = xsize(:) - mesh%par%n_offset(:) = xstart(:) - - ! compute vert_dims from cell_dims - do dir = 1, 3 - is_last_domain = (mesh%par%nrank_dir(dir) + 1 == mesh%par%nproc_dir(dir)) - if (is_last_domain .and. (.not. mesh%periodic_BC(dir))) then - mesh%vert_dims(dir) = mesh%cell_dims(dir) +1 - else - mesh%vert_dims(dir) = mesh%cell_dims(dir) - end if - end do - - ! Get neighbour ranks - do dir = 1, 3 - nproc = mesh%par%nproc_dir(dir) - subd_pos_prev(:) = subd_pos(:) - subd_pos_prev(dir) = modulo(subd_pos(dir) - 2, nproc) + 1 - mesh%par%pprev(dir) = global_ranks(subd_pos_prev(1), & - subd_pos_prev(2), & - subd_pos_prev(3)) - - subd_pos_next(:) = subd_pos(:) - subd_pos_next(dir) = modulo(subd_pos(dir) - nproc, nproc) + 1 - mesh%par%pnext(dir) = global_ranks(subd_pos_next(1), & - subd_pos_next(2), & - subd_pos_next(3)) - end do - - end subroutine domain_decomposition_2decompfft - - end module m_omp_mesh \ No newline at end of file diff --git a/src/par_grid.f90 b/src/par_grid.f90 new file mode 100644 index 000000000..9833c4f1f --- /dev/null +++ b/src/par_grid.f90 @@ -0,0 +1,44 @@ +module m_par + + implicit none + ! Stores parallel domain related information + type :: par_t + integer :: nrank ! local rank ID + integer :: nproc ! total number of ranks/proc participating in the domain decomposition + integer, dimension(3) :: nrank_dir ! local rank ID in each direction + integer, dimension(3) :: nproc_dir ! total number of proc in each direction + integer, dimension(3) :: n_offset ! number of cells offset in each direction due to domain decomposition + integer, dimension(3) :: pnext ! rank ID of the previous rank in each direction + integer, dimension(3) :: pprev ! rank ID of the next rank in each direction + contains + procedure :: is_root ! returns if the current rank is the root rank + end type + + contains + + pure function is_root(self) result(is_root_rank) + !! 
Returns wether or not the current rank is the root rank + class(par_t), intent(in) :: self + logical :: is_root_rank + + is_root_rank = (self%nrank == 0) + + end function + +end module m_par + +module m_grid + implicit none + + ! Stores grid information + type :: grid_t + integer, dimension(3) :: global_vert_dims ! global number of vertices in each direction without padding (cartesian structure) + integer, dimension(3) :: global_cell_dims ! global number of cells in each direction without padding (cartesian structure) + + integer, dimension(3) :: vert_dims_padded ! local domain size including padding (cartesian structure) + integer, dimension(3) :: vert_dims ! local number of vertices in each direction without padding (cartesian structure) + integer, dimension(3) :: cell_dims ! local number of cells in each direction without padding (cartesian structure) + logical, dimension(3) :: periodic_BC ! Whether or not a direction has a periodic BC + end type +end module + diff --git a/src/xcompact.f90 b/src/xcompact.f90 index af37fb309..8e217682e 100644 --- a/src/xcompact.f90 +++ b/src/xcompact.f90 @@ -1,7 +1,6 @@ program xcompact use mpi - use decomp_2d_fft use m_allocator use m_base_backend use m_common, only: pi, globs_t, POISSON_SOLVER_FFT, POISSON_SOLVER_CG, & @@ -19,7 +18,6 @@ program xcompact #else use m_omp_backend use m_omp_common, only: SZ - use m_omp_mesh #endif implicit none @@ -31,15 +29,14 @@ program xcompact type(solver_t) :: solver type(time_intg_t) :: time_integrator type(dirps_t) :: xdirps, ydirps, zdirps + type(mesh_t), target :: mesh #ifdef CUDA type(cuda_backend_t), target :: cuda_backend type(cuda_allocator_t), target :: cuda_allocator - type(mesh_t), target :: mesh integer :: ndevs, devnum #else type(omp_backend_t), target :: omp_backend - type(omp_mesh_t), target :: mesh #endif type(allocator_t), target :: omp_allocator @@ -63,7 +60,7 @@ program xcompact #endif ! Global number of cells in each direction - dims_global = [256, 256, 256] + dims_global = [96, 96, 96] ! Global domain dimensions L_global = [2*pi, 2*pi, 2*pi] @@ -71,16 +68,12 @@ program xcompact ! Domain decomposition in each direction nproc_dir = [1, 1, nproc] -#ifdef CUDA mesh = mesh_t(dims_global, nproc_dir, L_global) -#else - mesh = omp_mesh_t(dims_global, nproc_dir, L_global) -#endif globs%dt = 0.001_dp globs%nu = 1._dp/1600._dp globs%n_iters = 20000 - globs%n_output = 100 + globs%n_output = 10 globs%poisson_solver_type = POISSON_SOLVER_FFT diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 61527497f..7e440d1c0 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -39,8 +39,7 @@ define_test(omp/test_omp_dist_transeq.f90 ${CMAKE_CTEST_NPROCS} omp) define_test(test_scalar_product.f90 ${CMAKE_CTEST_NPROCS} omp) define_test(test_sum_intox.f90 ${CMAKE_CTEST_NPROCS} omp) -if(${CMAKE_Fortran_COMPILER_ID} STREQUAL "PGI" OR - ${CMAKE_Fortran_COMPILER_ID} STREQUAL "NVHPC") +if(${BACKEND} STREQUAL "CUDA") define_test(test_reordering.f90 1 cuda) define_test(test_time_integrator.f90 1 cuda) define_test(cuda/test_cuda_allocator.f90 1 cuda)
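
Usage note on the build options introduced by PATCH 8: the Poisson solver is now chosen
through the POISSON_SOLVER cache variable (FFT or ITER, defaulting to FFT), while BACKEND
is derived automatically from the Fortran compiler ID (PGI/NVHPC selects CUDA, anything
else selects OMP). Only the FFT + OMP combination requires the decomp2d package; the other
combinations fall back to the generic in-house decomposition sources. A minimal configure
sketch, assuming an out-of-source build with CMake >= 3.13 for the -S/-B options; the
build directory names below are illustrative only:

    # OMP backend with the 2decomp&fft-based FFT Poisson solver (default); needs decomp2d installed
    cmake -S . -B build_fft -DPOISSON_SOLVER=FFT
    cmake --build build_fft

    # Iterative-solver configuration: generic x3d2 decomposition, no decomp2d dependency
    cmake -S . -B build_iter -DPOISSON_SOLVER=ITER
    cmake --build build_iter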