diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8ee17c0e6..edb811370 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,5 +2,18 @@ cmake_minimum_required(VERSION 3.10)
 project(x3d2 LANGUAGES Fortran)
 enable_testing()
 
+#
+# Set the Poisson solver choice
+#
+set(POISSON_SOLVER FFT CACHE STRING
+    "Select the Poisson solver: FFT or ITER")
+
+if(${CMAKE_Fortran_COMPILER_ID} STREQUAL "PGI" OR
+   ${CMAKE_Fortran_COMPILER_ID} STREQUAL "NVHPC")
+  set(BACKEND CUDA)
+else()
+  set(BACKEND OMP)
+endif()
+
 add_subdirectory(src)
 add_subdirectory(tests)
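Both options are ordinary CMake cache variables, so the solver can be chosen at configure time without editing the build files, and BACKEND is then inferred from the compiler ID. An illustrative invocation (build directory and compiler wrapper names are assumptions, not part of this patch):

    cmake -S . -B build -DCMAKE_Fortran_COMPILER=mpif90 -DPOISSON_SOLVER=FFT
    cmake --build build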
diff --git a/src/2decompfft/decomp.f90 b/src/2decompfft/decomp.f90
new file mode 100644
index 000000000..03c5d2186
--- /dev/null
+++ b/src/2decompfft/decomp.f90
@@ -0,0 +1,105 @@
+submodule(m_decomp) m_decomp_2decompfft
+
+  use mpi
+  use m_decomp, only: decomp_t
+  implicit none
+
+  type, extends(decomp_t) :: decomp_2decompfft_t
+  contains
+    procedure :: decomposition => decomposition_2decompfft
+  end type
+
+contains
+
+  module subroutine decomposition_2decompfft(self, grid, par)
+    !! Supports 1D, 2D, and 3D domain decomposition.
+    !!
+    !! Current implementation allows only constant sub-domain size across a
+    !! given direction.
+    use m_grid, only: grid_t
+    use m_par, only: par_t
+    use decomp_2d, only: decomp_2d_init, DECOMP_2D_COMM_CART_X, xsize, xstart
+
+    class(decomp_2decompfft_t) :: self
+    class(grid_t), intent(inout) :: grid
+    class(par_t), intent(inout) :: par
+
+    integer :: p_col, p_row
+    integer, allocatable, dimension(:, :, :) :: global_ranks
+    integer, allocatable, dimension(:) :: global_ranks_lin
+    integer :: nproc
+    integer, dimension(3) :: subd_pos, subd_pos_prev, subd_pos_next
+    logical, dimension(3) :: periodic_bc
+    integer :: dir
+    logical :: is_last_domain
+    integer :: nx, ny, nz
+    integer :: ierr
+    integer :: cart_rank
+    integer, dimension(2) :: coords
+
+    if (par%is_root()) then
+      print *, "Domain decomposition by 2decomp&fft"
+    end if
+
+    nx = grid%global_cell_dims(1)
+    ny = grid%global_cell_dims(2)
+    nz = grid%global_cell_dims(3)
+
+    p_row = par%nproc_dir(2)
+    p_col = par%nproc_dir(3)
+    periodic_bc(:) = grid%periodic_BC(:)
+    call decomp_2d_init(nx, ny, nz, p_row, p_col, periodic_bc)
+
+    ! Get global_ranks
+    allocate (global_ranks(1, p_row, p_col))
+    allocate (global_ranks_lin(p_row*p_col))
+    global_ranks_lin(:) = 0
+
+    call MPI_Comm_rank(DECOMP_2D_COMM_CART_X, cart_rank, ierr)
+    call MPI_Cart_coords(DECOMP_2D_COMM_CART_X, cart_rank, 2, coords, ierr)
+
+    global_ranks_lin(coords(1) + 1 + p_row*(coords(2))) = par%nrank
+
+    call MPI_Allreduce(MPI_IN_PLACE, global_ranks_lin, p_row*p_col, &
+                       MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD, ierr)
+
+    global_ranks = reshape(global_ranks_lin, shape=[1, p_row, p_col])
+
+    ! subdomain position in the global domain
+    subd_pos = findloc(global_ranks, par%nrank)
+
+    ! local/directional position of the subdomain
+    par%nrank_dir(:) = subd_pos(:) - 1
+
+    ! Get local domain size and offset from 2decomp; xstart is 1-based
+    ! while n_offset is a 0-based offset
+    grid%cell_dims(:) = xsize(:)
+    par%n_offset(:) = xstart(:) - 1
+
+    ! compute vert_dims from cell_dims
+    do dir = 1, 3
+      is_last_domain = (par%nrank_dir(dir) + 1 == par%nproc_dir(dir))
+      if (is_last_domain .and. (.not. grid%periodic_BC(dir))) then
+        grid%vert_dims(dir) = grid%cell_dims(dir) + 1
+      else
+        grid%vert_dims(dir) = grid%cell_dims(dir)
+      end if
+    end do
+
+    ! Get neighbour ranks
+    do dir = 1, 3
+      nproc = par%nproc_dir(dir)
+      subd_pos_prev(:) = subd_pos(:)
+      subd_pos_prev(dir) = modulo(subd_pos(dir) - 2, nproc) + 1
+      par%pprev(dir) = global_ranks(subd_pos_prev(1), &
+                                    subd_pos_prev(2), &
+                                    subd_pos_prev(3))
+
+      subd_pos_next(:) = subd_pos(:)
+      subd_pos_next(dir) = modulo(subd_pos(dir) - nproc, nproc) + 1
+      par%pnext(dir) = global_ranks(subd_pos_next(1), &
+                                    subd_pos_next(2), &
+                                    subd_pos_next(3))
+    end do
+
+  end subroutine decomposition_2decompfft
+
+end submodule
\ No newline at end of file
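The wrap-around neighbour arithmetic works on 1-based positions, and the same loop reappears verbatim in decomp_generic.f90 below, so it is a candidate for factoring out. Note that modulo(pos - nproc, nproc) + 1 is simply modulo(pos, nproc) + 1. A minimal standalone sketch of the index arithmetic:

    program neighbour_demo
      implicit none
      integer :: pos, nproc, prev, next
      nproc = 4
      do pos = 1, nproc
        prev = modulo(pos - 2, nproc) + 1  ! 1 -> 4, 2 -> 1, 3 -> 2, 4 -> 3
        next = modulo(pos, nproc) + 1      ! 1 -> 2, 2 -> 3, 3 -> 4, 4 -> 1
        print '(3(a, i0))', 'pos ', pos, ' prev ', prev, ' next ', next
      end do
    end program neighbour_demo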
diff --git a/src/omp/poisson_fft.f90 b/src/2decompfft/omp/poisson_fft.f90
similarity index 68%
rename from src/omp/poisson_fft.f90
rename to src/2decompfft/omp/poisson_fft.f90
index 87105738b..8626f04e8 100644
--- a/src/omp/poisson_fft.f90
+++ b/src/2decompfft/omp/poisson_fft.f90
@@ -1,9 +1,13 @@
 module m_omp_poisson_fft
+
+  use decomp_2d_constants, only: PHYSICAL_IN_X
+  use decomp_2d_fft, only: decomp_2d_fft_init, decomp_2d_fft_3d
   use m_allocator, only: field_t
   use m_common, only: dp
   use m_poisson_fft, only: poisson_fft_t
   use m_tdsops, only: dirps_t
   use m_mesh, only: mesh_t
+  use m_omp_spectral, only: process_spectral_div_u
 
   implicit none
 
@@ -35,6 +39,13 @@ function init(mesh, xdirps, ydirps, zdirps) result(poisson_fft)
 
     call poisson_fft%base_init(mesh, xdirps, ydirps, zdirps)
 
+    if (mesh%par%is_root()) then
+      print *, "Initialising 2decomp&fft"
+    end if
+
+    call decomp_2d_fft_init(PHYSICAL_IN_X)
+    allocate (poisson_fft%c_x(poisson_fft%nx_spec, poisson_fft%ny_spec, poisson_fft%nz_spec))
+
   end function init
 
   subroutine fft_forward_omp(self, f_in)
@@ -42,6 +53,9 @@ subroutine fft_forward_omp(self, f_in)
 
     class(omp_poisson_fft_t) :: self
     class(field_t), intent(in) :: f_in
+
+    call decomp_2d_fft_3d(f_in%data, self%c_x)
+
   end subroutine fft_forward_omp
 
   subroutine fft_backward_omp(self, f_out)
@@ -49,12 +63,20 @@ subroutine fft_backward_omp(self, f_out)
 
     class(omp_poisson_fft_t) :: self
     class(field_t), intent(inout) :: f_out
+
+    call decomp_2d_fft_3d(self%c_x, f_out%data)
+
   end subroutine fft_backward_omp
 
   subroutine fft_postprocess_omp(self)
     implicit none
 
     class(omp_poisson_fft_t) :: self
+
+    call process_spectral_div_u(self%c_x, self%waves, self%nx_spec, self%ny_spec, self%nz_spec, &
+                                self%y_sp_st, self%nx_glob, self%ny_glob, self%nz_glob, &
+                                self%ax, self%bx, self%ay, self%by, self%az, self%bz)
+
   end subroutine fft_postprocess_omp
 
 end module m_omp_poisson_fft
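2DECOMP&FFT transforms are unnormalised, which is why process_spectral_div_u divides by nx*ny*nz before anything else. A minimal sketch of the round trip, assuming in_x and out_x are x-pencil real arrays and spec is the matching complex spectral-space array (the names are illustrative, not from this patch):

    call decomp_2d_fft_3d(in_x, spec)    ! forward, real to complex
    spec = spec/(real(nx, dp)*ny*nz)     ! normalise once per round trip
    call decomp_2d_fft_3d(spec, out_x)   ! backward, complex to real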
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 8f3527576..91bf9959f 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -9,11 +9,16 @@ set(SRC
   solver.f90
   tdsops.f90
   time_integrator.f90
+  ordering.f90
+  mesh.f90
+  decomp.f90
+  field.f90
+  par_grid.f90
   vector_calculus.f90
   omp/backend.f90
   omp/common.f90
   omp/kernels/distributed.f90
-  omp/poisson_fft.f90
+  omp/kernels/spectral_processing.f90
   omp/sendrecv.f90
   omp/exec_dist.f90
 )
@@ -31,21 +36,31 @@ set(CUDASRC
   cuda/sendrecv.f90
   cuda/tdsops.f90
 )
+set(2DECOMPFFTSRC
+  2decompfft/omp/poisson_fft.f90
+  2decompfft/decomp.f90
+)
+set(GENERICDECOMPSRC
+  decomp_generic.f90
+)
 
-if(${CMAKE_Fortran_COMPILER_ID} STREQUAL "PGI" OR
-   ${CMAKE_Fortran_COMPILER_ID} STREQUAL "NVHPC")
+if(${BACKEND} STREQUAL "CUDA")
   list(APPEND SRC ${CUDASRC})
 endif()
 
+if (${POISSON_SOLVER} STREQUAL "FFT" AND ${BACKEND} STREQUAL "OMP")
+  list(APPEND SRC ${2DECOMPFFTSRC})
+else()
+  list(APPEND SRC ${GENERICDECOMPSRC})
+endif()
+
 add_library(x3d2 STATIC ${SRC})
 target_include_directories(x3d2 INTERFACE ${CMAKE_CURRENT_BINARY_DIR})
 
 add_executable(xcompact xcompact.f90)
 target_link_libraries(xcompact PRIVATE x3d2)
 
-if(${CMAKE_Fortran_COMPILER_ID} STREQUAL "PGI" OR
-   ${CMAKE_Fortran_COMPILER_ID} STREQUAL "NVHPC")
-
+if(${BACKEND} STREQUAL "CUDA")
   set(CMAKE_Fortran_FLAGS "-cpp -cuda")
   set(CMAKE_Fortran_FLAGS_DEBUG "-g -O0 -traceback -Mbounds -Mchkptr -Ktrap=fp")
   set(CMAKE_Fortran_FLAGS_RELEASE "-O3 -fast")
@@ -59,11 +74,17 @@ elseif(${CMAKE_Fortran_COMPILER_ID} STREQUAL "GNU")
   set(CMAKE_Fortran_FLAGS_RELEASE "-O3 -ffast-math")
 endif()
 
+if (${POISSON_SOLVER} STREQUAL "FFT" AND ${BACKEND} STREQUAL "OMP")
+  message(STATUS "Using the FFT poisson solver with 2decomp&fft")
+  find_package(decomp2d REQUIRED)
+  include_directories(${decomp2d_INCLUDE_DIRS})
+  target_link_libraries(x3d2 PRIVATE decomp2d)
+endif()
+
 find_package(OpenMP REQUIRED)
 target_link_libraries(x3d2 PRIVATE OpenMP::OpenMP_Fortran)
 target_link_libraries(xcompact PRIVATE OpenMP::OpenMP_Fortran)
 
 find_package(MPI REQUIRED)
 target_link_libraries(x3d2 PRIVATE MPI::MPI_Fortran)
-target_link_libraries(xcompact PRIVATE MPI::MPI_Fortran)
-
+target_link_libraries(xcompact PRIVATE MPI::MPI_Fortran)
\ No newline at end of file
diff --git a/src/allocator.f90 b/src/allocator.f90
index 607a54c9b..d81ccd0a0 100644
--- a/src/allocator.f90
+++ b/src/allocator.f90
@@ -64,7 +64,7 @@ module m_allocator
 contains
 
   function allocator_init(mesh, sz) result(allocator)
-    type(mesh_t), target, intent(inout) :: mesh
+    class(mesh_t), target, intent(inout) :: mesh
     integer, intent(in) :: sz
 
     type(allocator_t) :: allocator
diff --git a/src/decomp.f90 b/src/decomp.f90
new file mode 100644
index 000000000..69b68ab03
--- /dev/null
+++ b/src/decomp.f90
@@ -0,0 +1,21 @@
+module m_decomp
+
+  implicit none
+
+  type, abstract :: decomp_t
+  contains
+    procedure(decomposition), public, deferred :: decomposition
+  end type decomp_t
+
+  interface
+    subroutine decomposition(self, grid, par)
+      use m_grid, only: grid_t
+      use m_par, only: par_t
+      import :: decomp_t
+      class(decomp_t) :: self
+      class(grid_t), intent(inout) :: grid
+      class(par_t), intent(inout) :: par
+    end subroutine
+  end interface
+
+end module m_decomp
\ No newline at end of file
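Note that mesh_init below declares class(decomp_t), allocatable :: decomp and calls decomp%decomposition, but no allocation with a concrete type is visible in this patch. A minimal sketch of one way to close that gap, assuming a create_decomp factory (hypothetical name) declared in m_decomp and implemented in whichever submodule the build compiles in:

    ! in m_decomp: a separate module procedure interface for the factory
    interface
      module subroutine create_decomp(decomp)
        import :: decomp_t
        class(decomp_t), allocatable, intent(out) :: decomp
      end subroutine
    end interface

    ! in submodule m_decomp_generic: pick the concrete type
    module subroutine create_decomp(decomp)
      class(decomp_t), allocatable, intent(out) :: decomp
      allocate (decomp_generic_t :: decomp)  ! decomp_2decompfft_t in the FFT build
    end subroutine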
diff --git a/src/decomp_generic.f90 b/src/decomp_generic.f90
new file mode 100644
index 000000000..4fe301faf
--- /dev/null
+++ b/src/decomp_generic.f90
@@ -0,0 +1,80 @@
+submodule(m_decomp) m_decomp_generic
+
+  use m_decomp, only: decomp_t
+  implicit none
+
+  type, extends(decomp_t) :: decomp_generic_t
+  contains
+    procedure :: decomposition => decomposition_generic
+  end type
+
+contains
+
+  module subroutine decomposition_generic(self, grid, par)
+    use m_grid, only: grid_t
+    use m_par, only: par_t
+
+    class(decomp_generic_t) :: self
+    class(grid_t), intent(inout) :: grid
+    class(par_t), intent(inout) :: par
+
+    integer, allocatable, dimension(:, :, :) :: global_ranks
+    integer :: i, nproc_x, nproc_y, nproc_z, nproc
+    integer, dimension(3) :: subd_pos, subd_pos_prev, subd_pos_next
+    integer :: dir
+    logical :: is_last_domain
+
+    if (par%is_root()) then
+      print *, "Domain decomposition by x3d2 (generic)"
+    end if
+
+    ! Number of processes on a direction basis
+    nproc_x = par%nproc_dir(1)
+    nproc_y = par%nproc_dir(2)
+    nproc_z = par%nproc_dir(3)
+
+    ! Define number of cells and vertices in each direction
+    grid%vert_dims = grid%global_vert_dims/par%nproc_dir
+
+    ! A 3D array corresponding to each region in the global domain
+    allocate (global_ranks(nproc_x, nproc_y, nproc_z))
+
+    ! set the corresponding global rank for each sub-domain
+    global_ranks = reshape([(i, i=0, par%nproc - 1)], &
+                           shape=[nproc_x, nproc_y, nproc_z])
+
+    ! subdomain position in the global domain
+    subd_pos = findloc(global_ranks, par%nrank)
+
+    ! local/directional position of the subdomain
+    par%nrank_dir(:) = subd_pos(:) - 1
+
+    do dir = 1, 3
+      is_last_domain = (par%nrank_dir(dir) + 1 == par%nproc_dir(dir))
+      if (is_last_domain .and. (.not. grid%periodic_BC(dir))) then
+        grid%cell_dims(dir) = grid%vert_dims(dir) - 1
+      else
+        grid%cell_dims(dir) = grid%vert_dims(dir)
+      end if
+    end do
+
+    par%n_offset(:) = grid%vert_dims(:)*par%nrank_dir(:)
+
+    do dir = 1, 3
+      nproc = par%nproc_dir(dir)
+      subd_pos_prev(:) = subd_pos(:)
+      subd_pos_prev(dir) = modulo(subd_pos(dir) - 2, nproc) + 1
+      par%pprev(dir) = global_ranks(subd_pos_prev(1), &
+                                    subd_pos_prev(2), &
+                                    subd_pos_prev(3))
+
+      subd_pos_next(:) = subd_pos(:)
+      subd_pos_next(dir) = modulo(subd_pos(dir) - nproc, nproc) + 1
+      par%pnext(dir) = global_ranks(subd_pos_next(1), &
+                                    subd_pos_next(2), &
+                                    subd_pos_next(3))
+    end do
+
+  end subroutine decomposition_generic
+
+end submodule
\ No newline at end of file
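The reshape fills global_ranks in Fortran column-major order, so consecutive MPI ranks are neighbours in x first. A tiny worked example (values illustrative):

    ! with nproc_dir = [2, 2, 1]:
    !   global_ranks(:, 1, 1) = [0, 1]
    !   global_ranks(:, 2, 1) = [2, 3]
    ! so findloc(global_ranks, 3) = [2, 2, 1] and nrank_dir = [1, 1, 0]
    global_ranks = reshape([(i, i=0, 3)], shape=[2, 2, 1])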
diff --git a/src/mesh.f90 b/src/mesh.f90
index 76dd0360a..1b370d47f 100644
--- a/src/mesh.f90
+++ b/src/mesh.f90
@@ -7,6 +7,8 @@ module m_mesh
                       X_EDGE, Y_EDGE, Z_EDGE, &
                       BC_PERIODIC, BC_NEUMANN, BC_DIRICHLET
   use m_field, only: field_t
+  use m_par
+  use m_grid
 
   implicit none
 
@@ -16,34 +18,13 @@ module m_mesh
     real(dp), dimension(3) :: L ! Global dimensions of the domain in each direction
   end type
 
-  ! Stores parallel domain related information
-  type :: parallel_t
-    integer :: nrank ! local rank ID
-    integer :: nproc ! total number of ranks/proc participating in the domain decomposition
-    integer, dimension(3) :: nrank_dir ! local rank ID in each direction
-    integer, dimension(3) :: nproc_dir ! total number of proc in each direction
-    integer, dimension(3) :: n_offset ! number of cells offset in each direction due to domain decomposition
-    integer, dimension(3) :: pnext ! rank ID of the previous rank in each direction
-    integer, dimension(3) :: pprev ! rank ID of the next rank in each direction
-  contains
-    procedure :: is_root ! returns if the current rank is the root rank
-  end type
-
   ! The mesh class stores all the information about the global and local (due to domain decomposition) mesh
   ! It also includes getter functions to access some of its parameters
   type :: mesh_t
-    integer, dimension(3), private :: global_vert_dims ! global number of vertices in each direction without padding (cartesian structure)
-    integer, dimension(3), private :: global_cell_dims ! global number of cells in each direction without padding (cartesian structure)
-
-    integer, dimension(3), private :: vert_dims_padded ! local domain size including padding (cartesian structure)
-    integer, dimension(3), private :: vert_dims ! local number of vertices in each direction without padding (cartesian structure)
-    integer, dimension(3), private :: cell_dims ! local number of cells in each direction without padding (cartesian structure)
-    logical, dimension(3), private :: periodic_BC ! Whether or not a direction has a periodic BC
-    integer, dimension(3, 2), private :: BCs_global
-    integer, dimension(3, 2), private :: BCs
     integer, private :: sz
     type(geo_t), allocatable :: geo ! object containing geometry information
-    type(parallel_t), allocatable :: par ! object containing parallel domain decomposition information
+    class(grid_t), allocatable :: grid ! object containing grid information
+    class(par_t), allocatable :: par ! object containing parallel domain decomposition information
 
   contains
     procedure :: get_SZ
@@ -82,20 +63,24 @@ module m_mesh
   function mesh_init(dims_global, nproc_dir, L_global, BC_x, BC_y, BC_z) &
     result(mesh)
+    use m_decomp, only: decomp_t
     !! Completely initialise the mesh object.
     !! Upon initialisation the mesh object can be read-only and shouldn't be edited
     !! Takes as argument global information about the mesh like its length, number of cells and decomposition in each direction
     integer, dimension(3), intent(in) :: dims_global
     integer, dimension(3), intent(in) :: nproc_dir ! Number of proc in each direction
     real(dp), dimension(3), intent(in) :: L_global
+    class(mesh_t), allocatable :: mesh
+    class(decomp_t), allocatable :: decomp
     character(len=*), dimension(2), intent(in) :: BC_x, BC_y, BC_z
-    type(mesh_t) :: mesh
 
     character(len=20), dimension(3, 2) :: BC_all
     logical :: is_first_domain, is_last_domain
     integer :: dir, j
     integer :: ierr
 
+    allocate(mesh)
     allocate (mesh%geo)
     allocate (mesh%par)
+    allocate (mesh%grid) ! grid is class allocatable and must be allocated before use
@@ -106,11 +91,11 @@ function mesh_init(dims_global, nproc_dir, L_global, BC_x, BC_y, BC_z) &
       do j = 1, 2
         select case (trim(BC_all(dir, j)))
         case ('periodic')
-          mesh%BCs_global(dir, j) = BC_PERIODIC
+          mesh%grid%BCs_global(dir, j) = BC_PERIODIC
         case ('neumann')
-          mesh%BCs_global(dir, j) = BC_NEUMANN
+          mesh%grid%BCs_global(dir, j) = BC_NEUMANN
        case ('dirichlet')
-          mesh%BCs_global(dir, j) = BC_DIRICHLET
+          mesh%grid%BCs_global(dir, j) = BC_DIRICHLET
         case default
           error stop 'Unknown BC'
         end select
@@ -118,36 +103,37 @@ function mesh_init(dims_global, nproc_dir, L_global, BC_x, BC_y, BC_z) &
     end do
 
     do dir = 1, 3
-      if (any(mesh%BCs_global(dir, :) == BC_PERIODIC) .and. &
-          (.not. all(mesh%BCs_global(dir, :) == BC_PERIODIC))) then
+      if (any(mesh%grid%BCs_global(dir, :) == BC_PERIODIC) .and. &
+          (.not. all(mesh%grid%BCs_global(dir, :) == BC_PERIODIC))) then
         error stop 'BCs are incompatible: in a direction make sure to have &
                    &either both sides periodic or none.'
       end if
-      mesh%periodic_BC(dir) = all(mesh%BCs_global(dir, :) == BC_PERIODIC)
+      mesh%grid%periodic_BC(dir) = all(mesh%grid%BCs_global(dir, :) == BC_PERIODIC)
     end do
 
     ! Set global vertex dims
-    mesh%global_vert_dims(:) = dims_global
+    mesh%grid%global_vert_dims(:) = dims_global
 
     ! Set global cell dims
     do dir = 1, 3
-      if (mesh%periodic_BC(dir)) then
-        mesh%global_cell_dims(dir) = mesh%global_vert_dims(dir)
+      if (mesh%grid%periodic_BC(dir)) then
+        mesh%grid%global_cell_dims(dir) = mesh%grid%global_vert_dims(dir)
       else
-        mesh%global_cell_dims(dir) = mesh%global_vert_dims(dir) - 1
+        mesh%grid%global_cell_dims(dir) = mesh%grid%global_vert_dims(dir) - 1
       end if
     end do
 
     ! Geometry
     mesh%geo%L = L_global
-    mesh%geo%d = mesh%geo%L/mesh%global_cell_dims
+    mesh%geo%d = mesh%geo%L/mesh%grid%global_cell_dims
 
     ! Parallel domain decomposition
    mesh%par%nproc_dir(:) = nproc_dir
     mesh%par%nproc = product(nproc_dir(:))
     call MPI_Comm_rank(MPI_COMM_WORLD, mesh%par%nrank, ierr)
     call MPI_Comm_size(MPI_COMM_WORLD, mesh%par%nproc, ierr)
 
-    call domain_decomposition(mesh)
+
+    call decomp%decomposition(mesh%grid, mesh%par)
 
     ! Set subdomain BCs
     do dir = 1, 3
@@ -155,33 +141,33 @@ function mesh_init(dims_global, nproc_dir, L_global, BC_x, BC_y, BC_z) &
       is_last_domain = mesh%par%nrank_dir(dir) + 1 == mesh%par%nproc_dir(dir)
       ! subdomain-subdomain boundaries are identical to periodic BCs
       if (is_first_domain) then
-        mesh%BCs(dir, 1) = mesh%BCs_global(dir, 1)
-        mesh%BCs(dir, 2) = BC_PERIODIC
+        mesh%grid%BCs(dir, 1) = mesh%grid%BCs_global(dir, 1)
+        mesh%grid%BCs(dir, 2) = BC_PERIODIC
       else if (is_last_domain) then
-        mesh%BCs(dir, 1) = BC_PERIODIC
-        mesh%BCs(dir, 2) = mesh%BCs_global(dir, 2)
+        mesh%grid%BCs(dir, 1) = BC_PERIODIC
+        mesh%grid%BCs(dir, 2) = mesh%grid%BCs_global(dir, 2)
       else
-        mesh%BCs(dir, :) = BC_PERIODIC
+        mesh%grid%BCs(dir, :) = BC_PERIODIC
       end if
     end do
 
     ! Define number of cells and vertices in each direction
-    mesh%vert_dims = mesh%global_vert_dims/mesh%par%nproc_dir
+    mesh%grid%vert_dims = mesh%grid%global_vert_dims/mesh%par%nproc_dir
 
     do dir = 1, 3
       is_last_domain = (mesh%par%nrank_dir(dir) + 1 == mesh%par%nproc_dir(dir))
-      if (is_last_domain .and. (.not. mesh%periodic_BC(dir))) then
-        mesh%cell_dims(dir) = mesh%vert_dims(dir) - 1
+      if (is_last_domain .and. (.not. mesh%grid%periodic_BC(dir))) then
+        mesh%grid%cell_dims(dir) = mesh%grid%vert_dims(dir) - 1
       else
-        mesh%cell_dims(dir) = mesh%vert_dims(dir)
+        mesh%grid%cell_dims(dir) = mesh%grid%vert_dims(dir)
       end if
     end do
 
     ! Set offset for global indices
-    mesh%par%n_offset(:) = mesh%vert_dims(:)*mesh%par%nrank_dir(:)
+    mesh%par%n_offset(:) = mesh%grid%vert_dims(:)*mesh%par%nrank_dir(:)
 
     ! Define default values
-    mesh%vert_dims_padded = mesh%vert_dims
+    mesh%grid%vert_dims_padded = mesh%grid%vert_dims
     mesh%sz = 1
 
   end function mesh_init
@@ -190,7 +176,7 @@ subroutine set_padded_dims(self, vert_dims)
     class(mesh_t), intent(inout) :: self
     integer, dimension(3), intent(in) :: vert_dims
 
-    self%vert_dims_padded = vert_dims
+    self%grid%vert_dims_padded = vert_dims
 
   end subroutine
 
@@ -202,53 +188,6 @@ subroutine set_sz(self, sz)
 
   end subroutine
 
-  subroutine domain_decomposition(mesh)
-    !! Supports 1D, 2D, and 3D domain decomposition.
-    !!
-    !! Current implementation allows only constant sub-domain size across a
-    !! given direction.
-    class(mesh_t), intent(inout) :: mesh
-
-    integer, allocatable, dimension(:, :, :) :: global_ranks
-    integer :: i, nproc_x, nproc_y, nproc_z, nproc
-    integer, dimension(3) :: subd_pos, subd_pos_prev, subd_pos_next
-    integer :: dir
-
-    ! Number of processes on a direction basis
-    nproc_x = mesh%par%nproc_dir(1)
-    nproc_y = mesh%par%nproc_dir(2)
-    nproc_z = mesh%par%nproc_dir(3)
-
-    ! A 3D array corresponding to each region in the global domain
-    allocate (global_ranks(nproc_x, nproc_y, nproc_z))
-
-    ! set the corresponding global rank for each sub-domain
-    global_ranks = reshape([(i, i=0, mesh%par%nproc - 1)], &
-                           shape=[nproc_x, nproc_y, nproc_z])
-
-    ! subdomain position in the global domain
-    subd_pos = findloc(global_ranks, mesh%par%nrank)
-
-    ! local/directional position of the subdomain
-    mesh%par%nrank_dir(:) = subd_pos(:) - 1
-
-    do dir = 1, 3
-      nproc = mesh%par%nproc_dir(dir)
-      subd_pos_prev(:) = subd_pos(:)
-      subd_pos_prev(dir) = modulo(subd_pos(dir) - 2, nproc) + 1
-      mesh%par%pprev(dir) = global_ranks(subd_pos_prev(1), &
-                                         subd_pos_prev(2), &
-                                         subd_pos_prev(3))
-
-      subd_pos_next(:) = subd_pos(:)
-      subd_pos_next(dir) = modulo(subd_pos(dir) - nproc, nproc) + 1
-      mesh%par%pnext(dir) = global_ranks(subd_pos_next(1), &
-                                         subd_pos_next(2), &
-                                         subd_pos_next(3))
-    end do
-
-  end subroutine domain_decomposition
-
   pure function get_sz(self) result(sz)
     !! Getter for parameter SZ
     class(mesh_t), intent(in) :: self
@@ -264,7 +203,7 @@ pure function get_dims(self, data_loc) result(dims)
     integer, intent(in) :: data_loc
     integer, dimension(3) :: dims
 
-    dims = get_dims_dataloc(data_loc, self%vert_dims, self%cell_dims)
+    dims = get_dims_dataloc(data_loc, self%grid%vert_dims, self%grid%cell_dims)
   end function
 
@@ -273,8 +212,8 @@ pure function get_global_dims(self, data_loc) result(dims)
     integer, intent(in) :: data_loc
     integer, dimension(3) :: dims
 
-    dims = get_dims_dataloc(data_loc, self%global_vert_dims, &
-                            self%global_cell_dims)
+    dims = get_dims_dataloc(data_loc, self%grid%global_vert_dims, &
+                            self%grid%global_cell_dims)
   end function
 
   pure function get_dims_dataloc(data_loc, vert_dims, cell_dims) result(dims)
@@ -320,10 +259,10 @@ pure function get_padded_dims_dir(self, dir) result(dims_padded)
     integer, dimension(3) :: dims_padded
 
     if (dir == DIR_C) then
-      dims_padded = self%vert_dims_padded
+      dims_padded = self%grid%vert_dims_padded
     else
       dims_padded(1) = self%sz
-      dims_padded(2) = self%vert_dims_padded(dir)
+      dims_padded(2) = self%grid%vert_dims_padded(dir)
       dims_padded(3) = self%get_n_groups(dir)
     end if
 
@@ -346,8 +285,8 @@ pure function get_n_groups_dir(self, dir) result(n_groups)
     integer, intent(in) :: dir
     integer :: n_groups
 
-    n_groups = (product(self%vert_dims_padded(:))/ &
-                self%vert_dims_padded(dir))/self%sz
+    n_groups = (product(self%grid%vert_dims_padded(:))/ &
+                self%grid%vert_dims_padded(dir))/self%sz
 
   end function
 
@@ -418,8 +357,8 @@ pure function get_n_dir(self, dir, data_loc) result(n)
     integer, intent(in) :: data_loc
     integer :: n, n_cell, n_vert
 
-    n_cell = self%cell_dims(dir)
-    n_vert = self%vert_dims(dir)
+    n_cell = self%grid%cell_dims(dir)
+    n_vert = self%grid%vert_dims(dir)
 
     ! default to n_vert
     n = n_vert
@@ -469,13 +408,4 @@ pure function get_coordinates(self, i, j, k) result(xloc)
     xloc(3) = (k - 1 + self%par%n_offset(3))*self%geo%d(3)
   end function
 
-  pure function is_root(self) result(is_root_rank)
-    !! Returns whether or not the current rank is the root rank
-    class(parallel_t), intent(in) :: self
-    logical :: is_root_rank
-
-    is_root_rank = (self%nrank == 0)
-
-  end function
-
 end module m_mesh
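get_coordinates keeps treating n_offset as a 0-based vertex offset, i.e. xloc(1) = (i - 1 + n_offset(1))*d(1). A quick worked check with assumed values vert_dims(1) = 16 and d(1) = 0.1: on the second rank in x (nrank_dir(1) = 1) the generic decomposition gives n_offset(1) = 16*1 = 16, so local i = 1 maps to x = (1 - 1 + 16)*0.1 = 1.6, exactly one sub-domain length into the global domain. This convention is why the 2decomp&fft path stores the 1-based xstart minus one rather than xstart itself.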
diff --git a/src/omp/backend.f90 b/src/omp/backend.f90
index a9e1a0a45..32311bb81 100644
--- a/src/omp/backend.f90
+++ b/src/omp/backend.f90
@@ -418,7 +418,9 @@ subroutine vecadd_omp(self, a, x, b, y)
       error stop "Called vector add with incompatible fields"
     end if
 
-    dims = size(x%data)
+    ! size(x%data) returns the total number of elements, not the shape;
+    ! use the padded per-direction dims of the field instead
+    dims = self%mesh%get_padded_dims(x)
     nvec = dims(1)/SZ
     remstart = nvec*SZ + 1
 
diff --git a/src/omp/kernels/spectral_processing.f90 b/src/omp/kernels/spectral_processing.f90
new file mode 100644
index 000000000..00fdceb7b
--- /dev/null
+++ b/src/omp/kernels/spectral_processing.f90
@@ -0,0 +1,99 @@
+module m_omp_spectral
+
+  use m_common, only: dp
+  implicit none
+
+contains
+
+  subroutine process_spectral_div_u(div_u, waves, nx_spec, ny_spec, nz_spec, &
+                                    y_sp_st, nx, ny, nz, ax, bx, ay, by, az, bz)
+
+    complex(dp), intent(inout), dimension(:, :, :) :: div_u
+    complex(dp), intent(in), dimension(:, :, :) :: waves
+    real(dp), intent(in), dimension(:) :: ax, bx, ay, by, az, bz
+    integer, intent(in) :: nx_spec, ny_spec, nz_spec
+    integer, intent(in) :: y_sp_st
+    integer, intent(in) :: nx, ny, nz
+
+    integer :: i, j, k, ix, iy, iz
+    real(dp) :: tmp_r, tmp_c, div_r, div_c
+
+    do k = 1, nz_spec
+      do j = 1, ny_spec
+        do i = 1, nx_spec
+          ! normalisation
+          div_r = real(div_u(i, j, k), kind=dp)/(nx*ny*nz)
+          div_c = aimag(div_u(i, j, k))/(nx*ny*nz)
+
+          ix = i
+          iy = j + y_sp_st
+          iz = k
+
+          ! post-process forward
+          ! post-process in z
+          tmp_r = div_r
+          tmp_c = div_c
+          div_r = tmp_r*bz(iz) + tmp_c*az(iz)
+          div_c = tmp_c*bz(iz) - tmp_r*az(iz)
+          if (iz > nz/2 + 1) div_r = -div_r
+          if (iz > nz/2 + 1) div_c = -div_c
+
+          ! post-process in y
+          tmp_r = div_r
+          tmp_c = div_c
+          div_r = tmp_r*by(iy) + tmp_c*ay(iy)
+          div_c = tmp_c*by(iy) - tmp_r*ay(iy)
+          if (iy > ny/2 + 1) div_r = -div_r
+          if (iy > ny/2 + 1) div_c = -div_c
+
+          ! post-process in x
+          tmp_r = div_r
+          tmp_c = div_c
+          div_r = tmp_r*bx(ix) + tmp_c*ax(ix)
+          div_c = tmp_c*bx(ix) - tmp_r*ax(ix)
+
+          ! Solve Poisson
+          tmp_r = real(waves(i, j, k), kind=dp)
+          tmp_c = aimag(waves(i, j, k))
+          if ((tmp_r < 1.e-16_dp) .or. (tmp_c < 1.e-16_dp)) then
+            div_r = 0._dp; div_c = 0._dp
+          else
+            div_r = -div_r/tmp_r
+            div_c = -div_c/tmp_c
+          end if
+
+          ! post-process backward
+          ! post-process in z
+          tmp_r = div_r
+          tmp_c = div_c
+          div_r = tmp_r*bz(iz) - tmp_c*az(iz)
+          div_c = -tmp_c*bz(iz) - tmp_r*az(iz)
+          if (iz > nz/2 + 1) div_r = -div_r
+          if (iz > nz/2 + 1) div_c = -div_c
+
+          ! post-process in y
+          tmp_r = div_r
+          tmp_c = div_c
+          div_r = tmp_r*by(iy) + tmp_c*ay(iy)
+          div_c = tmp_c*by(iy) - tmp_r*ay(iy)
+          if (iy > ny/2 + 1) div_r = -div_r
+          if (iy > ny/2 + 1) div_c = -div_c
+
+          ! post-process in x
+          tmp_r = div_r
+          tmp_c = div_c
+          div_r = tmp_r*bx(ix) + tmp_c*ax(ix)
+          div_c = -tmp_c*bx(ix) + tmp_r*ax(ix)
+
+          ! update the entry
+          div_u(i, j, k) = cmplx(div_r, div_c, kind=dp)
+
+        end do
+      end do
+    end do
+
+  end subroutine
+
+end module m_omp_spectral
\ No newline at end of file
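Written out in complex form, the per-direction forward post-processing in the loop above is the rotation

    $\hat{u} \leftarrow \hat{u}\,\bigl(b_d(i_d) - \mathrm{i}\,a_d(i_d)\bigr), \quad d \in \{x, y, z\},$

with an extra sign flip for $i_y > n_y/2 + 1$ and $i_z > n_z/2 + 1$. The solve then divides the real and imaginary parts separately by the corresponding parts of waves, and the backward pass applies the matching inverse factors before the inverse FFT.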
diff --git a/src/omp/mesh.f90 b/src/omp/mesh.f90
new file mode 100644
index 000000000..cfaeb44a3
--- /dev/null
+++ b/src/omp/mesh.f90
@@ -0,0 +1,41 @@
+module m_omp_mesh
+  use mpi
+  use decomp_2d, only: decomp_2d_init, DECOMP_2D_COMM_CART_X, xsize, xstart
+  use m_mesh, only: mesh_t
+  use m_common, only: dp, DIR_X, DIR_Y, DIR_Z, DIR_C, &
+                      CELL, VERT, none, X_FACE, Y_FACE, Z_FACE, &
+                      X_EDGE, Y_EDGE, Z_EDGE
+  use m_field, only: field_t
+
+  type, extends(mesh_t) :: omp_mesh_t
+  contains
+    procedure :: domain_decomposition => domain_decomposition_2decompfft
+  end type omp_mesh_t
+
+  interface omp_mesh_t
+    module procedure init
+  end interface omp_mesh_t
+
+  private init
+
+contains
+
+  function init(dims_global, nproc_dir, L_global, &
+                periodic_BC) result(mesh)
+    integer, dimension(3), intent(in) :: dims_global
+    integer, dimension(3), intent(in) :: nproc_dir ! Number of proc in each direction
+    real(dp), dimension(3), intent(in) :: L_global
+    logical, dimension(3), optional, intent(in) :: periodic_BC
+
+    type(omp_mesh_t), allocatable :: mesh
+    allocate(omp_mesh_t :: mesh)
+
+    mesh%mesh_t = mesh_t(dims_global, nproc_dir, L_global, &
+                         periodic_BC)
+
+    call domain_decomposition_2decompfft(mesh%grid, mesh%par)
+
+  end function
+
+end module m_omp_mesh
\ No newline at end of file
diff --git a/src/par_grid.f90 b/src/par_grid.f90
new file mode 100644
index 000000000..c03a04c74
--- /dev/null
+++ b/src/par_grid.f90
@@ -0,0 +1,46 @@
+module m_par
+
+  implicit none
+
+  ! Stores parallel domain related information
+  type :: par_t
+    integer :: nrank ! local rank ID
+    integer :: nproc ! total number of ranks/proc participating in the domain decomposition
+    integer, dimension(3) :: nrank_dir ! local rank ID in each direction
+    integer, dimension(3) :: nproc_dir ! total number of proc in each direction
+    integer, dimension(3) :: n_offset ! number of cells offset in each direction due to domain decomposition
+    integer, dimension(3) :: pnext ! rank ID of the next rank in each direction
+    integer, dimension(3) :: pprev ! rank ID of the previous rank in each direction
+  contains
+    procedure :: is_root ! returns whether the current rank is the root rank
+  end type
+
+contains
+
+  pure function is_root(self) result(is_root_rank)
+    !! Returns whether or not the current rank is the root rank
+    class(par_t), intent(in) :: self
+    logical :: is_root_rank
+
+    is_root_rank = (self%nrank == 0)
+
+  end function
+
+end module m_par
+
+module m_grid
+  implicit none
+
+  ! Stores grid information
+  type :: grid_t
+    integer, dimension(3) :: global_vert_dims ! global number of vertices in each direction without padding (cartesian structure)
+    integer, dimension(3) :: global_cell_dims ! global number of cells in each direction without padding (cartesian structure)
+
+    integer, dimension(3) :: vert_dims_padded ! local domain size including padding (cartesian structure)
+    integer, dimension(3) :: vert_dims ! local number of vertices in each direction without padding (cartesian structure)
+    integer, dimension(3) :: cell_dims ! local number of cells in each direction without padding (cartesian structure)
+    logical, dimension(3) :: periodic_BC ! Whether or not a direction has a periodic BC
+    integer, dimension(3, 2) :: BCs_global
+    integer, dimension(3, 2) :: BCs
+  end type
+end module
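pprev and pnext are what halo exchanges key on. A minimal sketch of the intended use (buffer names, count and tag are assumptions, not part of this patch):

    ! swap halos along direction dir: send the last owned slice to pnext,
    ! receive the ghost slice from pprev
    call MPI_Sendrecv(u_last, n, MPI_DOUBLE_PRECISION, par%pnext(dir), tag, &
                      u_halo, n, MPI_DOUBLE_PRECISION, par%pprev(dir), tag, &
                      MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)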
diff --git a/src/poisson_fft.f90 b/src/poisson_fft.f90
index ca16f3375..2c080e44f 100644
--- a/src/poisson_fft.f90
+++ b/src/poisson_fft.f90
@@ -21,7 +21,7 @@ module m_poisson_fft
     !> Local domain sized array storing the spectral equivalence constants
     complex(dp), allocatable, dimension(:, :, :) :: waves
     !> Wave numbers in x, y, and z
-    complex(dp), allocatable, dimension(:) :: ax, bx, ay, by, az, bz
+    real(dp), allocatable, dimension(:) :: ax, bx, ay, by, az, bz
   contains
     procedure(fft_forward), deferred :: fft_forward
     procedure(fft_backward), deferred :: fft_backward
@@ -89,7 +89,7 @@ subroutine base_init(self, mesh, xdirps, ydirps, zdirps)
     allocate (self%ay(self%ny_glob), self%by(self%ny_glob))
     allocate (self%az(self%nz_glob), self%bz(self%nz_glob))
 
-    ! cuFFT 3D transform halves the first index.
+    ! The real-to-complex 3D FFT halves the first index.
     allocate (self%waves(self%nx_spec, self%ny_spec, self%nz_spec))
 
     ! waves_set requires some of the preprocessed tdsops variables.
diff --git a/src/xcompact.f90 b/src/xcompact.f90
index bdcb7bf75..080f73847 100644
--- a/src/xcompact.f90
+++ b/src/xcompact.f90
@@ -22,9 +22,9 @@ program xcompact
   class(base_backend_t), pointer :: backend
   class(allocator_t), pointer :: allocator
-  type(mesh_t) :: mesh
   type(allocator_t), pointer :: host_allocator
   type(solver_t) :: solver
+  type(mesh_t), target :: mesh
 
 #ifdef CUDA
   type(cuda_backend_t), target :: cuda_backend
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 5795f0076..416cc0027 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -39,8 +39,7 @@ define_test(omp/test_omp_dist_transeq.f90 ${CMAKE_CTEST_NPROCS} omp)
 define_test(test_scalar_product.f90 ${CMAKE_CTEST_NPROCS} omp)
 define_test(test_sum_intox.f90 ${CMAKE_CTEST_NPROCS} omp)
 
-if(${CMAKE_Fortran_COMPILER_ID} STREQUAL "PGI" OR
-   ${CMAKE_Fortran_COMPILER_ID} STREQUAL "NVHPC")
+if(${BACKEND} STREQUAL "CUDA")
   define_test(test_reordering.f90 1 cuda)
   define_test(test_time_integrator.f90 1 cuda)
   define_test(cuda/test_cuda_allocator.f90 1 cuda)