diff --git a/src/backend.f90 b/src/backend.f90 index da0446d8..1ca24cad 100644 --- a/src/backend.f90 +++ b/src/backend.f90 @@ -237,7 +237,7 @@ subroutine get_field_data(self, data, f, dir) end if ! Returns 0 if no reorder required - rdr_dir = get_rdr_from_dirs(direction, f%dir) + rdr_dir = get_rdr_from_dirs(f%dir, direction) ! Carry out a reorder if we need, and copy from field to data array if (rdr_dir /= 0) then @@ -269,7 +269,7 @@ subroutine set_field_data(self, f, data, dir) end if ! Returns 0 if no reorder required - rdr_dir = get_rdr_from_dirs(f%dir, direction) + rdr_dir = get_rdr_from_dirs(direction, f%dir) ! Carry out a reorder if we need, and copy from data array to field if (rdr_dir /= 0) then diff --git a/src/common.f90 b/src/common.f90 index b0467be0..0a9a33eb 100644 --- a/src/common.f90 +++ b/src/common.f90 @@ -20,10 +20,10 @@ module m_common Z_EDGE = 103, & ! Data on edges along Z none = -1 ! The location of data isn't specified integer, protected :: & - rdr_map(4, 4) = reshape([0, RDR_X2Y, RDR_X2Z, RDR_X2C, & - RDR_Y2X, 0, RDR_Y2Z, RDR_Y2C, & - RDR_Z2X, RDR_Z2Y, 0, RDR_Z2C, & - RDR_C2X, RDR_C2Y, RDR_C2Z, 0], shape=[4, 4]) + rdr_map(4, 4) = reshape([0, RDR_Y2X, RDR_Z2X, RDR_C2X, & + RDR_X2Y, 0, RDR_Z2Y, RDR_C2Y, & + RDR_X2Z, RDR_Y2Z, 0, RDR_C2Z, & + RDR_X2C, RDR_Y2C, RDR_Z2C, 0], shape=[4, 4]) type :: globs_t real(dp) :: nu, dt @@ -38,10 +38,10 @@ pure subroutine get_dirs_from_rdr(dir_from, dir_to, rdr_dir) integer, intent(out) :: dir_from, dir_to integer, intent(in) :: rdr_dir integer, dimension(2) :: dirs - + dirs = findloc(rdr_map, rdr_dir) - dir_from = dirs(2) - dir_to = dirs(1) + dir_from = dirs(1) + dir_to = dirs(2) end subroutine diff --git a/src/mesh.f90 b/src/mesh.f90 index a5f8e9b4..8fe9e019 100644 --- a/src/mesh.f90 +++ b/src/mesh.f90 @@ -265,9 +265,9 @@ pure function get_dims_dataloc(data_loc, vert_dims, cell_dims) result(dims) dims(1:2) = vert_dims(1:2) dims(3) = cell_dims(3) case (none) - error stop + error stop "Unknown location in get_dims_dataloc" end select - end function + end function get_dims_dataloc pure function get_padded_dims_dir(self, dir) result(dims_padded) !! Getter for padded dimensions with structure in `dir` direction @@ -410,9 +410,9 @@ pure function get_n_dir(self, dir, data_loc) result(n) n = n_cell end if case (none) - error stop + error stop "Unknown direction in get_n_dir" end select - end function + end function get_n_dir pure function get_coordinates(self, i, j, k) result(xloc) !! Get the physical location of a cell center with i,j,k local indices diff --git a/src/omp/backend.f90 b/src/omp/backend.f90 index 0904e881..ae91b303 100644 --- a/src/omp/backend.f90 +++ b/src/omp/backend.f90 @@ -360,6 +360,8 @@ subroutine sum_yintox_omp(self, u, u_) class(field_t), intent(inout) :: u class(field_t), intent(in) :: u_ + call sum_intox_omp(self, u, u_, DIR_Y) + end subroutine sum_yintox_omp subroutine sum_zintox_omp(self, u, u_) @@ -369,8 +371,39 @@ subroutine sum_zintox_omp(self, u, u_) class(field_t), intent(inout) :: u class(field_t), intent(in) :: u_ + call sum_intox_omp(self, u, u_, DIR_Z) + end subroutine sum_zintox_omp + subroutine sum_intox_omp(self, u, u_, dir_to) + + class(omp_backend_t) :: self + class(field_t), intent(inout) :: u + class(field_t), intent(in) :: u_ + integer, intent(in) :: dir_to + + integer :: dir_from + integer, dimension(3) :: dims + integer :: i, j, k ! Working indices + integer :: ii, jj, kk ! Transpose indices + + dir_from = DIR_X + + dims = self%mesh%get_padded_dims(u) + !$omp parallel do private(i, ii, jj, kk) collapse(2) + do k = 1, dims(3) + do j = 1, dims(2) + do i = 1, dims(1) + call get_index_reordering(ii, jj, kk, i, j, k, & + dir_from, dir_to, self%mesh) + u%data(i, j, k) = u%data(i, j, k) + u_%data(ii, jj, kk) + end do + end do + end do + !$omp end parallel do + + end subroutine sum_intox_omp + subroutine vecadd_omp(self, a, x, b, y) implicit none @@ -379,18 +412,107 @@ subroutine vecadd_omp(self, a, x, b, y) class(field_t), intent(in) :: x real(dp), intent(in) :: b class(field_t), intent(inout) :: y + integer, dimension(3) :: dims + integer :: i, j, k, ii - y%data = a*x%data + b*y%data ! fixme + integer :: nvec, remstart + + if (x%dir /= y%dir) then + error stop "Called vector add with incompatible fields" + end if + + dims = size(x%data) + nvec = dims(1)/SZ + remstart = nvec*SZ + 1 + + !$omp parallel do private(i, ii) collapse(2) + do k = 1, dims(3) + do j = 1, dims(2) + ! Execute inner vectorised loops + do ii = 1, nvec + !$omp simd + do i = 1, SZ + y%data(i + (ii - 1)*SZ, j, k) = & + a*x%data(i + (ii - 1)*SZ, j, k) + & + b*y%data(i + (ii - 1)*SZ, j, k) + end do + !$omp end simd + end do + + ! Remainder loop + do i = remstart, dims(1) + y%data(i, j, k) = a*x%data(i, j, k) + b*y%data(i, j, k) + end do + end do + end do + !$omp end parallel do end subroutine vecadd_omp real(dp) function scalar_product_omp(self, x, y) result(s) + + use mpi + + use m_common, only: NONE, get_rdr_from_dirs + implicit none class(omp_backend_t) :: self class(field_t), intent(in) :: x, y + class(field_t), pointer :: x_, y_ + integer, dimension(3) :: dims + integer :: i, j, k, ii + integer :: nvec, remstart + integer :: ierr + + if ((x%data_loc == NONE) .or. (y%data_loc == NONE)) then + error stop "You must set the field location before calling scalar product" + end if + if (x%data_loc /= y%data_loc) then + error stop "Called scalar product with incompatible fields" + end if + + ! Reorient data into temporary DIR_C storage + x_ => self%allocator%get_block(DIR_C, x%data_loc) + call self%get_field_data(x_%data, x) + y_ => self%allocator%get_block(DIR_C, y%data_loc) + call self%get_field_data(y_%data, y) + + dims = self%mesh%get_field_dims(x_) + + nvec = dims(1)/SZ + remstart = nvec*SZ + 1 + + s = 0.0_dp + !$omp parallel do reduction(+:s) private(i, ii) collapse(2) + do k = 1, dims(3) + do j = 1, dims(2) + ! Execute inner vectorised loops + do ii = 1, nvec + !$omp simd reduction(+:s) + do i = 1, SZ + s = s + x_%data(i + (ii - 1)*SZ, j, k)* & + y_%data(i + (ii - 1)*SZ, j, k) + end do + !$omp end simd + end do + + ! Remainder loop + do i = remstart, dims(1) + s = s + x_%data(i, j, k)*y_%data(i, j, k) + end do + end do + end do + !$omp end parallel do + + ! Release temporary storage + call self%allocator%release_block(x_) + call self%allocator%release_block(y_) - s = 0._dp + ! Reduce the result + call MPI_Allreduce(MPI_IN_PLACE, s, 1, MPI_DOUBLE_PRECISION, & + MPI_SUM, MPI_COMM_WORLD, & + ierr) end function scalar_product_omp diff --git a/src/ordering.f90 b/src/ordering.f90 index 5a07bdeb..0d2527c7 100644 --- a/src/ordering.f90 +++ b/src/ordering.f90 @@ -75,7 +75,7 @@ pure subroutine get_index_dir(dir_i, dir_j, dir_k, i, j, k, dir, & end subroutine get_index_dir pure subroutine get_index_reordering_dirs(out_i, out_j, out_k, in_i, in_j, in_k, & - dir_from, dir_to, mesh) + dir_from, dir_to, mesh) !! Converts a set of application storage directional index to an other direction. !! The two directions are defined by the reorder_dir variable, RDR_X2Y will go from storage in X to Y etc. integer, intent(out) :: out_i, out_j, out_k ! new indices in the application storage diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index b951a3d2..f8a11e6c 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -18,6 +18,7 @@ function(define_test testfile np backend) else() target_compile_options(${test_name} PRIVATE "-cpp") find_package(OpenMP REQUIRED) + target_compile_options(${test_name} PRIVATE "-cpp") target_link_libraries(${test_name} PRIVATE OpenMP::OpenMP_Fortran) endif() target_link_libraries(${test_name} PRIVATE x3d2) @@ -33,12 +34,16 @@ define_test(test_mesh.f90 4 omp) define_test(test_reordering.f90 1 omp) define_test(test_reordering.f90 2 omp) define_test(test_reordering.f90 4 omp) +define_test(test_reorder_map.f90 1 omp) define_test(test_time_integrator.f90 1 omp) define_test(test_hostside_io.f90 ${CMAKE_CTEST_NPROCS} omp) define_test(omp/test_omp_tridiag.f90 ${CMAKE_CTEST_NPROCS} omp) define_test(omp/test_omp_transeq.f90 ${CMAKE_CTEST_NPROCS} omp) define_test(omp/test_omp_dist_transeq.f90 ${CMAKE_CTEST_NPROCS} omp) +define_test(test_scalar_product.f90 ${CMAKE_CTEST_NPROCS} omp) +define_test(test_sum_intox.f90 ${CMAKE_CTEST_NPROCS} omp) + if(${CMAKE_Fortran_COMPILER_ID} STREQUAL "PGI" OR ${CMAKE_Fortran_COMPILER_ID} STREQUAL "NVHPC") define_test(test_reordering.f90 1 cuda) diff --git a/tests/test_reorder_map.f90 b/tests/test_reorder_map.f90 new file mode 100644 index 00000000..c09331e1 --- /dev/null +++ b/tests/test_reorder_map.f90 @@ -0,0 +1,93 @@ +program test_reorder_map + !! Check that reorder mappings return the expected values + + use m_common + + implicit none + + logical :: test_pass + + test_pass = .true. + + call test_get_dirs_from_rdr() + call test_get_rdr_from_dirs() + + if (.not. test_pass) then + error stop "FAIL" + end if + +contains + + subroutine test_get_dirs_from_rdr() + + call test_one_get_dirs_from_rdr("RDR_X2Y", RDR_X2Y, DIR_X, DIR_Y) + call test_one_get_dirs_from_rdr("RDR_X2Z", RDR_X2Z, DIR_X, DIR_Z) + call test_one_get_dirs_from_rdr("RDR_X2C", RDR_X2C, DIR_X, DIR_C) + + call test_one_get_dirs_from_rdr("RDR_Y2X", RDR_Y2X, DIR_Y, DIR_X) + call test_one_get_dirs_from_rdr("RDR_Y2Z", RDR_Y2Z, DIR_Y, DIR_Z) + call test_one_get_dirs_from_rdr("RDR_Y2C", RDR_Y2C, DIR_Y, DIR_C) + + call test_one_get_dirs_from_rdr("RDR_Z2X", RDR_Z2X, DIR_Z, DIR_X) + call test_one_get_dirs_from_rdr("RDR_Z2Y", RDR_Z2Y, DIR_Z, DIR_Y) + call test_one_get_dirs_from_rdr("RDR_Z2C", RDR_Z2C, DIR_Z, DIR_C) + + call test_one_get_dirs_from_rdr("RDR_C2X", RDR_C2X, DIR_C, DIR_X) + call test_one_get_dirs_from_rdr("RDR_C2Y", RDR_C2Y, DIR_C, DIR_Y) + call test_one_get_dirs_from_rdr("RDR_C2Z", RDR_C2Z, DIR_C, DIR_Z) + + end subroutine test_get_dirs_from_rdr + + subroutine test_one_get_dirs_from_rdr(test, rdr, expect_from, expect_to) + character(len=*), intent(in) :: test + integer, intent(in) :: rdr, expect_from, expect_to + + integer :: dir_from, dir_to + + call get_dirs_from_rdr(dir_from, dir_to, rdr) + print *, test + print *, "- Expect: ", expect_from, expect_to + print *, "- Got: ", dir_from, dir_to + if ((dir_from /= expect_from) .or. (dir_to /= expect_to)) then + test_pass = .false. + end if + + end subroutine test_one_get_dirs_from_rdr + + subroutine test_get_rdr_from_dirs() + + call test_one_get_rdr_from_dirs("X->Y", DIR_X, DIR_Y, RDR_X2Y) + call test_one_get_rdr_from_dirs("X->Z", DIR_X, DIR_Z, RDR_X2Z) + call test_one_get_rdr_from_dirs("X->C", DIR_X, DIR_C, RDR_X2C) + + call test_one_get_rdr_from_dirs("Y->X", DIR_Y, DIR_X, RDR_Y2X) + call test_one_get_rdr_from_dirs("Y->Z", DIR_Y, DIR_Z, RDR_Y2Z) + call test_one_get_rdr_from_dirs("Y->C", DIR_Y, DIR_C, RDR_Y2C) + + call test_one_get_rdr_from_dirs("Z->X", DIR_Z, DIR_X, RDR_Z2X) + call test_one_get_rdr_from_dirs("Z->Y", DIR_Z, DIR_Y, RDR_Z2Y) + call test_one_get_rdr_from_dirs("Z->C", DIR_Z, DIR_C, RDR_Z2C) + + call test_one_get_rdr_from_dirs("C->X", DIR_C, DIR_X, RDR_C2X) + call test_one_get_rdr_from_dirs("C->Y", DIR_C, DIR_Y, RDR_C2Y) + call test_one_get_rdr_from_dirs("C->Z", DIR_C, DIR_Z, RDR_C2Z) + + end subroutine test_get_rdr_from_dirs + + subroutine test_one_get_rdr_from_dirs(test, from, to, expect_rdr) + + character(len=*), intent(in) :: test + integer, intent(in) :: from, to, expect_rdr + integer :: rdr + + rdr = get_rdr_from_dirs(from, to) + print *, test + print *, "- Expect: ", expect_rdr + print *, "- Got: ", rdr + if (rdr /= expect_rdr) then + test_pass = .false. + end if + + end subroutine test_one_get_rdr_from_dirs + +end program test_reorder_map diff --git a/tests/test_scalar_product.f90 b/tests/test_scalar_product.f90 new file mode 100644 index 00000000..dcd79a7e --- /dev/null +++ b/tests/test_scalar_product.f90 @@ -0,0 +1,157 @@ +program test_scalar_product + !! Tests the implementation of the scalar product function. + ! + !! Given two fields, a and b, computes s = a_i * b_i where repeated indices + !! imply summation. + + use m_common, only: DIR_X, DIR_Y, DIR_Z, DIR_C + + use m_allocator + use m_base_backend +#ifdef CUDA +#else + use m_omp_backend + use m_omp_common, only: SZ +#endif + + implicit none + + integer, parameter :: nx = 17, ny = 32, nz = 59 + real(dp), parameter :: lx = 1.618, ly = 3.141529, lz = 1.729 + + class(base_backend_t), pointer :: backend + class(allocator_t), pointer :: allocator +#ifdef CUDA +#else + type(omp_backend_t), target :: omp_backend + type(allocator_t), target :: omp_allocator +#endif + + type(mesh_t) :: mesh + + character(len=5), dimension(4), parameter :: test = & + ["DIR_X", "DIR_Y", "DIR_Z", "DIR_C"] + integer, dimension(4), parameter :: dir = [DIR_X, DIR_Y, DIR_Z, DIR_C] + integer :: i + + integer :: nrank, nproc + integer :: ierr + + logical :: test_pass = .true. + + call MPI_Init(ierr) + call MPI_Comm_rank(MPI_COMM_WORLD, nrank, ierr) + call MPI_Comm_size(MPI_COMM_WORLD, nproc, ierr) + + mesh = mesh_t([nx, ny, nz], & + [1, 1, nproc], & + [lx, ly, lz]) + +#ifdef CUDA +#else + omp_allocator = allocator_t(mesh, SZ) + allocator => omp_allocator + omp_backend = omp_backend_t(mesh, allocator) + backend => omp_backend +#endif + + do i = 1, 4 + call runtest(test(i), dir(i)) + end do + + if (nrank == 0) then + if (.not. test_pass) then + error stop "Test failed" + end if + end if + call MPI_Barrier(MPI_COMM_WORLD, ierr) + + call MPI_Finalize(ierr) + +contains + + subroutine runtest(test, dir) + + character(len=*), intent(in) :: test + integer, intent(in) :: dir + + class(field_t), pointer :: a, b, c + real(dp) :: s + + integer :: n + integer :: expt + logical :: check_pass + + if (nrank == 0) then + print *, "Testing ", test + end if + + a => backend%allocator%get_block(dir, VERT) + b => backend%allocator%get_block(dir, VERT) + + if (nrank == 0) then + print *, "Simplest check: dot(0, 0) = 0" + end if + a%data = 0 + b%data = 0 + s = backend%scalar_product(a, b) + if (s /= 0) then + check_pass = .false. + else + check_pass = .true. + end if + call MPI_Allreduce(MPI_IN_PLACE, check_pass, 1, & + MPI_LOGICAL, MPI_LAND, MPI_COMM_WORLD, & + ierr) + if (nrank == 0) then + print *, "- Got: ", s + print *, "- Expected: ", 0 + if (.not. check_pass) then + print *, "- FAIL" + else + print *, "- PASS" + end if + end if + test_pass = test_pass .and. check_pass + + if (nrank == 0) then + print *, "Check: dot(nrank, nrank) = sum^{nrank-1}_i=0 sum_n(i) i**2" + end if + a%data = (nrank + 1) + s = backend%scalar_product(a, a) + + ! Determine number of interior points, using a temporary DIR_C field + c => backend%allocator%get_block(DIR_C) + call c%set_data_loc(a%data_loc) + n = product(mesh%get_field_dims(c)) + call backend%allocator%release_block(c) + + expt = n*(nrank + 1)**2 + call MPI_Allreduce(MPI_IN_PLACE, expt, 1, & + MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD, & + ierr) + if (s /= real(expt, kind(s))) then + check_pass = .false. + else + check_pass = .true. + end if + call MPI_Allreduce(MPI_IN_PLACE, check_pass, 1, & + MPI_LOGICAL, MPI_LAND, MPI_COMM_WORLD, & + ierr) + if (nrank == 0) then + print *, "- Got: ", s + print *, "- Expected: ", expt + if (.not. check_pass) then + print *, "- FAIL" + else + print *, "- PASS" + end if + end if + test_pass = test_pass .and. check_pass + + call backend%allocator%release_block(a) + call backend%allocator%release_block(b) + + end subroutine runtest + +end program test_scalar_product diff --git a/tests/test_sum_intox.f90 b/tests/test_sum_intox.f90 new file mode 100644 index 00000000..a2257d94 --- /dev/null +++ b/tests/test_sum_intox.f90 @@ -0,0 +1,129 @@ +program test_sum_intox + !! Tests the implementation of summing a Y-oriented field into an X-oriented + !! one. + + use m_common, only: DIR_X, DIR_Y, DIR_Z + + use m_allocator + use m_base_backend +#ifdef CUDA +#else + use m_omp_backend + use m_omp_common, only: SZ +#endif + + implicit none + + integer, parameter :: nx = 17, ny = 32, nz = 59 + real(dp), parameter :: lx = 1.618, ly = 3.141529, lz = 1.729 + + class(base_backend_t), pointer :: backend + class(allocator_t), pointer :: allocator +#ifdef CUDA +#else + type(omp_backend_t), target :: omp_backend + type(allocator_t), target :: omp_allocator +#endif + + type(mesh_t) :: mesh + + integer :: nrank, nproc + integer :: ierr + + logical :: test_pass = .true. + + call MPI_Init(ierr) + call MPI_Comm_rank(MPI_COMM_WORLD, nrank, ierr) + call MPI_Comm_size(MPI_COMM_WORLD, nproc, ierr) + + mesh = mesh_t([nx, ny, nz], & + [1, 1, nproc], & + [lx, ly, lz]) + +#ifdef CUDA +#else + omp_allocator = allocator_t(mesh, SZ) + allocator => omp_allocator + omp_backend = omp_backend_t(mesh, allocator) + backend => omp_backend +#endif + + call runtest("YintoX", DIR_Y) + call runtest("ZintoX", DIR_Z) + + if (nrank == 0) then + if (.not. test_pass) then + error stop "Test failed" + end if + end if + + call MPI_Finalize(ierr) + +contains + + subroutine runtest(test, dir_from) + + use m_ordering, only: get_index_dir + + character(len=*), intent(in) :: test + integer, intent(in) :: dir_from + + class(field_t), pointer :: a, b + integer :: ctr + integer :: i, j, k + integer :: ii, jj, kk + + integer, dimension(3) :: dims + logical :: check_pass + + if (nrank == 0) then + print *, "Test ", test + end if + + a => backend%allocator%get_block(DIR_X) + b => backend%allocator%get_block(dir_from) + + dims = mesh%get_padded_dims(DIR_C) + + ! Initialise fields so that b = -a + ctr = 0 + do k = 1, dims(3) + do j = 1, dims(2) + do i = 1, dims(1) + call get_index_dir(ii, jj, kk, i, j, k, DIR_X, SZ, & + dims(1), dims(2), dims(3)) + a%data(ii, jj, kk) = ctr + call get_index_dir(ii, jj, kk, i, j, k, dir_from, SZ, & + dims(1), dims(2), dims(3)) + b%data(ii, jj, kk) = -ctr + ctr = ctr + 1 + end do + end do + end do + + if (dir_from == DIR_Y) then + call backend%sum_yintox(a, b) + else + call backend%sum_zintox(a, b) + end if + + check_pass = .not. ((minval(a%data) /= 0) .or. (maxval(a%data) /= 0)) + call MPI_Allreduce(MPI_IN_PLACE, check_pass, 1, & + MPI_LOGICAL, MPI_LAND, MPI_COMM_WORLD, & + ierr) + test_pass = test_pass .and. check_pass + + if (nrank == 0) then + if (check_pass) then + print *, "- PASS" + else + print *, "- FAIL" + end if + end if + + call backend%allocator%release_block(a) + call backend%allocator%release_block(b) + + end subroutine runtest + +end program test_sum_intox