diff --git a/.clang-format b/.clang-format index 0f46089fda..f80c5f3a7f 100644 --- a/.clang-format +++ b/.clang-format @@ -1,6 +1,7 @@ BasedOnStyle : LLVM # Indent formatting IndentWidth : 2 +Language: Cpp UseTab: Never KeepEmptyLinesAtTheStartOfBlocks : true MaxEmptyLinesToKeep : 2 @@ -40,9 +41,10 @@ PointerAlignment: Left AllowShortIfStatementsOnASingleLine : true AllowShortFunctionsOnASingleLine : true AllowShortLoopsOnASingleLine : false +AllowAllArgumentsOnNextLine : false AllowAllParametersOfDeclarationOnNextLine : false AlignTrailingComments : true -BinPackArguments : false +BinPackArguments : true BinPackParameters : false ConstructorInitializerAllOnOneLineOrOnePerLine : true ColumnLimit : 80 diff --git a/examples/dynamic-forall.cpp b/examples/dynamic-forall.cpp index 751d6e3537..b69e5fec28 100644 --- a/examples/dynamic-forall.cpp +++ b/examples/dynamic-forall.cpp @@ -100,8 +100,7 @@ int main(int argc, char* argv[]) //----------------------------------------------------------------------------// // policy is chosen from the list - RAJA::expt::dynamic_forall(pol, - RAJA::RangeSegment(0, N), + RAJA::expt::dynamic_forall(pol, RAJA::RangeSegment(0, N), [=] RAJA_HOST_DEVICE(int i) { c[i] = a[i] + b[i]; }); // _rajaseq_vector_add_end diff --git a/examples/dynamic_mat_transpose.cpp b/examples/dynamic_mat_transpose.cpp index 2404a2dc64..d83f43e9b0 100644 --- a/examples/dynamic_mat_transpose.cpp +++ b/examples/dynamic_mat_transpose.cpp @@ -355,13 +355,11 @@ int main(int argc, char* argv[]) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::loop( - ctx, - RAJA::RangeSegment(0, outer_Dimr), + ctx, RAJA::RangeSegment(0, outer_Dimr), [&](int by) { RAJA::loop( - ctx, - RAJA::RangeSegment(0, outer_Dimc), + ctx, RAJA::RangeSegment(0, outer_Dimc), [&](int bx) { // Request memory from shared memory pool @@ -369,44 +367,40 @@ int main(int argc, char* argv[]) ctx.getSharedMemory(TILE_DIM * TILE_DIM); // Use RAJA View for simplified indexing - RAJA::View> Tile( - tile_ptr, TILE_DIM, TILE_DIM); - - RAJA::loop(ctx, - RAJA::RangeSegment(0, TILE_DIM), - [&](int ty) - { - RAJA::loop( - ctx, - RAJA::RangeSegment(0, TILE_DIM), - [&](int tx) - { - int col = - bx * TILE_DIM + - tx; // Matrix column index - int row = by * TILE_DIM + - ty; // Matrix row index - - // Bounds check - if (row < N_r && col < N_c) - { - Tile(ty, tx) = Aview(row, col); - } - }); - }); + RAJA::View> Tile(tile_ptr, TILE_DIM, + TILE_DIM); + + RAJA::loop( + ctx, RAJA::RangeSegment(0, TILE_DIM), + [&](int ty) + { + RAJA::loop( + ctx, RAJA::RangeSegment(0, TILE_DIM), + [&](int tx) + { + int col = + bx * TILE_DIM + tx; // Matrix column index + int row = + by * TILE_DIM + ty; // Matrix row index + + // Bounds check + if (row < N_r && col < N_c) + { + Tile(ty, tx) = Aview(row, col); + } + }); + }); // Barrier is needed to ensure all threads have written to // Tile ctx.teamSync(); RAJA::loop( - ctx, - RAJA::RangeSegment(0, TILE_DIM), + ctx, RAJA::RangeSegment(0, TILE_DIM), [&](int ty) { RAJA::loop( - ctx, - RAJA::RangeSegment(0, TILE_DIM), + ctx, RAJA::RangeSegment(0, TILE_DIM), [&](int tx) { int col = diff --git a/examples/forall-param-reductions.cpp b/examples/forall-param-reductions.cpp index 7459953ffe..fbfcda2d93 100644 --- a/examples/forall-param-reductions.cpp +++ b/examples/forall-param-reductions.cpp @@ -118,20 +118,14 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) VALLOC_INT seq_maxloc(std::numeric_limits::min(), -1); RAJA::forall( - host_res, - arange, - RAJA::expt::Reduce(&seq_sum), + host_res, arange, RAJA::expt::Reduce(&seq_sum), RAJA::expt::Reduce(&seq_min), RAJA::expt::Reduce(&seq_max), RAJA::expt::Reduce(&seq_minloc), RAJA::expt::Reduce(&seq_maxloc), RAJA::expt::KernelName("RAJA Reduce Seq Kernel"), - [=](int i, - int& _seq_sum, - int& _seq_min, - int& _seq_max, - VALLOC_INT& _seq_minloc, - VALLOC_INT& _seq_maxloc) + [=](int i, int& _seq_sum, int& _seq_min, int& _seq_max, + VALLOC_INT& _seq_minloc, VALLOC_INT& _seq_maxloc) { _seq_sum += a[i]; @@ -173,20 +167,14 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) VALLOC_INT omp_maxloc(std::numeric_limits::min(), -1); RAJA::forall( - host_res, - arange, - RAJA::expt::Reduce(&omp_sum), + host_res, arange, RAJA::expt::Reduce(&omp_sum), RAJA::expt::Reduce(&omp_min), RAJA::expt::Reduce(&omp_max), RAJA::expt::Reduce(&omp_minloc), RAJA::expt::Reduce(&omp_maxloc), RAJA::expt::KernelName("RAJA Reduce OpenMP Kernel"), - [=](int i, - int& _omp_sum, - int& _omp_min, - int& _omp_max, - VALLOC_INT& _omp_minloc, - VALLOC_INT& _omp_maxloc) + [=](int i, int& _omp_sum, int& _omp_min, int& _omp_max, + VALLOC_INT& _omp_minloc, VALLOC_INT& _omp_maxloc) { _omp_sum += a[i]; @@ -227,20 +215,14 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) VALLOC_INT omp_t_maxloc(std::numeric_limits::min(), -1); RAJA::forall( - omp_res, - arange, - RAJA::expt::Reduce(&omp_t_sum), + omp_res, arange, RAJA::expt::Reduce(&omp_t_sum), RAJA::expt::Reduce(&omp_t_min), RAJA::expt::Reduce(&omp_t_max), RAJA::expt::Reduce(&omp_t_minloc), RAJA::expt::Reduce(&omp_t_maxloc), RAJA::expt::KernelName("RAJA Reduce Target OpenMP Kernel"), - [=](int i, - int& _omp_t_sum, - int& _omp_t_min, - int& _omp_t_max, - VALLOC_INT& _omp_t_minloc, - VALLOC_INT& _omp_t_maxloc) + [=](int i, int& _omp_t_sum, int& _omp_t_min, int& _omp_t_max, + VALLOC_INT& _omp_t_minloc, VALLOC_INT& _omp_t_maxloc) { _omp_t_sum += a[i]; @@ -285,20 +267,14 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) VALLOC_INT cuda_maxloc(std::numeric_limits::min(), -1); RAJA::forall( - cuda_res, - arange, - RAJA::expt::Reduce(&cuda_sum), + cuda_res, arange, RAJA::expt::Reduce(&cuda_sum), RAJA::expt::Reduce(&cuda_min), RAJA::expt::Reduce(&cuda_max), RAJA::expt::Reduce(&cuda_minloc), RAJA::expt::Reduce(&cuda_maxloc), RAJA::expt::KernelName("RAJA Reduce CUDA Kernel"), - [=] RAJA_DEVICE(int i, - int& _cuda_sum, - int& _cuda_min, - int& _cuda_max, - VALLOC_INT& _cuda_minloc, - VALLOC_INT& _cuda_maxloc) + [=] RAJA_DEVICE(int i, int& _cuda_sum, int& _cuda_min, int& _cuda_max, + VALLOC_INT& _cuda_minloc, VALLOC_INT& _cuda_maxloc) { _cuda_sum += d_a[i]; @@ -342,19 +318,14 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) VALLOC_INT hip_maxloc(std::numeric_limits::min(), -1); RAJA::forall( - arange, - RAJA::expt::Reduce(&hip_sum), + arange, RAJA::expt::Reduce(&hip_sum), RAJA::expt::Reduce(&hip_min), RAJA::expt::Reduce(&hip_max), RAJA::expt::Reduce(&hip_minloc), RAJA::expt::Reduce(&hip_maxloc), RAJA::expt::KernelName("RAJA Reduce HIP Kernel"), - [=] RAJA_DEVICE(int i, - int& _hip_sum, - int& _hip_min, - int& _hip_max, - VALLOC_INT& _hip_minloc, - VALLOC_INT& _hip_maxloc) + [=] RAJA_DEVICE(int i, int& _hip_sum, int& _hip_min, int& _hip_max, + VALLOC_INT& _hip_minloc, VALLOC_INT& _hip_maxloc) { _hip_sum += d_a[i]; @@ -399,20 +370,14 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) VALLOC_INT sycl_maxloc(std::numeric_limits::min(), -1); RAJA::forall( - sycl_res, - arange, - RAJA::expt::Reduce(&sycl_sum), + sycl_res, arange, RAJA::expt::Reduce(&sycl_sum), RAJA::expt::Reduce(&sycl_min), RAJA::expt::Reduce(&sycl_max), RAJA::expt::Reduce(&sycl_minloc), RAJA::expt::Reduce(&sycl_maxloc), RAJA::expt::KernelName("RAJA Reduce SYCL Kernel"), - [=] RAJA_DEVICE(int i, - int& _sycl_sum, - int& _sycl_min, - int& _sycl_max, - VALLOC_INT& _sycl_minloc, - VALLOC_INT& _sycl_maxloc) + [=] RAJA_DEVICE(int i, int& _sycl_sum, int& _sycl_min, int& _sycl_max, + VALLOC_INT& _sycl_minloc, VALLOC_INT& _sycl_maxloc) { _sycl_sum += d_a[i]; diff --git a/examples/forall_multi-reductions.cpp b/examples/forall_multi-reductions.cpp index 090ded938f..3809a107c1 100644 --- a/examples/forall_multi-reductions.cpp +++ b/examples/forall_multi-reductions.cpp @@ -154,8 +154,8 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv)) res.memcpy(bins, host_bins, N * sizeof(int)); res.memcpy(a, host_a, N * sizeof(int)); - example_code( - arange, num_bins, bins, a); + example_code(arange, num_bins, bins, + a); res.deallocate(bins); res.deallocate(a); diff --git a/examples/jacobi.cpp b/examples/jacobi.cpp index dd74a929eb..76888d8c62 100644 --- a/examples/jacobi.cpp +++ b/examples/jacobi.cpp @@ -191,8 +191,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) RAJA::RangeSegment jacobiRange(1, (N + 1)); using jacobiSeqNestedPolicy = RAJA::KernelPolicy>>>; printf("RAJA: Sequential Policy - Nested ForallN \n"); @@ -267,8 +266,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) */ using jacobiOmpNestedPolicy = RAJA::KernelPolicy>>>; while (resI2 > tol * tol) @@ -329,18 +327,12 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) using jacobiCUDANestedPolicy = RAJA::KernelPolicy, - RAJA::cuda_block_y_loop, + 1, RAJA::tile_fixed<32>, RAJA::cuda_block_y_loop, RAJA::statement::Tile< - 0, - RAJA::tile_fixed<32>, - RAJA::cuda_block_x_loop, + 0, RAJA::tile_fixed<32>, RAJA::cuda_block_x_loop, RAJA::statement::For< - 1, - RAJA::cuda_thread_y_direct, - RAJA::statement::For<0, - RAJA::cuda_thread_x_direct, + 1, RAJA::cuda_thread_y_direct, + RAJA::statement::For<0, RAJA::cuda_thread_x_direct, RAJA::statement::Lambda<0>>>>>>>; resI2 = 1; @@ -411,18 +403,12 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) using jacobiHIPNestedPolicy = RAJA::KernelPolicy, - RAJA::hip_block_y_loop, + 1, RAJA::tile_fixed<32>, RAJA::hip_block_y_loop, RAJA::statement::Tile< - 0, - RAJA::tile_fixed<32>, - RAJA::hip_block_x_loop, + 0, RAJA::tile_fixed<32>, RAJA::hip_block_x_loop, RAJA::statement::For< - 1, - RAJA::hip_thread_y_direct, - RAJA::statement::For<0, - RAJA::hip_thread_x_direct, + 1, RAJA::hip_thread_y_direct, + RAJA::statement::For<0, RAJA::hip_thread_x_direct, RAJA::statement::Lambda<0>>>>>>>; resI2 = 1; @@ -512,8 +498,7 @@ void computeErr(double* I, grid_s grid) RAJA::ReduceMax tMax(-1.0); using jacobiSeqNestedPolicy = RAJA::KernelPolicy>>>; RAJA::kernel( diff --git a/examples/kernel-dynamic-tile.cpp b/examples/kernel-dynamic-tile.cpp index 028cd220aa..ddac3ebf57 100644 --- a/examples/kernel-dynamic-tile.cpp +++ b/examples/kernel-dynamic-tile.cpp @@ -15,15 +15,10 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) using namespace RAJA; kernel_param, - seq_exec, + 1, tile_dynamic<1>, seq_exec, statement::Tile< - 0, - tile_dynamic<0>, - seq_exec, - statement::For<1, - seq_exec, + 0, tile_dynamic<0>, seq_exec, + statement::For<1, seq_exec, statement::For<0, seq_exec, statement::Lambda<0>>>>>>>( make_tuple(RangeSegment{0, 25}, RangeSegment{0, 25}), make_tuple(TileSize{5}, TileSize{10}), diff --git a/examples/launch-param-reductions.cpp b/examples/launch-param-reductions.cpp index 8e90ce2d7b..898023a6be 100644 --- a/examples/launch-param-reductions.cpp +++ b/examples/launch-param-reductions.cpp @@ -134,23 +134,17 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) VALLOC_INT seq_maxloc(std::numeric_limits::min(), -1); RAJA::launch( - host_res, - RAJA::LaunchParams(), - "SeqReductionKernel", + host_res, RAJA::LaunchParams(), "SeqReductionKernel", RAJA::expt::Reduce(&seq_sum), RAJA::expt::Reduce(&seq_min), RAJA::expt::Reduce(&seq_max), RAJA::expt::Reduce(&seq_minloc), RAJA::expt::Reduce(&seq_maxloc), - [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx, - int& _seq_sum, - int& _seq_min, - int& _seq_max, - VALLOC_INT& _seq_minloc, - VALLOC_INT& _seq_maxloc) + [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx, int& _seq_sum, + int& _seq_min, int& _seq_max, + VALLOC_INT& _seq_minloc, VALLOC_INT& _seq_maxloc) { - RAJA::loop(ctx, - arange, + RAJA::loop(ctx, arange, [&](int i) { _seq_sum += a[i]; @@ -198,23 +192,17 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) VALLOC_INT omp_maxloc(std::numeric_limits::min(), -1); RAJA::launch( - host_res, - RAJA::LaunchParams(), - "OmpReductionKernel", + host_res, RAJA::LaunchParams(), "OmpReductionKernel", RAJA::expt::Reduce(&omp_sum), RAJA::expt::Reduce(&omp_min), RAJA::expt::Reduce(&omp_max), RAJA::expt::Reduce(&omp_minloc), RAJA::expt::Reduce(&omp_maxloc), - [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx, - int& _omp_sum, - int& _omp_min, - int& _omp_max, - VALLOC_INT& _omp_minloc, - VALLOC_INT& _omp_maxloc) + [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx, int& _omp_sum, + int& _omp_min, int& _omp_max, + VALLOC_INT& _omp_minloc, VALLOC_INT& _omp_maxloc) { - RAJA::loop(ctx, - arange, + RAJA::loop(ctx, arange, [&](int i) { _omp_sum += a[i]; @@ -272,16 +260,12 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) RAJA::expt::Reduce(&cuda_max), RAJA::expt::Reduce(&cuda_minloc), RAJA::expt::Reduce(&cuda_maxloc), - [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx, - int& _cuda_sum, - int& _cuda_min, - int& _cuda_max, - VALLOC_INT& _cuda_minloc, - VALLOC_INT& _cuda_maxloc) + [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx, int& _cuda_sum, + int& _cuda_min, int& _cuda_max, + VALLOC_INT& _cuda_minloc, VALLOC_INT& _cuda_maxloc) { RAJA::loop( - ctx, - arange, + ctx, arange, [&](int i) { _cuda_sum += d_a[i]; @@ -332,22 +316,17 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) device_res, RAJA::LaunchParams(RAJA::Teams(NUMBER_OF_TEAMS), RAJA::Threads(HIP_BLOCK_SIZE)), - "HipReductionKernel", - RAJA::expt::Reduce(&hip_sum), + "HipReductionKernel", RAJA::expt::Reduce(&hip_sum), RAJA::expt::Reduce(&hip_min), RAJA::expt::Reduce(&hip_max), RAJA::expt::Reduce(&hip_minloc), RAJA::expt::Reduce(&hip_maxloc), - [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx, - int& _hip_sum, - int& _hip_min, - int& _hip_max, - VALLOC_INT& _hip_minloc, - VALLOC_INT& _hip_maxloc) + [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx, int& _hip_sum, + int& _hip_min, int& _hip_max, + VALLOC_INT& _hip_minloc, VALLOC_INT& _hip_maxloc) { RAJA::loop( - ctx, - arange, + ctx, arange, [&](int i) { _hip_sum += d_a[i]; @@ -404,16 +383,12 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) RAJA::expt::Reduce(&sycl_max), RAJA::expt::Reduce(&sycl_minloc), RAJA::expt::Reduce(&sycl_maxloc), - [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx, - int& _sycl_sum, - int& _sycl_min, - int& _sycl_max, - VALLOC_INT& _sycl_minloc, - VALLOC_INT& _sycl_maxloc) + [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx, int& _sycl_sum, + int& _sycl_min, int& _sycl_max, + VALLOC_INT& _sycl_minloc, VALLOC_INT& _sycl_maxloc) { RAJA::loop( - ctx, - arange, + ctx, arange, [&](int i) { _sycl_sum += d_a[i]; diff --git a/examples/launch_flatten.cpp b/examples/launch_flatten.cpp index 48cc03669b..506313f42f 100644 --- a/examples/launch_flatten.cpp +++ b/examples/launch_flatten.cpp @@ -102,13 +102,11 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) launch_params, [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { - RAJA::loop(ctx, - RAJA::RangeSegment(0, N), + RAJA::loop(ctx, RAJA::RangeSegment(0, N), [&](int j) { RAJA::loop( - ctx, - RAJA::RangeSegment(0, N), + ctx, RAJA::RangeSegment(0, N), [&](int i) { d_A_2DView(j, i) = i + j; }); }); @@ -117,8 +115,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // RAJA flatten policy will reshape a 2/3D thread team to 1D simplifying // accumulating memory contents - RAJA::loop(ctx, - RAJA::RangeSegment(0, NN), + RAJA::loop(ctx, RAJA::RangeSegment(0, NN), [&](int i) { device_kernel_sum += d_A_1DView(i); }); }); @@ -135,13 +132,11 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) launch_params, [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { - RAJA::loop(ctx, - RAJA::RangeSegment(0, N), + RAJA::loop(ctx, RAJA::RangeSegment(0, N), [&](int j) { RAJA::loop( - ctx, - RAJA::RangeSegment(0, N), + ctx, RAJA::RangeSegment(0, N), [&](int i) { h_A_2DView(j, i) = i + j; }); }); @@ -149,8 +144,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // As loops are dispatched as standard C loops we can revert to using // a regular seq_exec policy - RAJA::loop(ctx, - RAJA::RangeSegment(0, NN), + RAJA::loop(ctx, RAJA::RangeSegment(0, NN), [&](int i) { host_kernel_sum += h_A_1DView(i); }); }); diff --git a/examples/launch_matrix-multiply.cpp b/examples/launch_matrix-multiply.cpp index 1355fb10e8..e9654f91b4 100644 --- a/examples/launch_matrix-multiply.cpp +++ b/examples/launch_matrix-multiply.cpp @@ -335,13 +335,11 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) RAJA::Threads(THREAD_SZ, THREAD_SZ)), [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { - RAJA::loop(ctx, - col_range, + RAJA::loop(ctx, col_range, [&](int col) { RAJA::loop( - ctx, - row_range, + ctx, row_range, [&](int row) { double dot = 0.0; @@ -382,13 +380,11 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) RAJA::LaunchParams(), [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { - RAJA::loop(ctx, - col_range, + RAJA::loop(ctx, col_range, [&](int col) { RAJA::loop( - ctx, - row_range, + ctx, row_range, [&](int row) { double dot = 0.0; @@ -418,14 +414,11 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // using global_thread_xy = RAJA::LoopPolicy; - RAJA::launch(RAJA::ExecPlace::HOST, - RAJA::LaunchParams(), + RAJA::launch(RAJA::ExecPlace::HOST, RAJA::LaunchParams(), [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::expt::loop( - ctx, - col_range, - row_range, + ctx, col_range, row_range, [&](int col, int row) { double dot = 0.0; @@ -465,13 +458,11 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) RAJA::LaunchParams(RAJA::Teams(N), RAJA::Threads(N)), [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { - RAJA::loop(ctx, - col_range, + RAJA::loop(ctx, col_range, [&](int col) { RAJA::loop( - ctx, - row_range, + ctx, row_range, [&](int row) { double dot = 0.0; @@ -508,25 +499,19 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) RAJA::Threads(THREAD_SZ, THREAD_SZ)), [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { - RAJA::tile(ctx, - THREAD_SZ, - row_range, + RAJA::tile(ctx, THREAD_SZ, row_range, [&](RAJA::RangeSegment const& row_tile) { RAJA::tile( - ctx, - THREAD_SZ, - col_range, + ctx, THREAD_SZ, col_range, [&](RAJA::RangeSegment const& col_tile) { RAJA::loop( - ctx, - row_tile, + ctx, row_tile, [&](int col) { RAJA::loop( - ctx, - col_tile, + ctx, col_tile, [&](int row) { double dot = 0.0; @@ -582,13 +567,11 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) RAJA::LaunchParams(RAJA::Teams(N), RAJA::Threads(N)), [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { - RAJA::loop(ctx, - col_range, + RAJA::loop(ctx, col_range, [&](int col) { RAJA::loop( - ctx, - row_range, + ctx, row_range, [&](int row) { double dot = 0.0; @@ -629,25 +612,19 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) RAJA::Threads(THREAD_SZ, THREAD_SZ)), [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { - RAJA::tile(ctx, - THREAD_SZ, - row_range, + RAJA::tile(ctx, THREAD_SZ, row_range, [&](RAJA::RangeSegment const& row_tile) { RAJA::tile( - ctx, - THREAD_SZ, - col_range, + ctx, THREAD_SZ, col_range, [&](RAJA::RangeSegment const& col_tile) { RAJA::loop( - ctx, - row_tile, + ctx, row_tile, [&](int col) { RAJA::loop( - ctx, - col_tile, + ctx, col_tile, [&](int row) { double dot = 0.0; @@ -697,58 +674,46 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // Loop over teams // RAJA::tile( - ctx, - THREAD_SZ, - row_range, + ctx, THREAD_SZ, row_range, [&](RAJA::RangeSegment const& y_tile) { RAJA::tile( - ctx, - THREAD_SZ, - col_range, + ctx, THREAD_SZ, col_range, [&](RAJA::RangeSegment const& x_tile) { RAJA_TEAM_SHARED double As[THREAD_SZ][THREAD_SZ]; RAJA_TEAM_SHARED double Bs[THREAD_SZ][THREAD_SZ]; RAJA_TEAM_SHARED double Cs[THREAD_SZ][THREAD_SZ]; - RAJA::loop_icount(ctx, - y_tile, + RAJA::loop_icount(ctx, y_tile, [&](int row, int ty) { RAJA::loop_icount( - ctx, - x_tile, + ctx, x_tile, [&](int col, int tx) { Cs[ty][tx] = 0.0; }); }); RAJA::tile( - ctx, - THREAD_SZ, - dot_range, + ctx, THREAD_SZ, dot_range, [&](RAJA::RangeSegment const& k_tile) { RAJA::loop_icount( - ctx, - y_tile, + ctx, y_tile, [&](int row, int ty) { RAJA::loop_icount( - ctx, - k_tile, + ctx, k_tile, [&](int k_id, int tx) { As[ty][tx] = Aview(row, k_id); }); }); RAJA::loop_icount( - ctx, - k_tile, + ctx, k_tile, [&](int k_id, int ty) { RAJA::loop_icount( - ctx, - x_tile, + ctx, x_tile, [&](int col, int tx) { Bs[ty][tx] = Bview(k_id, col); }); }); @@ -756,18 +721,15 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) ctx.teamSync(); RAJA::loop_icount( - ctx, - y_tile, + ctx, y_tile, [&](int row, int ty) { RAJA::loop_icount( - ctx, - x_tile, + ctx, x_tile, [&](int col, int tx) { RAJA::loop_icount( - ctx, - k_tile, + ctx, k_tile, [&](int gid, int e) { Cs[ty][tx] += As[ty][e] * Bs[e][tx]; }); @@ -777,13 +739,11 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) ctx.teamSync(); }); // slide across matrix - RAJA::loop_icount(ctx, - y_tile, + RAJA::loop_icount(ctx, y_tile, [&](int row, int ty) { RAJA::loop_icount( - ctx, - x_tile, + ctx, x_tile, [&](int col, int tx) { Cview(col, row) = Cs[ty][tx]; @@ -852,8 +812,8 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) hipErrchk(hipMemcpy(d_C, C, N * N * sizeof(double), hipMemcpyHostToDevice)); // Launch HIP kernel defined near the top of this file. - hipLaunchKernelGGL( - (matMultKernel), dim3(griddim), dim3(blockdim), 0, 0, N, d_C, d_A, d_B); + hipLaunchKernelGGL((matMultKernel), dim3(griddim), dim3(blockdim), 0, 0, N, + d_C, d_A, d_B); hipDeviceSynchronize(); @@ -868,15 +828,8 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) hipErrchk(hipMemcpy(d_C, C, N * N * sizeof(double), hipMemcpyHostToDevice)); // Launch HIP kernel defined near the top of this file. - hipLaunchKernelGGL((sharedMatMultKernel), - dim3(griddim), - dim3(blockdim), - 0, - 0, - N, - d_C, - d_A, - d_B); + hipLaunchKernelGGL((sharedMatMultKernel), dim3(griddim), dim3(blockdim), 0, 0, + N, d_C, d_A, d_B); hipDeviceSynchronize(); diff --git a/examples/launch_reductions.cpp b/examples/launch_reductions.cpp index d248a0ac70..5d35a7260d 100644 --- a/examples/launch_reductions.cpp +++ b/examples/launch_reductions.cpp @@ -175,8 +175,7 @@ int main(int argc, char* argv[]) "Launch Reductions", [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { - RAJA::loop(ctx, - arange, + RAJA::loop(ctx, arange, [&](int i) { kernel_sum += a[i]; diff --git a/examples/multiview.cpp b/examples/multiview.cpp index 42fbbfc7fe..a65609e8ee 100644 --- a/examples/multiview.cpp +++ b/examples/multiview.cpp @@ -111,8 +111,8 @@ void docs_example() t3 = MView1(3, 0); // accesses the 4th index of the 0th internal array a1, // returns value of 8 - t4 = MView1(2, 1); // accesses 3rd index of the 1st internal array a2, returns - // value of 11 + t4 = MView1(2, 1); // accesses 3rd index of the 1st internal array a2, + // returns value of 11 // _multiview_example_1Daopindex_end printf("Comparison of default MultiView with another MultiView that has the " @@ -181,15 +181,8 @@ int main() { for (int jj = 0; jj < 3; ++jj) { - printf("arr(%i, %i, %i) %d == arrmov(%i, %i, %i) %d\n", - pp, - kk, - jj, - arrView(pp, kk, jj), - kk, - pp, - jj, - arrViewMov(kk, pp, jj)); + printf("arr(%i, %i, %i) %d == arrmov(%i, %i, %i) %d\n", pp, kk, jj, + arrView(pp, kk, jj), kk, pp, jj, arrViewMov(kk, pp, jj)); } } } @@ -215,15 +208,8 @@ int main() { for (int jj = 0; jj < 3; ++jj) { - printf("arr(%i, %i, %i) %d == arrmov(%i, %i, %i) %d\n", - pp, - kk, - jj, - arrView(pp, kk, jj), - kk, - pp, - jj, - arrViewMov(kk, pp, jj)); + printf("arr(%i, %i, %i) %d == arrmov(%i, %i, %i) %d\n", pp, kk, jj, + arrView(pp, kk, jj), kk, pp, jj, arrViewMov(kk, pp, jj)); } } } diff --git a/examples/omp-target-ltimes.cpp b/examples/omp-target-ltimes.cpp index 08b527cfed..f0be32126b 100644 --- a/examples/omp-target-ltimes.cpp +++ b/examples/omp-target-ltimes.cpp @@ -36,8 +36,8 @@ void runLTimesRajaKernel(bool debug, using namespace RAJA::statement; // psi[direction, group, zone] - using PsiView = RAJA:: - TypedView, IDirection, IGroup, IZone>; + using PsiView = RAJA::TypedView, IDirection, + IGroup, IZone>; // phi[moment, group, zone] using PhiView = @@ -84,33 +84,31 @@ void runLTimesRajaKernel(bool debug, omp_target_alloc(sizeof(double) * psi_data.size(), did)); // Copy to device - omp_target_memcpy( - &ell_data[0], d_ell, sizeof(double) * ell_data.size(), 0, 0, hid, did); - omp_target_memcpy( - &phi_data[0], d_phi, sizeof(double) * phi_data.size(), 0, 0, hid, did); - omp_target_memcpy( - &psi_data[0], d_psi, sizeof(double) * psi_data.size(), 0, 0, hid, did); + omp_target_memcpy(&ell_data[0], d_ell, sizeof(double) * ell_data.size(), 0, 0, + hid, did); + omp_target_memcpy(&phi_data[0], d_phi, sizeof(double) * phi_data.size(), 0, 0, + hid, did); + omp_target_memcpy(&psi_data[0], d_psi, sizeof(double) * psi_data.size(), 0, 0, + hid, did); // create views on data std::array ell_perm{{0, 1}}; EllView ell(d_ell, - make_permuted_layout({{num_moments, num_directions}}, ell_perm)); + make_permuted_layout({{num_moments, num_directions}}, ell_perm)); std::array psi_perm{{0, 1, 2}}; - PsiView psi(d_psi, - make_permuted_layout({{num_directions, num_groups, num_zones}}, - psi_perm)); + PsiView psi(d_psi, make_permuted_layout( + {{num_directions, num_groups, num_zones}}, psi_perm)); std::array phi_perm{{0, 1, 2}}; - PhiView phi( - d_phi, - make_permuted_layout({{num_moments, num_groups, num_zones}}, phi_perm)); + PhiView phi(d_phi, make_permuted_layout( + {{num_moments, num_groups, num_zones}}, phi_perm)); - using Pol = RAJA::KernelPolicy, - For<3, RAJA::seq_exec, Lambda<0>>>>; + using Pol = RAJA::KernelPolicy< + Collapse, + For<3, RAJA::seq_exec, Lambda<0>>>>; RAJA::Timer timer; timer.start(); diff --git a/examples/pi-reduce_vs_atomic.cpp b/examples/pi-reduce_vs_atomic.cpp index 777e66980b..2be24c1b44 100644 --- a/examples/pi-reduce_vs_atomic.cpp +++ b/examples/pi-reduce_vs_atomic.cpp @@ -223,8 +223,8 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) *atomic_pi = 0; double* d_atomic_pi = memoryManager::allocate_gpu(1); - hipErrchk(hipMemcpy( - d_atomic_pi, atomic_pi, 1 * sizeof(double), hipMemcpyHostToDevice)); + hipErrchk(hipMemcpy(d_atomic_pi, atomic_pi, 1 * sizeof(double), + hipMemcpyHostToDevice)); using ATOMIC_POL4 = RAJA::hip_atomic; @@ -236,8 +236,8 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) dx / (1.0 + x * x)); }); - hipErrchk(hipMemcpy( - atomic_pi, d_atomic_pi, 1 * sizeof(double), hipMemcpyDeviceToHost)); + hipErrchk(hipMemcpy(atomic_pi, d_atomic_pi, 1 * sizeof(double), + hipMemcpyDeviceToHost)); *atomic_pi *= 4.0; std::cout << "\tpi = " << std::setprecision(prec) << *atomic_pi << std::endl; diff --git a/examples/raja-launch.cpp b/examples/raja-launch.cpp index 8a2f40a76a..4adef54d83 100644 --- a/examples/raja-launch.cpp +++ b/examples/raja-launch.cpp @@ -163,31 +163,25 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) RAJA::LaunchParams(RAJA::Teams(N_tri), RAJA::Threads(N_tri)), [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { - RAJA::loop(ctx, - RAJA::RangeSegment(0, N_tri), + RAJA::loop(ctx, RAJA::RangeSegment(0, N_tri), [&](int r) { // Array shared within threads of the same team RAJA_TEAM_SHARED int s_A[1]; RAJA::loop( - ctx, - RAJA::RangeSegment(0, 1), + ctx, RAJA::RangeSegment(0, 1), [&](int c) { s_A[c] = r; }); // loop c ctx.teamSync(); RAJA::loop( - ctx, - RAJA::RangeSegment(r, N_tri), + ctx, RAJA::RangeSegment(r, N_tri), [&](int c) { D(r, c) = r * N_tri + c; printf("r=%d, c=%d : D=%d : s_A = %d \n", - r, - c, - D(r, c), - s_A[0]); + r, c, D(r, c), s_A[0]); }); // loop c }); // loop r }); // outer lambda diff --git a/examples/red-black-gauss-seidel.cpp b/examples/red-black-gauss-seidel.cpp index e6ef3a64ca..8a73be765f 100644 --- a/examples/red-black-gauss-seidel.cpp +++ b/examples/red-black-gauss-seidel.cpp @@ -241,8 +241,7 @@ void computeErr(double* I, grid_s grid) RAJA::ReduceMax tMax(-1.0); using errPolicy = RAJA::KernelPolicy>>>; RAJA::kernel(RAJA::make_tuple(fdBounds, fdBounds), diff --git a/examples/resource-dynamic-forall.cpp b/examples/resource-dynamic-forall.cpp index aa7c42f8b6..f41e9a5a9c 100644 --- a/examples/resource-dynamic-forall.cpp +++ b/examples/resource-dynamic-forall.cpp @@ -134,9 +134,7 @@ int main(int argc, char* argv[]) RAJA::Get_Host_Resource(host_res, select_cpu_or_gpu); #endif - RAJA::expt::dynamic_forall(res, - pol, - RAJA::RangeSegment(0, N), + RAJA::expt::dynamic_forall(res, pol, RAJA::RangeSegment(0, N), [=] RAJA_HOST_DEVICE(int i) { c[i] = a[i] + b[i]; }); diff --git a/examples/resource-forall.cpp b/examples/resource-forall.cpp index d3f6abc3f6..8cd87c6d26 100644 --- a/examples/resource-forall.cpp +++ b/examples/resource-forall.cpp @@ -91,8 +91,8 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) std::cout << "\n Running RAJA sequential vector addition...\n"; - RAJA::forall( - host, RAJA::RangeSegment(0, N), [=](int i) { c[i] = a[i] + b[i]; }); + RAJA::forall(host, RAJA::RangeSegment(0, N), + [=](int i) { c[i] = a[i] + b[i]; }); checkResult(c, N); @@ -102,8 +102,8 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) std::cout << "\n Running RAJA simd_exec vector addition...\n"; - RAJA::forall( - host, RAJA::RangeSegment(0, N), [=](int i) { c[i] = a[i] + b[i]; }); + RAJA::forall(host, RAJA::RangeSegment(0, N), + [=](int i) { c[i] = a[i] + b[i]; }); checkResult(c, N); @@ -114,8 +114,8 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) std::cout << "\n Running RAJA omp_parallel_for_exec vector addition...\n"; - RAJA::forall( - host, RAJA::RangeSegment(0, N), [=](int i) { c[i] = a[i] + b[i]; }); + RAJA::forall(host, RAJA::RangeSegment(0, N), + [=](int i) { c[i] = a[i] + b[i]; }); checkResult(c, N); @@ -188,13 +188,11 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) res_gpu2.memcpy(d_b2, b, sizeof(int) * N); - RAJA::forall(res_gpu1, - RAJA::RangeSegment(0, N), + RAJA::forall(res_gpu1, RAJA::RangeSegment(0, N), [=] RAJA_DEVICE(int i) { d_c1[i] = d_a1[i] + d_b1[i]; }); - RAJA::forall(res_gpu2, - RAJA::RangeSegment(0, N), + RAJA::forall(res_gpu2, RAJA::RangeSegment(0, N), [=] RAJA_DEVICE(int i) { d_c2[i] = d_a2[i] + d_b2[i]; }); @@ -250,15 +248,13 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // _raja_res_alloc_end // _raja_res_k1_start - RAJA::forall(res_gpu1, - RAJA::RangeSegment(0, N), + RAJA::forall(res_gpu1, RAJA::RangeSegment(0, N), [=] RAJA_HOST_DEVICE(int i) { d_array1[i] = i; }); // _raja_res_k1_end // _raja_res_k2_start RAJA::resources::Event e = RAJA::forall( - res_gpu2, - RAJA::RangeSegment(0, N), + res_gpu2, RAJA::RangeSegment(0, N), [=] RAJA_HOST_DEVICE(int i) { d_array2[i] = -1; }); // _raja_res_k2_end @@ -267,8 +263,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // _raja_res_wait_end // _raja_res_k3_start - RAJA::forall(res_gpu1, - RAJA::RangeSegment(0, N), + RAJA::forall(res_gpu1, RAJA::RangeSegment(0, N), [=] RAJA_HOST_DEVICE(int i) { d_array1[i] *= d_array2[i]; }); // _raja_res_k3_end @@ -279,8 +274,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // _raja_res_k4_start bool check = true; - RAJA::forall(res_host, - RAJA::RangeSegment(0, N), + RAJA::forall(res_host, RAJA::RangeSegment(0, N), [&check, h_array](int i) { if (h_array[i] != -i) diff --git a/examples/resource-kernel.cpp b/examples/resource-kernel.cpp index 64690126c1..a96dbba7f0 100644 --- a/examples/resource-kernel.cpp +++ b/examples/resource-kernel.cpp @@ -29,13 +29,11 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) RAJA::RangeSegment n_range(0, N); using TEST_POL = RAJA::KernelPolicy>>>>; RAJA::forall( - def_host_res, - n_range, + def_host_res, n_range, [=, &def_cuda_res](int i) { RAJA::resources::Cuda res_cuda; diff --git a/examples/resource-launch.cpp b/examples/resource-launch.cpp index 42da55148a..7fd2dc5fcb 100644 --- a/examples/resource-launch.cpp +++ b/examples/resource-launch.cpp @@ -35,23 +35,19 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) using threads_x = RAJA::LoopPolicy; RAJA::forall( - def_host_res, - n_range, + def_host_res, n_range, [=, &def_cuda_res](int i) { RAJA::resources::Cuda res_cuda; RAJA::resources::Event e = RAJA::launch( - res_cuda, - RAJA::LaunchParams(RAJA::Teams(64), RAJA::Threads(1)), + res_cuda, RAJA::LaunchParams(RAJA::Teams(64), RAJA::Threads(1)), [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { - RAJA::loop(ctx, - m_range, + RAJA::loop(ctx, m_range, [&](int j) { - RAJA::loop(ctx, - one_range, + RAJA::loop(ctx, one_range, [&](int k) { d_array[i * M + j] = i * M + j; diff --git a/examples/resource-runtime-launch.cpp b/examples/resource-runtime-launch.cpp index 4c9204747a..6354aa46ef 100644 --- a/examples/resource-runtime-launch.cpp +++ b/examples/resource-runtime-launch.cpp @@ -185,12 +185,10 @@ int main(int argc, char* argv[]) // How the kernel executes now depends on how the resource is constructed // (host or device) RAJA::launch( - res, - RAJA::LaunchParams(RAJA::Teams(GRID_SZ), RAJA::Threads(TEAM_SZ)), + res, RAJA::LaunchParams(RAJA::Teams(GRID_SZ), RAJA::Threads(TEAM_SZ)), [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { - RAJA::loop(ctx, - arange, + RAJA::loop(ctx, arange, [&](int i) { kernel_sum += a[i]; diff --git a/examples/tut_halo-exchange.cpp b/examples/tut_halo-exchange.cpp index 6052d9ca14..8e266d0e43 100644 --- a/examples/tut_halo-exchange.cpp +++ b/examples/tut_halo-exchange.cpp @@ -260,13 +260,13 @@ int main(int argc, char** argv) // std::vector pack_index_lists(num_neighbors, nullptr); std::vector pack_index_list_lengths(num_neighbors, 0); - create_pack_lists( - pack_index_lists, pack_index_list_lengths, halo_width, grid_dims); + create_pack_lists(pack_index_lists, pack_index_list_lengths, halo_width, + grid_dims); std::vector unpack_index_lists(num_neighbors, nullptr); std::vector unpack_index_list_lengths(num_neighbors, 0); - create_unpack_lists( - unpack_index_lists, unpack_index_list_lengths, halo_width, grid_dims); + create_unpack_lists(unpack_index_lists, unpack_index_list_lengths, halo_width, + grid_dims); // _halo_exchange_index_list_generate_end @@ -447,8 +447,7 @@ int main(int argc, char** argv) double* var = vars[v]; - RAJA::forall(range_segment(0, len), - [=](int i) + RAJA::forall(range_segment(0, len), [=](int i) { buffer[i] = var[list[i]]; }); buffer += len; @@ -474,8 +473,7 @@ int main(int argc, char** argv) double* var = vars[v]; - RAJA::forall(range_segment(0, len), - [=](int i) + RAJA::forall(range_segment(0, len), [=](int i) { var[list[i]] = buffer[i]; }); buffer += len; @@ -519,24 +517,17 @@ int main(int argc, char** argv) using forall_policy = RAJA::seq_exec; using workgroup_policy = - RAJA::WorkGroupPolicy; - using workpool = RAJA::WorkPool, + using workpool = RAJA::WorkPool, memory_manager_allocator>; - using workgroup = RAJA::WorkGroup, + using workgroup = RAJA::WorkGroup, memory_manager_allocator>; - using worksite = RAJA::WorkSite, + using worksite = RAJA::WorkSite, memory_manager_allocator>; // _halo_exchange_seq_workgroup_policies_end @@ -702,8 +693,7 @@ int main(int argc, char** argv) double* var = vars[v]; - RAJA::forall(range_segment(0, len), - [=](int i) + RAJA::forall(range_segment(0, len), [=](int i) { buffer[i] = var[list[i]]; }); buffer += len; @@ -729,8 +719,7 @@ int main(int argc, char** argv) double* var = vars[v]; - RAJA::forall(range_segment(0, len), - [=](int i) + RAJA::forall(range_segment(0, len), [=](int i) { var[list[i]] = buffer[i]; }); buffer += len; @@ -772,24 +761,17 @@ int main(int argc, char** argv) using forall_policy = RAJA::omp_parallel_for_exec; using workgroup_policy = - RAJA::WorkGroupPolicy; - using workpool = RAJA::WorkPool, + using workpool = RAJA::WorkPool, memory_manager_allocator>; - using workgroup = RAJA::WorkGroup, + using workgroup = RAJA::WorkGroup, memory_manager_allocator>; - using worksite = RAJA::WorkSite, + using worksite = RAJA::WorkSite, memory_manager_allocator>; // _halo_exchange_openmp_workgroup_policies_end @@ -928,17 +910,13 @@ int main(int argc, char** argv) { int pack_len = pack_index_list_lengths[l]; cuda_pack_index_lists[l] = memoryManager::allocate_gpu(pack_len); - cudaErrchk(cudaMemcpy(cuda_pack_index_lists[l], - pack_index_lists[l], - pack_len * sizeof(int), - cudaMemcpyDefault)); + cudaErrchk(cudaMemcpy(cuda_pack_index_lists[l], pack_index_lists[l], + pack_len * sizeof(int), cudaMemcpyDefault)); int unpack_len = unpack_index_list_lengths[l]; cuda_unpack_index_lists[l] = memoryManager::allocate_gpu(unpack_len); - cudaErrchk(cudaMemcpy(cuda_unpack_index_lists[l], - unpack_index_lists[l], - unpack_len * sizeof(int), - cudaMemcpyDefault)); + cudaErrchk(cudaMemcpy(cuda_unpack_index_lists[l], unpack_index_lists[l], + unpack_len * sizeof(int), cudaMemcpyDefault)); } std::swap(vars, cuda_vars); @@ -1050,8 +1028,8 @@ int main(int argc, char** argv) for (int v = 0; v < num_vars; ++v) { - cudaErrchk(cudaMemcpy( - vars[v], cuda_vars[v], var_size * sizeof(double), cudaMemcpyDefault)); + cudaErrchk(cudaMemcpy(vars[v], cuda_vars[v], var_size * sizeof(double), + cudaMemcpyDefault)); memoryManager::deallocate_gpu(cuda_vars[v]); } @@ -1093,17 +1071,13 @@ int main(int argc, char** argv) { int pack_len = pack_index_list_lengths[l]; cuda_pack_index_lists[l] = memoryManager::allocate_gpu(pack_len); - cudaErrchk(cudaMemcpy(cuda_pack_index_lists[l], - pack_index_lists[l], - pack_len * sizeof(int), - cudaMemcpyDefault)); + cudaErrchk(cudaMemcpy(cuda_pack_index_lists[l], pack_index_lists[l], + pack_len * sizeof(int), cudaMemcpyDefault)); int unpack_len = unpack_index_list_lengths[l]; cuda_unpack_index_lists[l] = memoryManager::allocate_gpu(unpack_len); - cudaErrchk(cudaMemcpy(cuda_unpack_index_lists[l], - unpack_index_lists[l], - unpack_len * sizeof(int), - cudaMemcpyDefault)); + cudaErrchk(cudaMemcpy(cuda_unpack_index_lists[l], unpack_index_lists[l], + unpack_len * sizeof(int), cudaMemcpyDefault)); } std::swap(vars, cuda_vars); @@ -1120,14 +1094,14 @@ int main(int argc, char** argv) RAJA::constant_stride_array_of_objects, RAJA::indirect_function_call_dispatch>; - using workpool = RAJA:: - WorkPool, pinned_allocator>; + using workpool = RAJA::WorkPool, + pinned_allocator>; - using workgroup = RAJA:: - WorkGroup, pinned_allocator>; + using workgroup = RAJA::WorkGroup, + pinned_allocator>; - using worksite = RAJA:: - WorkSite, pinned_allocator>; + using worksite = RAJA::WorkSite, + pinned_allocator>; // _halo_exchange_cuda_workgroup_policies_end std::vector buffers(num_neighbors, nullptr); @@ -1173,8 +1147,7 @@ int main(int argc, char** argv) double* var = vars[v]; - pool_pack.enqueue(range_segment(0, len), - [=] RAJA_DEVICE(int i) + pool_pack.enqueue(range_segment(0, len), [=] RAJA_DEVICE(int i) { buffer[i] = var[list[i]]; }); buffer += len; @@ -1206,8 +1179,7 @@ int main(int argc, char** argv) double* var = vars[v]; - pool_unpack.enqueue(range_segment(0, len), - [=] RAJA_DEVICE(int i) + pool_unpack.enqueue(range_segment(0, len), [=] RAJA_DEVICE(int i) { var[list[i]] = buffer[i]; }); buffer += len; @@ -1241,8 +1213,8 @@ int main(int argc, char** argv) for (int v = 0; v < num_vars; ++v) { - cudaErrchk(cudaMemcpy( - vars[v], cuda_vars[v], var_size * sizeof(double), cudaMemcpyDefault)); + cudaErrchk(cudaMemcpy(vars[v], cuda_vars[v], var_size * sizeof(double), + cudaMemcpyDefault)); memoryManager::deallocate_gpu(cuda_vars[v]); } @@ -1291,17 +1263,13 @@ int main(int argc, char** argv) { int pack_len = pack_index_list_lengths[l]; hip_pack_index_lists[l] = memoryManager::allocate_gpu(pack_len); - hipErrchk(hipMemcpy(hip_pack_index_lists[l], - pack_index_lists[l], - pack_len * sizeof(int), - hipMemcpyHostToDevice)); + hipErrchk(hipMemcpy(hip_pack_index_lists[l], pack_index_lists[l], + pack_len * sizeof(int), hipMemcpyHostToDevice)); int unpack_len = unpack_index_list_lengths[l]; hip_unpack_index_lists[l] = memoryManager::allocate_gpu(unpack_len); - hipErrchk(hipMemcpy(hip_unpack_index_lists[l], - unpack_index_lists[l], - unpack_len * sizeof(int), - hipMemcpyHostToDevice)); + hipErrchk(hipMemcpy(hip_unpack_index_lists[l], unpack_index_lists[l], + unpack_len * sizeof(int), hipMemcpyHostToDevice)); } std::swap(vars, hip_vars); @@ -1413,9 +1381,7 @@ int main(int argc, char** argv) for (int v = 0; v < num_vars; ++v) { - hipErrchk(hipMemcpy(vars[v], - hip_vars[v], - var_size * sizeof(double), + hipErrchk(hipMemcpy(vars[v], hip_vars[v], var_size * sizeof(double), hipMemcpyDeviceToHost)); memoryManager::deallocate_gpu(hip_vars[v]); } @@ -1460,17 +1426,13 @@ int main(int argc, char** argv) { int pack_len = pack_index_list_lengths[l]; hip_pack_index_lists[l] = memoryManager::allocate_gpu(pack_len); - hipErrchk(hipMemcpy(hip_pack_index_lists[l], - pack_index_lists[l], - pack_len * sizeof(int), - hipMemcpyHostToDevice)); + hipErrchk(hipMemcpy(hip_pack_index_lists[l], pack_index_lists[l], + pack_len * sizeof(int), hipMemcpyHostToDevice)); int unpack_len = unpack_index_list_lengths[l]; hip_unpack_index_lists[l] = memoryManager::allocate_gpu(unpack_len); - hipErrchk(hipMemcpy(hip_unpack_index_lists[l], - unpack_index_lists[l], - unpack_len * sizeof(int), - hipMemcpyHostToDevice)); + hipErrchk(hipMemcpy(hip_unpack_index_lists[l], unpack_index_lists[l], + unpack_len * sizeof(int), hipMemcpyHostToDevice)); } std::swap(vars, hip_vars); @@ -1487,14 +1449,14 @@ int main(int argc, char** argv) RAJA::constant_stride_array_of_objects, RAJA::indirect_function_call_dispatch>; - using workpool = RAJA:: - WorkPool, pinned_allocator>; + using workpool = RAJA::WorkPool, + pinned_allocator>; - using workgroup = RAJA:: - WorkGroup, pinned_allocator>; + using workgroup = RAJA::WorkGroup, + pinned_allocator>; - using worksite = RAJA:: - WorkSite, pinned_allocator>; + using worksite = RAJA::WorkSite, + pinned_allocator>; // _halo_exchange_hip_workgroup_policies_end std::vector buffers(num_neighbors, nullptr); @@ -1540,8 +1502,7 @@ int main(int argc, char** argv) double* var = vars[v]; - pool_pack.enqueue(range_segment(0, len), - [=] RAJA_DEVICE(int i) + pool_pack.enqueue(range_segment(0, len), [=] RAJA_DEVICE(int i) { buffer[i] = var[list[i]]; }); buffer += len; @@ -1573,8 +1534,7 @@ int main(int argc, char** argv) double* var = vars[v]; - pool_unpack.enqueue(range_segment(0, len), - [=] RAJA_DEVICE(int i) + pool_unpack.enqueue(range_segment(0, len), [=] RAJA_DEVICE(int i) { var[list[i]] = buffer[i]; }); buffer += len; @@ -1608,9 +1568,7 @@ int main(int argc, char** argv) for (int v = 0; v < num_vars; ++v) { - hipErrchk(hipMemcpy(vars[v], - hip_vars[v], - var_size * sizeof(double), + hipErrchk(hipMemcpy(vars[v], hip_vars[v], var_size * sizeof(double), hipMemcpyDeviceToHost)); memoryManager::deallocate_gpu(hip_vars[v]); } @@ -1654,17 +1612,13 @@ int main(int argc, char** argv) { int pack_len = pack_index_list_lengths[l]; hip_pack_index_lists[l] = memoryManager::allocate_gpu(pack_len); - hipErrchk(hipMemcpy(hip_pack_index_lists[l], - pack_index_lists[l], - pack_len * sizeof(int), - hipMemcpyHostToDevice)); + hipErrchk(hipMemcpy(hip_pack_index_lists[l], pack_index_lists[l], + pack_len * sizeof(int), hipMemcpyHostToDevice)); int unpack_len = unpack_index_list_lengths[l]; hip_unpack_index_lists[l] = memoryManager::allocate_gpu(unpack_len); - hipErrchk(hipMemcpy(hip_unpack_index_lists[l], - unpack_index_lists[l], - unpack_len * sizeof(int), - hipMemcpyHostToDevice)); + hipErrchk(hipMemcpy(hip_unpack_index_lists[l], unpack_index_lists[l], + unpack_len * sizeof(int), hipMemcpyHostToDevice)); } std::swap(vars, hip_vars); @@ -1697,14 +1651,14 @@ int main(int argc, char** argv) RAJA::direct_dispatch, camp::list>>; - using workpool = RAJA:: - WorkPool, pinned_allocator>; + using workpool = RAJA::WorkPool, + pinned_allocator>; - using workgroup = RAJA:: - WorkGroup, pinned_allocator>; + using workgroup = RAJA::WorkGroup, + pinned_allocator>; - using worksite = RAJA:: - WorkSite, pinned_allocator>; + using worksite = RAJA::WorkSite, + pinned_allocator>; std::vector buffers(num_neighbors, nullptr); @@ -1810,9 +1764,7 @@ int main(int argc, char** argv) for (int v = 0; v < num_vars; ++v) { - hipErrchk(hipMemcpy(vars[v], - hip_vars[v], - var_size * sizeof(double), + hipErrchk(hipMemcpy(vars[v], hip_vars[v], var_size * sizeof(double), hipMemcpyDeviceToHost)); memoryManager::deallocate_gpu(hip_vars[v]); } @@ -1927,166 +1879,88 @@ void create_pack_lists(std::vector& pack_index_lists, std::vector pack_index_list_extents(num_neighbors); // faces - pack_index_list_extents[0] = Extent{halo_width, - halo_width + halo_width, - halo_width, - grid_dims[1] + halo_width, - halo_width, - grid_dims[2] + halo_width}; - pack_index_list_extents[1] = Extent{grid_dims[0], - grid_dims[0] + halo_width, - halo_width, - grid_dims[1] + halo_width, - halo_width, - grid_dims[2] + halo_width}; - pack_index_list_extents[2] = Extent{halo_width, - grid_dims[0] + halo_width, - halo_width, - halo_width + halo_width, - halo_width, - grid_dims[2] + halo_width}; - pack_index_list_extents[3] = Extent{halo_width, - grid_dims[0] + halo_width, - grid_dims[1], - grid_dims[1] + halo_width, - halo_width, - grid_dims[2] + halo_width}; - pack_index_list_extents[4] = Extent{halo_width, - grid_dims[0] + halo_width, - halo_width, - grid_dims[1] + halo_width, - halo_width, - halo_width + halo_width}; - pack_index_list_extents[5] = Extent{halo_width, - grid_dims[0] + halo_width, - halo_width, - grid_dims[1] + halo_width, - grid_dims[2], - grid_dims[2] + halo_width}; + pack_index_list_extents[0] = Extent{halo_width, halo_width + halo_width, + halo_width, grid_dims[1] + halo_width, + halo_width, grid_dims[2] + halo_width}; + pack_index_list_extents[1] = Extent{grid_dims[0], grid_dims[0] + halo_width, + halo_width, grid_dims[1] + halo_width, + halo_width, grid_dims[2] + halo_width}; + pack_index_list_extents[2] = Extent{halo_width, grid_dims[0] + halo_width, + halo_width, halo_width + halo_width, + halo_width, grid_dims[2] + halo_width}; + pack_index_list_extents[3] = Extent{halo_width, grid_dims[0] + halo_width, + grid_dims[1], grid_dims[1] + halo_width, + halo_width, grid_dims[2] + halo_width}; + pack_index_list_extents[4] = Extent{halo_width, grid_dims[0] + halo_width, + halo_width, grid_dims[1] + halo_width, + halo_width, halo_width + halo_width}; + pack_index_list_extents[5] = Extent{halo_width, grid_dims[0] + halo_width, + halo_width, grid_dims[1] + halo_width, + grid_dims[2], grid_dims[2] + halo_width}; // edges - pack_index_list_extents[6] = Extent{halo_width, - halo_width + halo_width, - halo_width, - halo_width + halo_width, - halo_width, - grid_dims[2] + halo_width}; - pack_index_list_extents[7] = Extent{halo_width, - halo_width + halo_width, - grid_dims[1], - grid_dims[1] + halo_width, - halo_width, - grid_dims[2] + halo_width}; - pack_index_list_extents[8] = Extent{grid_dims[0], - grid_dims[0] + halo_width, - halo_width, - halo_width + halo_width, - halo_width, - grid_dims[2] + halo_width}; - pack_index_list_extents[9] = Extent{grid_dims[0], - grid_dims[0] + halo_width, - grid_dims[1], - grid_dims[1] + halo_width, - halo_width, - grid_dims[2] + halo_width}; - pack_index_list_extents[10] = Extent{halo_width, - halo_width + halo_width, - halo_width, - grid_dims[1] + halo_width, - halo_width, - halo_width + halo_width}; - pack_index_list_extents[11] = Extent{halo_width, - halo_width + halo_width, - halo_width, - grid_dims[1] + halo_width, - grid_dims[2], - grid_dims[2] + halo_width}; - pack_index_list_extents[12] = Extent{grid_dims[0], - grid_dims[0] + halo_width, - halo_width, - grid_dims[1] + halo_width, - halo_width, - halo_width + halo_width}; - pack_index_list_extents[13] = Extent{grid_dims[0], - grid_dims[0] + halo_width, - halo_width, - grid_dims[1] + halo_width, - grid_dims[2], - grid_dims[2] + halo_width}; - pack_index_list_extents[14] = Extent{halo_width, - grid_dims[0] + halo_width, - halo_width, - halo_width + halo_width, - halo_width, - halo_width + halo_width}; - pack_index_list_extents[15] = Extent{halo_width, - grid_dims[0] + halo_width, - halo_width, - halo_width + halo_width, - grid_dims[2], - grid_dims[2] + halo_width}; - pack_index_list_extents[16] = Extent{halo_width, - grid_dims[0] + halo_width, - grid_dims[1], - grid_dims[1] + halo_width, - halo_width, - halo_width + halo_width}; - pack_index_list_extents[17] = Extent{halo_width, - grid_dims[0] + halo_width, - grid_dims[1], - grid_dims[1] + halo_width, - grid_dims[2], - grid_dims[2] + halo_width}; + pack_index_list_extents[6] = Extent{halo_width, halo_width + halo_width, + halo_width, halo_width + halo_width, + halo_width, grid_dims[2] + halo_width}; + pack_index_list_extents[7] = Extent{halo_width, halo_width + halo_width, + grid_dims[1], grid_dims[1] + halo_width, + halo_width, grid_dims[2] + halo_width}; + pack_index_list_extents[8] = Extent{grid_dims[0], grid_dims[0] + halo_width, + halo_width, halo_width + halo_width, + halo_width, grid_dims[2] + halo_width}; + pack_index_list_extents[9] = Extent{grid_dims[0], grid_dims[0] + halo_width, + grid_dims[1], grid_dims[1] + halo_width, + halo_width, grid_dims[2] + halo_width}; + pack_index_list_extents[10] = Extent{halo_width, halo_width + halo_width, + halo_width, grid_dims[1] + halo_width, + halo_width, halo_width + halo_width}; + pack_index_list_extents[11] = Extent{halo_width, halo_width + halo_width, + halo_width, grid_dims[1] + halo_width, + grid_dims[2], grid_dims[2] + halo_width}; + pack_index_list_extents[12] = Extent{grid_dims[0], grid_dims[0] + halo_width, + halo_width, grid_dims[1] + halo_width, + halo_width, halo_width + halo_width}; + pack_index_list_extents[13] = Extent{grid_dims[0], grid_dims[0] + halo_width, + halo_width, grid_dims[1] + halo_width, + grid_dims[2], grid_dims[2] + halo_width}; + pack_index_list_extents[14] = Extent{halo_width, grid_dims[0] + halo_width, + halo_width, halo_width + halo_width, + halo_width, halo_width + halo_width}; + pack_index_list_extents[15] = Extent{halo_width, grid_dims[0] + halo_width, + halo_width, halo_width + halo_width, + grid_dims[2], grid_dims[2] + halo_width}; + pack_index_list_extents[16] = Extent{halo_width, grid_dims[0] + halo_width, + grid_dims[1], grid_dims[1] + halo_width, + halo_width, halo_width + halo_width}; + pack_index_list_extents[17] = Extent{halo_width, grid_dims[0] + halo_width, + grid_dims[1], grid_dims[1] + halo_width, + grid_dims[2], grid_dims[2] + halo_width}; // corners - pack_index_list_extents[18] = Extent{halo_width, - halo_width + halo_width, - halo_width, - halo_width + halo_width, - halo_width, - halo_width + halo_width}; - pack_index_list_extents[19] = Extent{halo_width, - halo_width + halo_width, - halo_width, - halo_width + halo_width, - grid_dims[2], - grid_dims[2] + halo_width}; - pack_index_list_extents[20] = Extent{halo_width, - halo_width + halo_width, - grid_dims[1], - grid_dims[1] + halo_width, - halo_width, - halo_width + halo_width}; - pack_index_list_extents[21] = Extent{halo_width, - halo_width + halo_width, - grid_dims[1], - grid_dims[1] + halo_width, - grid_dims[2], - grid_dims[2] + halo_width}; - pack_index_list_extents[22] = Extent{grid_dims[0], - grid_dims[0] + halo_width, - halo_width, - halo_width + halo_width, - halo_width, - halo_width + halo_width}; - pack_index_list_extents[23] = Extent{grid_dims[0], - grid_dims[0] + halo_width, - halo_width, - halo_width + halo_width, - grid_dims[2], - grid_dims[2] + halo_width}; - pack_index_list_extents[24] = Extent{grid_dims[0], - grid_dims[0] + halo_width, - grid_dims[1], - grid_dims[1] + halo_width, - halo_width, - halo_width + halo_width}; - pack_index_list_extents[25] = Extent{grid_dims[0], - grid_dims[0] + halo_width, - grid_dims[1], - grid_dims[1] + halo_width, - grid_dims[2], - grid_dims[2] + halo_width}; + pack_index_list_extents[18] = Extent{halo_width, halo_width + halo_width, + halo_width, halo_width + halo_width, + halo_width, halo_width + halo_width}; + pack_index_list_extents[19] = Extent{halo_width, halo_width + halo_width, + halo_width, halo_width + halo_width, + grid_dims[2], grid_dims[2] + halo_width}; + pack_index_list_extents[20] = Extent{halo_width, halo_width + halo_width, + grid_dims[1], grid_dims[1] + halo_width, + halo_width, halo_width + halo_width}; + pack_index_list_extents[21] = Extent{halo_width, halo_width + halo_width, + grid_dims[1], grid_dims[1] + halo_width, + grid_dims[2], grid_dims[2] + halo_width}; + pack_index_list_extents[22] = Extent{grid_dims[0], grid_dims[0] + halo_width, + halo_width, halo_width + halo_width, + halo_width, halo_width + halo_width}; + pack_index_list_extents[23] = Extent{grid_dims[0], grid_dims[0] + halo_width, + halo_width, halo_width + halo_width, + grid_dims[2], grid_dims[2] + halo_width}; + pack_index_list_extents[24] = Extent{grid_dims[0], grid_dims[0] + halo_width, + grid_dims[1], grid_dims[1] + halo_width, + halo_width, halo_width + halo_width}; + pack_index_list_extents[25] = Extent{grid_dims[0], grid_dims[0] + halo_width, + grid_dims[1], grid_dims[1] + halo_width, + grid_dims[2], grid_dims[2] + halo_width}; const int grid_i_stride = 1; const int grid_j_stride = grid_dims[0] + 2 * halo_width; @@ -2149,36 +2023,27 @@ void create_unpack_lists(std::vector& unpack_index_lists, std::vector unpack_index_list_extents(num_neighbors); // faces - unpack_index_list_extents[0] = Extent{0, - halo_width, - halo_width, - grid_dims[1] + halo_width, - halo_width, - grid_dims[2] + halo_width}; + unpack_index_list_extents[0] = Extent{0, halo_width, + halo_width, grid_dims[1] + halo_width, + halo_width, grid_dims[2] + halo_width}; unpack_index_list_extents[1] = Extent{grid_dims[0] + halo_width, grid_dims[0] + 2 * halo_width, halo_width, grid_dims[1] + halo_width, halo_width, grid_dims[2] + halo_width}; - unpack_index_list_extents[2] = Extent{halo_width, - grid_dims[0] + halo_width, - 0, - halo_width, - halo_width, - grid_dims[2] + halo_width}; + unpack_index_list_extents[2] = + Extent{halo_width, grid_dims[0] + halo_width, 0, halo_width, + halo_width, grid_dims[2] + halo_width}; unpack_index_list_extents[3] = Extent{halo_width, grid_dims[0] + halo_width, grid_dims[1] + halo_width, grid_dims[1] + 2 * halo_width, halo_width, grid_dims[2] + halo_width}; - unpack_index_list_extents[4] = Extent{halo_width, - grid_dims[0] + halo_width, - halo_width, - grid_dims[1] + halo_width, - 0, - halo_width}; + unpack_index_list_extents[4] = Extent{halo_width, grid_dims[0] + halo_width, + halo_width, grid_dims[1] + halo_width, + 0, halo_width}; unpack_index_list_extents[5] = Extent{halo_width, grid_dims[0] + halo_width, halo_width, @@ -2229,12 +2094,9 @@ void create_unpack_lists(std::vector& unpack_index_lists, grid_dims[2] + 2 * halo_width}; unpack_index_list_extents[14] = Extent{ halo_width, grid_dims[0] + halo_width, 0, halo_width, 0, halo_width}; - unpack_index_list_extents[15] = Extent{halo_width, - grid_dims[0] + halo_width, - 0, - halo_width, - grid_dims[2] + halo_width, - grid_dims[2] + 2 * halo_width}; + unpack_index_list_extents[15] = Extent{ + halo_width, grid_dims[0] + halo_width, 0, + halo_width, grid_dims[2] + halo_width, grid_dims[2] + 2 * halo_width}; unpack_index_list_extents[16] = Extent{halo_width, grid_dims[0] + halo_width, grid_dims[1] + halo_width, @@ -2257,12 +2119,9 @@ void create_unpack_lists(std::vector& unpack_index_lists, halo_width, grid_dims[2] + halo_width, grid_dims[2] + 2 * halo_width}; - unpack_index_list_extents[20] = Extent{0, - halo_width, - grid_dims[1] + halo_width, - grid_dims[1] + 2 * halo_width, - 0, - halo_width}; + unpack_index_list_extents[20] = Extent{ + 0, halo_width, grid_dims[1] + halo_width, grid_dims[1] + 2 * halo_width, + 0, halo_width}; unpack_index_list_extents[21] = Extent{0, halo_width, grid_dims[1] + halo_width, @@ -2275,24 +2134,19 @@ void create_unpack_lists(std::vector& unpack_index_lists, halo_width, 0, halo_width}; - unpack_index_list_extents[23] = Extent{grid_dims[0] + halo_width, - grid_dims[0] + 2 * halo_width, - 0, - halo_width, - grid_dims[2] + halo_width, - grid_dims[2] + 2 * halo_width}; + unpack_index_list_extents[23] = Extent{ + grid_dims[0] + halo_width, grid_dims[0] + 2 * halo_width, 0, halo_width, + grid_dims[2] + halo_width, grid_dims[2] + 2 * halo_width}; unpack_index_list_extents[24] = Extent{grid_dims[0] + halo_width, grid_dims[0] + 2 * halo_width, grid_dims[1] + halo_width, grid_dims[1] + 2 * halo_width, 0, halo_width}; - unpack_index_list_extents[25] = Extent{grid_dims[0] + halo_width, - grid_dims[0] + 2 * halo_width, - grid_dims[1] + halo_width, - grid_dims[1] + 2 * halo_width, - grid_dims[2] + halo_width, - grid_dims[2] + 2 * halo_width}; + unpack_index_list_extents[25] = + Extent{grid_dims[0] + halo_width, grid_dims[0] + 2 * halo_width, + grid_dims[1] + halo_width, grid_dims[1] + 2 * halo_width, + grid_dims[2] + halo_width, grid_dims[2] + 2 * halo_width}; const int grid_i_stride = 1; const int grid_j_stride = grid_dims[0] + 2 * halo_width; diff --git a/examples/tut_launch_basic.cpp b/examples/tut_launch_basic.cpp index 85c80b9ad5..5a22512dfb 100644 --- a/examples/tut_launch_basic.cpp +++ b/examples/tut_launch_basic.cpp @@ -125,10 +125,7 @@ __global__ void gpuKernel() printf("device-iter: threadIdx_tx %d threadIdx_ty %d block_bx %d " "block_by %d \n", - tx, - ty, - bx, - by); + tx, ty, bx, by); } } } @@ -198,31 +195,24 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv)) { // _team_loops_start RAJA::loop( - ctx, - RAJA::TypedRangeSegment(0, Nteams), + ctx, RAJA::TypedRangeSegment(0, Nteams), [&](int by) { RAJA::loop( - ctx, - RAJA::TypedRangeSegment(0, Nteams), + ctx, RAJA::TypedRangeSegment(0, Nteams), [&](int bx) { RAJA::loop( - ctx, - RAJA::TypedRangeSegment(0, Nthreads), + ctx, RAJA::TypedRangeSegment(0, Nthreads), [&](int ty) { RAJA::loop( - ctx, - RAJA::TypedRangeSegment(0, Nthreads), + ctx, RAJA::TypedRangeSegment(0, Nthreads), [&](int tx) { printf("RAJA Teams: threadId_x %d threadId_y " "%d teamId_x %d teamId_y %d \n", - tx, - ty, - bx, - by); + tx, ty, bx, by); }); }); }); @@ -244,11 +234,8 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv)) for (int tx = 0; tx < Nthreads; ++tx) { - printf("c-iter: iter_tx %d iter_ty %d iter_bx %d iter_by %d \n", - tx, - ty, - bx, - by); + printf("c-iter: iter_tx %d iter_ty %d iter_bx %d iter_by %d \n", tx, + ty, bx, by); } } } diff --git a/examples/tut_matrix-multiply.cpp b/examples/tut_matrix-multiply.cpp index 699fcc3dc2..97759a5112 100644 --- a/examples/tut_matrix-multiply.cpp +++ b/examples/tut_matrix-multiply.cpp @@ -391,10 +391,9 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // This is the same as using an OpenMP 'parallel for' directive on the // outer loop with a 'collapse(2) clause. // - using EXEC_POL3 = RAJA::KernelPolicy< - RAJA::statement::Collapse, // row, col - RAJA::statement::Lambda<0>>>; + using EXEC_POL3 = RAJA::KernelPolicy, // row, col + RAJA::statement::Lambda<0>>>; RAJA::kernel(RAJA::make_tuple(col_range, row_range), [=](int col, int row) @@ -429,12 +428,10 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // and col = threadIdx.x in the kernel. // // - using EXEC_POL4 = - RAJA::KernelPolicy>>>>; + using EXEC_POL4 = RAJA::KernelPolicy>>>>; RAJA::kernel(RAJA::make_tuple(col_range, row_range), [=] RAJA_DEVICE(int col, int row) @@ -467,18 +464,12 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // using EXEC_POL5 = RAJA::KernelPolicy, - RAJA::cuda_block_y_loop, + 1, RAJA::tile_fixed, RAJA::cuda_block_y_loop, RAJA::statement::Tile< - 0, - RAJA::tile_fixed, - RAJA::cuda_block_x_loop, + 0, RAJA::tile_fixed, RAJA::cuda_block_x_loop, RAJA::statement::For< - 1, - RAJA::cuda_thread_y_loop, - RAJA::statement::For<0, - RAJA::cuda_thread_x_loop, + 1, RAJA::cuda_thread_y_loop, + RAJA::statement::For<0, RAJA::cuda_thread_x_loop, RAJA::statement::Lambda<0>>>>>>>; RAJA::kernel(RAJA::make_tuple(col_range, row_range), @@ -526,12 +517,10 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // and blocksize N; i.e., kernel<<>> and defining row = blockIdx.x // and col = threadIdx.x in the kernel. // - using EXEC_POL4 = - RAJA::KernelPolicy>>>>; + using EXEC_POL4 = RAJA::KernelPolicy>>>>; RAJA::kernel(RAJA::make_tuple(col_range, row_range), [=] RAJA_DEVICE(int col, int row) @@ -567,18 +556,12 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // using EXEC_POL5 = RAJA::KernelPolicy, - RAJA::hip_block_y_loop, + 1, RAJA::tile_fixed, RAJA::hip_block_y_loop, RAJA::statement::Tile< - 0, - RAJA::tile_fixed, - RAJA::hip_block_x_loop, + 0, RAJA::tile_fixed, RAJA::hip_block_x_loop, RAJA::statement::For< - 1, - RAJA::hip_thread_y_loop, - RAJA::statement::For<0, - RAJA::hip_thread_x_loop, + 1, RAJA::hip_thread_y_loop, + RAJA::statement::For<0, RAJA::hip_thread_x_loop, RAJA::statement::Lambda<0>>>>>>>; RAJA::kernel(RAJA::make_tuple(col_range, row_range), @@ -626,22 +609,20 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // // _matmult_3lambdakernel_seq_start using EXEC_POL6a = RAJA::KernelPolicy>, // dot = 0.0 - RAJA::statement::For<2, - RAJA::seq_exec, + 0, RAJA::seq_exec, RAJA::statement::Lambda<0, RAJA::Params<0>>, // dot + // = + // 0.0 + RAJA::statement::For<2, RAJA::seq_exec, RAJA::statement::Lambda<1> // inner loop: dot += // ... >, - RAJA::statement:: - Lambda<2, RAJA::Segs<0, 1>, RAJA::Params<0>> // set - // C(row, - // col) - // = dot + RAJA::statement::Lambda<2, RAJA::Segs<0, 1>, + RAJA::Params<0>> // set + // C(row, + // col) + // = dot >>>; RAJA::kernel_param( @@ -684,18 +665,16 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) using RAJA::Segs; using EXEC_POL6b = RAJA::KernelPolicy>, // dot = 0.0 - RAJA::statement::For< - 2, - RAJA::seq_exec, - RAJA::statement::Lambda<1, Segs<0, 1, 2>, Params<0>> // dot += ... - >, - RAJA::statement::Lambda<2, Segs<0, 1>, Params<0>> // C(row, col) = dot + 0, RAJA::seq_exec, RAJA::statement::Lambda<0, Params<0>>, // dot = + // 0.0 + RAJA::statement::For<2, RAJA::seq_exec, + RAJA::statement::Lambda<1, Segs<0, 1, 2>, + Params<0>> // dot += ... + >, + RAJA::statement::Lambda<2, Segs<0, 1>, Params<0>> // C(row, col) = + // dot >>>; RAJA::kernel_param( @@ -730,11 +709,9 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // _matmult_3lambdakernel_ompcollapse_start using EXEC_POL7 = RAJA::KernelPolicy, // row, col - RAJA::statement::Lambda<0, RAJA::Params<0>>, // dot = 0.0 - RAJA::statement::For<2, - RAJA::seq_exec, + RAJA::omp_parallel_collapse_exec, RAJA::ArgList<1, 0>, // row, col + RAJA::statement::Lambda<0, RAJA::Params<0>>, // dot = 0.0 + RAJA::statement::For<2, RAJA::seq_exec, RAJA::statement::Lambda<1> // inner loop: dot += ... >, RAJA::statement::Lambda<2, RAJA::Segs<0, 1>, RAJA::Params<0>> // set @@ -783,12 +760,11 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) 0, RAJA::cuda_thread_x_loop, // col RAJA::statement::Lambda<0, RAJA::Params<0>>, // dot = 0.0 - RAJA::statement::For<2, - RAJA::seq_exec, + RAJA::statement::For<2, RAJA::seq_exec, RAJA::statement::Lambda<1> // dot += ... >, - RAJA::statement:: - Lambda<2, RAJA::Segs<0, 1>, RAJA::Params<0>> // set C = ... + RAJA::statement::Lambda<2, RAJA::Segs<0, 1>, + RAJA::Params<0>> // set C = ... >>>>; // _matmult_3lambdakernel_cuda_end @@ -822,13 +798,9 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // _matmult_3lambdakernel_cudatiled_start using EXEC_POL9a = RAJA::KernelPolicy, - RAJA::cuda_block_y_loop, + 1, RAJA::tile_fixed, RAJA::cuda_block_y_loop, RAJA::statement::Tile< - 0, - RAJA::tile_fixed, - RAJA::cuda_block_x_loop, + 0, RAJA::tile_fixed, RAJA::cuda_block_x_loop, RAJA::statement::For< 1, RAJA::cuda_thread_y_loop, // row @@ -836,14 +808,13 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) 0, RAJA::cuda_thread_x_loop, // col RAJA::statement::Lambda<0, RAJA::Params<0>>, // dot = 0.0 - RAJA::statement::For<2, - RAJA::seq_exec, + RAJA::statement::For<2, RAJA::seq_exec, RAJA::statement::Lambda<1> // dot += // ... >, - RAJA::statement:: - Lambda<2, RAJA::Segs<0, 1>, RAJA::Params<0>> // set C - // = ... + RAJA::statement::Lambda<2, RAJA::Segs<0, 1>, + RAJA::Params<0>> // set C + // = ... >>>>>>; // _matmult_3lambdakernel_cudatiled_end @@ -876,13 +847,9 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) using EXEC_POL9b = RAJA::KernelPolicy, - RAJA::cuda_block_y_loop, + 1, RAJA::tile_fixed, RAJA::cuda_block_y_loop, RAJA::statement::Tile< - 0, - RAJA::tile_fixed, - RAJA::cuda_block_x_loop, + 0, RAJA::tile_fixed, RAJA::cuda_block_x_loop, RAJA::statement::For< 1, RAJA::cuda_thread_y_loop, // row @@ -891,13 +858,13 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) RAJA::cuda_thread_x_loop, // col RAJA::statement::Lambda<0, Params<0>>, // dot = 0.0 RAJA::statement::For< - 2, - RAJA::seq_exec, - RAJA::statement:: - Lambda<1, Segs<0, 1, 2>, Params<0>> // dot += ... + 2, RAJA::seq_exec, + RAJA::statement::Lambda<1, Segs<0, 1, 2>, + Params<0>> // dot += ... >, - RAJA::statement::Lambda<2, Segs<0, 1>, Params<0>> // set C - // = ... + RAJA::statement::Lambda<2, Segs<0, 1>, Params<0>> // set + // C = + // ... >>>>>>; RAJA::kernel_param( @@ -935,87 +902,75 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // for an introduction to RAJA LocalArray types and thread synchronization. using Shmem = - RAJA::LocalArray>; using shmem_Lambda0 = RAJA::statement::Lambda<0, RAJA::Offsets<0, 2>, RAJA::Params<2>>; - using shmem_Lambda1 = RAJA::statement:: - Lambda<1, RAJA::Segs<0, 1>, RAJA::Offsets<0, 1>, RAJA::Params<0>>; - using shmem_Lambda2 = RAJA::statement:: - Lambda<2, RAJA::Segs<1, 2>, RAJA::Offsets<1, 2>, RAJA::Params<1>>; + using shmem_Lambda1 = + RAJA::statement::Lambda<1, RAJA::Segs<0, 1>, RAJA::Offsets<0, 1>, + RAJA::Params<0>>; + using shmem_Lambda2 = + RAJA::statement::Lambda<2, RAJA::Segs<1, 2>, RAJA::Offsets<1, 2>, + RAJA::Params<1>>; using shmem_Lambda3 = RAJA::statement::Lambda<3, RAJA::Offsets<0, 1, 2>, RAJA::Params<0, 1, 2>>; - using shmem_Lambda4 = RAJA::statement:: - Lambda<4, RAJA::Segs<0, 2>, RAJA::Offsets<0, 2>, RAJA::Params<2>>; + using shmem_Lambda4 = + RAJA::statement::Lambda<4, RAJA::Segs<0, 2>, RAJA::Offsets<0, 2>, + RAJA::Params<2>>; using EXEC_POL10 = RAJA::KernelPolicy, + RAJA::cuda_shared_mem, RAJA::ParamList<2, 1, 0>, // Tile rows and cols of C (the result matrix C) RAJA::statement::Tile< - 0, - RAJA::tile_fixed, - RAJA::cuda_block_x_direct, + 0, RAJA::tile_fixed, RAJA::cuda_block_x_direct, RAJA::statement::Tile< - 2, - RAJA::tile_fixed, + 2, RAJA::tile_fixed, RAJA::cuda_block_y_direct, // zero out shmem tile of C RAJA::statement::For< - 2, - RAJA::cuda_thread_y_loop, - RAJA::statement:: - For<0, RAJA::cuda_thread_x_loop, shmem_Lambda0>>, + 2, RAJA::cuda_thread_y_loop, + RAJA::statement::For<0, RAJA::cuda_thread_x_loop, + shmem_Lambda0>>, // Slide window across matrix: Load tiles of global matrices // A, B and compute local dot products RAJA::statement::Tile< - 1, - RAJA::tile_fixed, - RAJA::seq_exec, + 1, RAJA::tile_fixed, RAJA::seq_exec, // Load tile of A into shmem RAJA::statement::For< - 1, - RAJA::cuda_thread_y_loop, - RAJA::statement:: - For<0, RAJA::cuda_thread_x_loop, shmem_Lambda1>>, + 1, RAJA::cuda_thread_y_loop, + RAJA::statement::For<0, RAJA::cuda_thread_x_loop, + shmem_Lambda1>>, // Load tile of B into shmem RAJA::statement::For< - 2, - RAJA::cuda_thread_y_loop, - RAJA::statement:: - For<1, RAJA::cuda_thread_x_loop, shmem_Lambda2>>, + 2, RAJA::cuda_thread_y_loop, + RAJA::statement::For<1, RAJA::cuda_thread_x_loop, + shmem_Lambda2>>, RAJA::statement::CudaSyncThreads, // Partial multiplication RAJA::statement::For< - 2, - RAJA::cuda_thread_y_loop, + 2, RAJA::cuda_thread_y_loop, RAJA::statement::For< - 1, - RAJA::seq_exec, - RAJA::statement::For<0, - RAJA::cuda_thread_x_loop, + 1, RAJA::seq_exec, + RAJA::statement::For<0, RAJA::cuda_thread_x_loop, shmem_Lambda3>>>, RAJA::statement::CudaSyncThreads>, // sliding window // Write memory out to global matrix RAJA::statement::For< - 2, - RAJA::cuda_thread_y_loop, - RAJA::statement::For<0, - RAJA::cuda_thread_x_loop, + 2, RAJA::cuda_thread_y_loop, + RAJA::statement::For<0, RAJA::cuda_thread_x_loop, shmem_Lambda4>>>>> // Create shared // memory > // Cuda kernel @@ -1042,12 +997,8 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) { bShared(tm, tp) = Bview(m, p); }, // Do partial update in shmem - [=] RAJA_HOST_DEVICE(int tn, - int tm, - int tp, - Shmem& aShared, - Shmem& bShared, - Shmem& cShared) + [=] RAJA_HOST_DEVICE(int tn, int tm, int tp, Shmem& aShared, + Shmem& bShared, Shmem& cShared) { cShared(tn, tp) += aShared(tn, tm) * bShared(tm, tp); }, // Write out complete result @@ -1104,12 +1055,11 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) 0, RAJA::hip_thread_x_loop, // col RAJA::statement::Lambda<0, RAJA::Params<0>>, // dot = 0.0 - RAJA::statement::For<2, - RAJA::seq_exec, + RAJA::statement::For<2, RAJA::seq_exec, RAJA::statement::Lambda<1> // dot += ... >, - RAJA::statement:: - Lambda<2, RAJA::Segs<0, 1>, RAJA::Params<0>> // set C = ... + RAJA::statement::Lambda<2, RAJA::Segs<0, 1>, + RAJA::Params<0>> // set C = ... >>>>; // _matmult_3lambdakernel_hip_end @@ -1147,13 +1097,9 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // _matmult_3lambdakernel_hiptiled_start using EXEC_POL9b = RAJA::KernelPolicy, - RAJA::hip_block_y_loop, + 1, RAJA::tile_fixed, RAJA::hip_block_y_loop, RAJA::statement::Tile< - 0, - RAJA::tile_fixed, - RAJA::hip_block_x_loop, + 0, RAJA::tile_fixed, RAJA::hip_block_x_loop, RAJA::statement::For< 1, RAJA::hip_thread_y_loop, // row @@ -1162,13 +1108,13 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) RAJA::hip_thread_x_loop, // col RAJA::statement::Lambda<0, Params<0>>, // dot = 0.0 RAJA::statement::For< - 2, - RAJA::seq_exec, - RAJA::statement:: - Lambda<1, Segs<0, 1, 2>, Params<0>> // dot += ... + 2, RAJA::seq_exec, + RAJA::statement::Lambda<1, Segs<0, 1, 2>, + Params<0>> // dot += ... >, - RAJA::statement::Lambda<2, Segs<0, 1>, Params<0>> // set C - // = ... + RAJA::statement::Lambda<2, Segs<0, 1>, Params<0>> // set + // C = + // ... >>>>>>; // _matmult_3lambdakernel_hiptiled_end @@ -1211,8 +1157,8 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // (int)griddim.y, (int)blockdim.x, (int)blockdim.y); // Launch HIP kernel defined near the top of this file. - hipLaunchKernelGGL( - (matMultKernel), dim3(griddim), dim3(blockdim), 0, 0, N, d_C, d_A, d_B); + hipLaunchKernelGGL((matMultKernel), dim3(griddim), dim3(blockdim), 0, 0, N, + d_C, d_A, d_B); hipDeviceSynchronize(); diff --git a/examples/wave-eqn.cpp b/examples/wave-eqn.cpp index c25a8ded7c..bce47544ba 100644 --- a/examples/wave-eqn.cpp +++ b/examples/wave-eqn.cpp @@ -126,8 +126,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // Sequential policy using fdPolicy = RAJA::KernelPolicy>>>; // OpenMP policy @@ -198,8 +197,7 @@ void computeErr(double* P, double tf, grid_s grid) RAJA::ReduceMax tMax(-1.0); using initialPolicy = RAJA::KernelPolicy>>>; RAJA::kernel(RAJA::make_tuple(fdBounds, fdBounds), @@ -231,8 +229,7 @@ void setIC(double* P1, double* P2, double t0, double t1, grid_s grid) RAJA::RangeSegment fdBounds(0, grid.nx); using initialPolicy = RAJA::KernelPolicy>>>; RAJA::kernel(RAJA::make_tuple(fdBounds, fdBounds), @@ -259,8 +256,8 @@ void wave(T* P1, T* P2, RAJA::RangeSegment fdBounds, double ct, int nx) // // Coefficients for fourth order stencil // - double coeff[5] = { - -1.0 / 12.0, 4.0 / 3.0, -5.0 / 2.0, 4.0 / 3.0, -1.0 / 12.0}; + double coeff[5] = {-1.0 / 12.0, 4.0 / 3.0, -5.0 / 2.0, 4.0 / 3.0, + -1.0 / 12.0}; const int id = tx + ty * nx; double P_old = P1[id]; diff --git a/exercises/atomic-histogram_solution.cpp b/exercises/atomic-histogram_solution.cpp index 7884b4e305..4f9dff0c8a 100644 --- a/exercises/atomic-histogram_solution.cpp +++ b/exercises/atomic-histogram_solution.cpp @@ -194,8 +194,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // _rajacuda_atomic_histogram_start RAJA::forall>( - array_range, - [=] RAJA_DEVICE(int i) + array_range, [=] RAJA_DEVICE(int i) { RAJA::atomicAdd(&hist[array[i]], 1); }); // _rajacuda_atomic_histogram_end @@ -218,8 +217,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // _rajacuda_atomicauto_histogram_start RAJA::forall>( - array_range, - [=] RAJA_DEVICE(int i) + array_range, [=] RAJA_DEVICE(int i) { RAJA::atomicAdd(&hist[array[i]], 1); }); // _rajacuda_atomicauto_histogram_end @@ -240,8 +238,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // _rajahip_atomic_histogram_start RAJA::forall>( - array_range, - [=] RAJA_DEVICE(int i) + array_range, [=] RAJA_DEVICE(int i) { RAJA::atomicAdd(&hist[array[i]], 1); }); // _rajahip_atomic_histogram_end @@ -264,8 +261,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // _rajahip_atomicauto_histogram_start RAJA::forall>( - array_range, - [=] RAJA_DEVICE(int i) + array_range, [=] RAJA_DEVICE(int i) { RAJA::atomicAdd(&hist[array[i]], 1); }); // _rajahip_atomicauto_histogram_end diff --git a/exercises/dot-product_solution.cpp b/exercises/dot-product_solution.cpp index c181ca04c2..57887ea51b 100644 --- a/exercises/dot-product_solution.cpp +++ b/exercises/dot-product_solution.cpp @@ -107,8 +107,8 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // _rajaomp_dotprod_start RAJA::ReduceSum ompdot(0.0); - RAJA::forall( - RAJA::RangeSegment(0, N), [=](int i) { ompdot += a[i] * b[i]; }); + RAJA::forall(RAJA::RangeSegment(0, N), [=](int i) + { ompdot += a[i] * b[i]; }); dot = ompdot.get(); // _rajaomp_dotprod_end diff --git a/exercises/kernel-matrix-transpose-local-array.cpp b/exercises/kernel-matrix-transpose-local-array.cpp index 80ff56a913..29c18e7052 100644 --- a/exercises/kernel-matrix-transpose-local-array.cpp +++ b/exercises/kernel-matrix-transpose-local-array.cpp @@ -195,8 +195,8 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // // _mattranspose_localarray_start - using TILE_MEM = RAJA:: - LocalArray, RAJA::SizeList>; + using TILE_MEM = RAJA::LocalArray, + RAJA::SizeList>; TILE_MEM Tile_Array; // _mattranspose_localarray_end @@ -362,31 +362,23 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // tiles needed to carry out the transpose // RAJA::statement::Tile< - 1, - RAJA::tile_fixed, - RAJA::seq_exec, + 1, RAJA::tile_fixed, RAJA::seq_exec, RAJA::statement::Tile< - 0, - RAJA::tile_fixed, - RAJA::seq_exec, + 0, RAJA::tile_fixed, RAJA::seq_exec, // This statement will initalize local array memory inside a // kernel. The cpu_tile_mem policy specifies that memory should be // allocated on the stack. The entries in the RAJA::ParamList // identify RAJA local arrays to intialize in the parameter tuple. RAJA::statement::InitLocalMem< - RAJA::cpu_tile_mem, - RAJA::ParamList<2>, + RAJA::cpu_tile_mem, RAJA::ParamList<2>, // // (1) Execution policies for the first set of inner // loops. These loops copy data from the global matrices // to the local tile. // RAJA::statement::ForICount< - 1, - RAJA::statement::Param<1>, - RAJA::omp_parallel_for_exec, - RAJA::statement::ForICount<0, - RAJA::statement::Param<0>, + 1, RAJA::statement::Param<1>, RAJA::omp_parallel_for_exec, + RAJA::statement::ForICount<0, RAJA::statement::Param<0>, RAJA::seq_exec, RAJA::statement::Lambda<0>>>, // @@ -398,13 +390,9 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // index has unit stride. // RAJA::statement::ForICount< - 0, - RAJA::statement::Param<0>, - RAJA::seq_exec, + 0, RAJA::statement::Param<0>, RAJA::seq_exec, RAJA::statement::ForICount< - 1, - RAJA::statement::Param<1>, - RAJA::seq_exec, + 1, RAJA::statement::Param<1>, RAJA::seq_exec, RAJA::statement::Lambda<1>>>>>>>; RAJA::kernel_param( @@ -435,31 +423,23 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // tiles needed to carry out the transpose // RAJA::statement::Tile< - 1, - RAJA::tile_fixed, - RAJA::cuda_block_y_loop, + 1, RAJA::tile_fixed, RAJA::cuda_block_y_loop, RAJA::statement::Tile< - 0, - RAJA::tile_fixed, - RAJA::cuda_block_x_loop, + 0, RAJA::tile_fixed, RAJA::cuda_block_x_loop, // This statement will initalize local array memory inside a // kernel. The cpu_tile_mem policy specifies that memory should be // allocated on the stack. The entries in the RAJA::ParamList // identify RAJA local arrays to intialize in the parameter tuple. RAJA::statement::InitLocalMem< - RAJA::cuda_shared_mem, - RAJA::ParamList<2>, + RAJA::cuda_shared_mem, RAJA::ParamList<2>, // // (1) Execution policies for the first set of inner // loops. These loops copy data from the global matrices // to the local tile. // RAJA::statement::ForICount< - 1, - RAJA::statement::Param<0>, - RAJA::cuda_thread_y_direct, - RAJA::statement::ForICount<0, - RAJA::statement::Param<1>, + 1, RAJA::statement::Param<0>, RAJA::cuda_thread_y_direct, + RAJA::statement::ForICount<0, RAJA::statement::Param<1>, RAJA::cuda_thread_x_direct, RAJA::statement::Lambda<0>>>, // Synchronize threads to ensure all loads @@ -474,11 +454,8 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // index has unit stride. // RAJA::statement::ForICount< - 0, - RAJA::statement::Param<1>, - RAJA::cuda_thread_y_direct, - RAJA::statement::ForICount<1, - RAJA::statement::Param<0>, + 0, RAJA::statement::Param<1>, RAJA::cuda_thread_y_direct, + RAJA::statement::ForICount<1, RAJA::statement::Param<0>, RAJA::cuda_thread_x_direct, RAJA::statement::Lambda<1>>>, // Synchronize threads to ensure all reads @@ -531,31 +508,23 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // tiles needed to carry out the transpose // RAJA::statement::Tile< - 1, - RAJA::tile_fixed, - RAJA::hip_block_y_loop, + 1, RAJA::tile_fixed, RAJA::hip_block_y_loop, RAJA::statement::Tile< - 0, - RAJA::tile_fixed, - RAJA::hip_block_x_loop, + 0, RAJA::tile_fixed, RAJA::hip_block_x_loop, // This statement will initalize local array memory inside a // kernel. The cpu_tile_mem policy specifies that memory should be // allocated on the stack. The entries in the RAJA::ParamList // identify RAJA local arrays to intialize in the parameter tuple. RAJA::statement::InitLocalMem< - RAJA::hip_shared_mem, - RAJA::ParamList<2>, + RAJA::hip_shared_mem, RAJA::ParamList<2>, // // (1) Execution policies for the first set of inner // loops. These loops copy data from the global matrices // to the local tile. // RAJA::statement::ForICount< - 1, - RAJA::statement::Param<0>, - RAJA::hip_thread_y_direct, - RAJA::statement::ForICount<0, - RAJA::statement::Param<1>, + 1, RAJA::statement::Param<0>, RAJA::hip_thread_y_direct, + RAJA::statement::ForICount<0, RAJA::statement::Param<1>, RAJA::hip_thread_x_direct, RAJA::statement::Lambda<0>>>, // Synchronize threads to ensure all loads @@ -570,11 +539,8 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // index has unit stride. // RAJA::statement::ForICount< - 0, - RAJA::statement::Param<1>, - RAJA::hip_thread_y_direct, - RAJA::statement::ForICount<1, - RAJA::statement::Param<0>, + 0, RAJA::statement::Param<1>, RAJA::hip_thread_y_direct, + RAJA::statement::ForICount<1, RAJA::statement::Param<0>, RAJA::hip_thread_x_direct, RAJA::statement::Lambda<1>>>, // Synchronize threads to ensure all reads diff --git a/exercises/kernel-matrix-transpose-local-array_solution.cpp b/exercises/kernel-matrix-transpose-local-array_solution.cpp index 80ec0e61d4..eedad74d05 100644 --- a/exercises/kernel-matrix-transpose-local-array_solution.cpp +++ b/exercises/kernel-matrix-transpose-local-array_solution.cpp @@ -195,8 +195,8 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // // _mattranspose_localarray_start - using TILE_MEM = RAJA:: - LocalArray, RAJA::SizeList>; + using TILE_MEM = RAJA::LocalArray, + RAJA::SizeList>; TILE_MEM Tile_Array; // _mattranspose_localarray_end @@ -210,33 +210,22 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // _mattranspose_localarray_raja_start using SEQ_EXEC_POL_I = RAJA::KernelPolicy, - RAJA::seq_exec, + 1, RAJA::tile_fixed, RAJA::seq_exec, RAJA::statement::Tile< - 0, - RAJA::tile_fixed, - RAJA::seq_exec, + 0, RAJA::tile_fixed, RAJA::seq_exec, RAJA::statement::InitLocalMem< - RAJA::cpu_tile_mem, - RAJA::ParamList<2>, + RAJA::cpu_tile_mem, RAJA::ParamList<2>, RAJA::statement::ForICount< - 1, - RAJA::statement::Param<0>, - RAJA::seq_exec, - RAJA::statement::ForICount<0, - RAJA::statement::Param<1>, + 1, RAJA::statement::Param<0>, RAJA::seq_exec, + RAJA::statement::ForICount<0, RAJA::statement::Param<1>, RAJA::seq_exec, RAJA::statement::Lambda<0>>>, RAJA::statement::ForICount< - 0, - RAJA::statement::Param<1>, - RAJA::seq_exec, - RAJA::statement::ForICount<1, - RAJA::statement::Param<0>, + 0, RAJA::statement::Param<1>, RAJA::seq_exec, + RAJA::statement::ForICount<1, RAJA::statement::Param<0>, RAJA::seq_exec, RAJA::statement::Lambda<1>>> @@ -275,31 +264,23 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // tiles needed to carry out the transpose // RAJA::statement::Tile< - 1, - RAJA::tile_fixed, - RAJA::omp_parallel_for_exec, + 1, RAJA::tile_fixed, RAJA::omp_parallel_for_exec, RAJA::statement::Tile< - 0, - RAJA::tile_fixed, - RAJA::seq_exec, + 0, RAJA::tile_fixed, RAJA::seq_exec, // This statement will initalize local array memory inside a // kernel. The cpu_tile_mem policy specifies that memory should be // allocated on the stack. The entries in the RAJA::ParamList // identify RAJA local arrays in the parameter tuple to intialize. RAJA::statement::InitLocalMem< - RAJA::cpu_tile_mem, - RAJA::ParamList<2>, + RAJA::cpu_tile_mem, RAJA::ParamList<2>, // // (1) Execution policies for the first set of inner // loops. These loops copy data from the global matrices // to the local tile. // RAJA::statement::ForICount< - 1, - RAJA::statement::Param<0>, - RAJA::seq_exec, - RAJA::statement::ForICount<0, - RAJA::statement::Param<1>, + 1, RAJA::statement::Param<0>, RAJA::seq_exec, + RAJA::statement::ForICount<0, RAJA::statement::Param<1>, RAJA::seq_exec, RAJA::statement::Lambda<0>>>, // @@ -311,13 +292,9 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // index has unit stride. // RAJA::statement::ForICount< - 0, - RAJA::statement::Param<1>, - RAJA::seq_exec, + 0, RAJA::statement::Param<1>, RAJA::seq_exec, RAJA::statement::ForICount< - 1, - RAJA::statement::Param<0>, - RAJA::seq_exec, + 1, RAJA::statement::Param<0>, RAJA::seq_exec, RAJA::statement::Lambda<1>>>>>>>; RAJA::kernel_param( @@ -347,31 +324,23 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // tiles needed to carry out the transpose // RAJA::statement::Tile< - 1, - RAJA::tile_fixed, - RAJA::seq_exec, + 1, RAJA::tile_fixed, RAJA::seq_exec, RAJA::statement::Tile< - 0, - RAJA::tile_fixed, - RAJA::seq_exec, + 0, RAJA::tile_fixed, RAJA::seq_exec, // This statement will initalize local array memory inside a // kernel. The cpu_tile_mem policy specifies that memory should be // allocated on the stack. The entries in the RAJA::ParamList // identify RAJA local arrays to intialize in the parameter tuple. RAJA::statement::InitLocalMem< - RAJA::cpu_tile_mem, - RAJA::ParamList<2>, + RAJA::cpu_tile_mem, RAJA::ParamList<2>, // // (1) Execution policies for the first set of inner // loops. These loops copy data from the global matrices // to the local tile. // RAJA::statement::ForICount< - 1, - RAJA::statement::Param<1>, - RAJA::omp_parallel_for_exec, - RAJA::statement::ForICount<0, - RAJA::statement::Param<0>, + 1, RAJA::statement::Param<1>, RAJA::omp_parallel_for_exec, + RAJA::statement::ForICount<0, RAJA::statement::Param<0>, RAJA::seq_exec, RAJA::statement::Lambda<0>>>, // @@ -383,13 +352,9 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // index has unit stride. // RAJA::statement::ForICount< - 0, - RAJA::statement::Param<0>, - RAJA::seq_exec, + 0, RAJA::statement::Param<0>, RAJA::seq_exec, RAJA::statement::ForICount< - 1, - RAJA::statement::Param<1>, - RAJA::seq_exec, + 1, RAJA::statement::Param<1>, RAJA::seq_exec, RAJA::statement::Lambda<1>>>>>>>; RAJA::kernel_param( @@ -420,31 +385,23 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // tiles needed to carry out the transpose // RAJA::statement::Tile< - 1, - RAJA::tile_fixed, - RAJA::cuda_block_y_loop, + 1, RAJA::tile_fixed, RAJA::cuda_block_y_loop, RAJA::statement::Tile< - 0, - RAJA::tile_fixed, - RAJA::cuda_block_x_loop, + 0, RAJA::tile_fixed, RAJA::cuda_block_x_loop, // This statement will initalize local array memory inside a // kernel. The cpu_tile_mem policy specifies that memory should be // allocated on the stack. The entries in the RAJA::ParamList // identify RAJA local arrays to intialize in the parameter tuple. RAJA::statement::InitLocalMem< - RAJA::cuda_shared_mem, - RAJA::ParamList<2>, + RAJA::cuda_shared_mem, RAJA::ParamList<2>, // // (1) Execution policies for the first set of inner // loops. These loops copy data from the global matrices // to the local tile. // RAJA::statement::ForICount< - 1, - RAJA::statement::Param<0>, - RAJA::cuda_thread_y_direct, - RAJA::statement::ForICount<0, - RAJA::statement::Param<1>, + 1, RAJA::statement::Param<0>, RAJA::cuda_thread_y_direct, + RAJA::statement::ForICount<0, RAJA::statement::Param<1>, RAJA::cuda_thread_x_direct, RAJA::statement::Lambda<0>>>, // Synchronize threads to ensure all loads @@ -459,11 +416,8 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // index has unit stride. // RAJA::statement::ForICount< - 0, - RAJA::statement::Param<1>, - RAJA::cuda_thread_y_direct, - RAJA::statement::ForICount<1, - RAJA::statement::Param<0>, + 0, RAJA::statement::Param<1>, RAJA::cuda_thread_y_direct, + RAJA::statement::ForICount<1, RAJA::statement::Param<0>, RAJA::cuda_thread_x_direct, RAJA::statement::Lambda<1>>>, // Synchronize threads to ensure all reads @@ -516,31 +470,23 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // tiles needed to carry out the transpose // RAJA::statement::Tile< - 1, - RAJA::tile_fixed, - RAJA::hip_block_y_loop, + 1, RAJA::tile_fixed, RAJA::hip_block_y_loop, RAJA::statement::Tile< - 0, - RAJA::tile_fixed, - RAJA::hip_block_x_loop, + 0, RAJA::tile_fixed, RAJA::hip_block_x_loop, // This statement will initalize local array memory inside a // kernel. The cpu_tile_mem policy specifies that memory should be // allocated on the stack. The entries in the RAJA::ParamList // identify RAJA local arrays to intialize in the parameter tuple. RAJA::statement::InitLocalMem< - RAJA::hip_shared_mem, - RAJA::ParamList<2>, + RAJA::hip_shared_mem, RAJA::ParamList<2>, // // (1) Execution policies for the first set of inner // loops. These loops copy data from the global matrices // to the local tile. // RAJA::statement::ForICount< - 1, - RAJA::statement::Param<0>, - RAJA::hip_thread_y_direct, - RAJA::statement::ForICount<0, - RAJA::statement::Param<1>, + 1, RAJA::statement::Param<0>, RAJA::hip_thread_y_direct, + RAJA::statement::ForICount<0, RAJA::statement::Param<1>, RAJA::hip_thread_x_direct, RAJA::statement::Lambda<0>>>, // Synchronize threads to ensure all loads @@ -555,11 +501,8 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // index has unit stride. // RAJA::statement::ForICount< - 0, - RAJA::statement::Param<1>, - RAJA::hip_thread_y_direct, - RAJA::statement::ForICount<1, - RAJA::statement::Param<0>, + 0, RAJA::statement::Param<1>, RAJA::hip_thread_y_direct, + RAJA::statement::ForICount<1, RAJA::statement::Param<0>, RAJA::hip_thread_x_direct, RAJA::statement::Lambda<1>>>, // Synchronize threads to ensure all reads @@ -598,38 +541,26 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // _raja_mattranspose_lambdaargs_start using SEQ_EXEC_POL_II = RAJA::KernelPolicy, - RAJA::seq_exec, + 1, RAJA::tile_fixed, RAJA::seq_exec, RAJA::statement::Tile< - 0, - RAJA::tile_fixed, - RAJA::seq_exec, + 0, RAJA::tile_fixed, RAJA::seq_exec, RAJA::statement::InitLocalMem< - RAJA::cpu_tile_mem, - RAJA::ParamList<0>, + RAJA::cpu_tile_mem, RAJA::ParamList<0>, RAJA::statement::For< - 1, - RAJA::seq_exec, - RAJA::statement::For<0, - RAJA::seq_exec, - RAJA::statement::Lambda<0, - Segs<0>, - Segs<1>, - Offsets<0>, - Offsets<1>, - Params<0>>>>, + 1, RAJA::seq_exec, + RAJA::statement::For< + 0, RAJA::seq_exec, + RAJA::statement::Lambda<0, Segs<0>, Segs<1>, Offsets<0>, + Offsets<1>, Params<0>>>>, RAJA::statement::For< - 0, - RAJA::seq_exec, + 0, RAJA::seq_exec, RAJA::statement::For< - 1, - RAJA::seq_exec, - RAJA::statement:: - Lambda<1, Segs<0, 1>, Offsets<0, 1>, Params<0>>>> + 1, RAJA::seq_exec, + RAJA::statement::Lambda<1, Segs<0, 1>, Offsets<0, 1>, + Params<0>>>> >>>>; diff --git a/exercises/kernel-matrix-transpose-tiled.cpp b/exercises/kernel-matrix-transpose-tiled.cpp index d7658565ff..59a7a9b58f 100644 --- a/exercises/kernel-matrix-transpose-tiled.cpp +++ b/exercises/kernel-matrix-transpose-tiled.cpp @@ -175,18 +175,13 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) /// using TILED_KERNEL_EXEC_POL = RAJA::KernelPolicy, - RAJA::seq_exec, + 1, RAJA::tile_fixed, RAJA::seq_exec, RAJA::statement::Tile< - 0, - RAJA::tile_fixed, - RAJA::seq_exec, + 0, RAJA::tile_fixed, RAJA::seq_exec, RAJA::statement::For< - 1, - RAJA::seq_exec, - RAJA::statement:: - For<0, RAJA::seq_exec, RAJA::statement::Lambda<0>>>>>>; + 1, RAJA::seq_exec, + RAJA::statement::For<0, RAJA::seq_exec, + RAJA::statement::Lambda<0>>>>>>; RAJA::kernel(RAJA::make_tuple(col_Range, row_Range), [=](int col, int row) @@ -243,18 +238,14 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // to/from the tile. // using TILED_KERNEL_EXEC_POL_OMP2 = RAJA::KernelPolicy, - RAJA::seq_exec, - RAJA::statement::Tile<0, - RAJA::tile_fixed, - RAJA::seq_exec, - RAJA::statement::Collapse< - RAJA::omp_parallel_collapse_exec, - RAJA::ArgList<0, 1>, - RAJA::statement::Lambda<0>> // closes collapse - > // closes Tile 0 - > // closes Tile 1 + 1, RAJA::tile_fixed, RAJA::seq_exec, + RAJA::statement::Tile< + 0, RAJA::tile_fixed, RAJA::seq_exec, + RAJA::statement::Collapse< + RAJA::omp_parallel_collapse_exec, RAJA::ArgList<0, 1>, + RAJA::statement::Lambda<0>> // closes collapse + > // closes Tile 0 + > // closes Tile 1 >; // closes policy list RAJA::kernel( @@ -318,23 +309,16 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) using TILED_KERNEL_EXEC_POL_HIP = RAJA::KernelPolicy, - RAJA::hip_block_y_loop, + 1, RAJA::tile_fixed, RAJA::hip_block_y_loop, RAJA::statement::Tile< - 0, - RAJA::tile_fixed, - RAJA::hip_block_x_loop, + 0, RAJA::tile_fixed, RAJA::hip_block_x_loop, RAJA::statement::For< - 1, - RAJA::hip_thread_x_direct, - RAJA::statement::For<0, - RAJA::hip_thread_y_direct, + 1, RAJA::hip_thread_x_direct, + RAJA::statement::For<0, RAJA::hip_thread_y_direct, RAJA::statement::Lambda<0>>>>>>>; RAJA::kernel( - RAJA::make_tuple(col_Range, row_Range), - [=] RAJA_DEVICE(int col, int row) + RAJA::make_tuple(col_Range, row_Range), [=] RAJA_DEVICE(int col, int row) { d_Atview(col, row) = d_Aview(row, col); }); hipErrchk( diff --git a/exercises/kernel-matrix-transpose-tiled_solution.cpp b/exercises/kernel-matrix-transpose-tiled_solution.cpp index 39b6e00b6a..9e0c838e58 100644 --- a/exercises/kernel-matrix-transpose-tiled_solution.cpp +++ b/exercises/kernel-matrix-transpose-tiled_solution.cpp @@ -165,18 +165,13 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // // _raja_tiled_mattranspose_start using TILED_KERNEL_EXEC_POL = RAJA::KernelPolicy, - RAJA::seq_exec, + 1, RAJA::tile_fixed, RAJA::seq_exec, RAJA::statement::Tile< - 0, - RAJA::tile_fixed, - RAJA::seq_exec, + 0, RAJA::tile_fixed, RAJA::seq_exec, RAJA::statement::For< - 1, - RAJA::seq_exec, - RAJA::statement:: - For<0, RAJA::seq_exec, RAJA::statement::Lambda<0>>>>>>; + 1, RAJA::seq_exec, + RAJA::statement::For<0, RAJA::seq_exec, + RAJA::statement::Lambda<0>>>>>>; RAJA::kernel(RAJA::make_tuple(col_Range, row_Range), [=](int col, int row) @@ -198,18 +193,13 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // one of the inner loops. // using TILED_KERNEL_EXEC_POL_OMP = RAJA::KernelPolicy, - RAJA::omp_parallel_for_exec, + 1, RAJA::tile_fixed, RAJA::omp_parallel_for_exec, RAJA::statement::Tile< - 0, - RAJA::tile_fixed, - RAJA::seq_exec, + 0, RAJA::tile_fixed, RAJA::seq_exec, RAJA::statement::For< - 1, - RAJA::omp_parallel_for_exec, - RAJA::statement:: - For<0, RAJA::seq_exec, RAJA::statement::Lambda<0>>>>>>; + 1, RAJA::omp_parallel_for_exec, + RAJA::statement::For<0, RAJA::seq_exec, + RAJA::statement::Lambda<0>>>>>>; RAJA::kernel( RAJA::make_tuple(col_Range, row_Range), @@ -230,18 +220,14 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // to/from the tile. // using TILED_KERNEL_EXEC_POL_OMP2 = RAJA::KernelPolicy, - RAJA::seq_exec, - RAJA::statement::Tile<0, - RAJA::tile_fixed, - RAJA::seq_exec, - RAJA::statement::Collapse< - RAJA::omp_parallel_collapse_exec, - RAJA::ArgList<0, 1>, - RAJA::statement::Lambda<0>> // closes collapse - > // closes Tile 0 - > // closes Tile 1 + 1, RAJA::tile_fixed, RAJA::seq_exec, + RAJA::statement::Tile< + 0, RAJA::tile_fixed, RAJA::seq_exec, + RAJA::statement::Collapse< + RAJA::omp_parallel_collapse_exec, RAJA::ArgList<0, 1>, + RAJA::statement::Lambda<0>> // closes collapse + > // closes Tile 0 + > // closes Tile 1 >; // closes policy list RAJA::kernel( @@ -263,23 +249,16 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // _raja_mattranspose_cuda_start using TILED_KERNEL_EXEC_POL_CUDA = RAJA::KernelPolicy, - RAJA::cuda_block_y_loop, + 1, RAJA::tile_fixed, RAJA::cuda_block_y_loop, RAJA::statement::Tile< - 0, - RAJA::tile_fixed, - RAJA::cuda_block_x_loop, + 0, RAJA::tile_fixed, RAJA::cuda_block_x_loop, RAJA::statement::For< - 1, - RAJA::cuda_thread_x_direct, - RAJA::statement::For<0, - RAJA::cuda_thread_y_direct, + 1, RAJA::cuda_thread_x_direct, + RAJA::statement::For<0, RAJA::cuda_thread_y_direct, RAJA::statement::Lambda<0>>>>>>>; RAJA::kernel( - RAJA::make_tuple(col_Range, row_Range), - [=] RAJA_DEVICE(int col, int row) + RAJA::make_tuple(col_Range, row_Range), [=] RAJA_DEVICE(int col, int row) { Atview(col, row) = Aview(row, col); }); // _raja_mattranspose_cuda_end @@ -305,23 +284,16 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) using TILED_KERNEL_EXEC_POL_HIP = RAJA::KernelPolicy, - RAJA::hip_block_y_loop, + 1, RAJA::tile_fixed, RAJA::hip_block_y_loop, RAJA::statement::Tile< - 0, - RAJA::tile_fixed, - RAJA::hip_block_x_loop, + 0, RAJA::tile_fixed, RAJA::hip_block_x_loop, RAJA::statement::For< - 1, - RAJA::hip_thread_x_direct, - RAJA::statement::For<0, - RAJA::hip_thread_y_direct, + 1, RAJA::hip_thread_x_direct, + RAJA::statement::For<0, RAJA::hip_thread_y_direct, RAJA::statement::Lambda<0>>>>>>>; RAJA::kernel( - RAJA::make_tuple(col_Range, row_Range), - [=] RAJA_DEVICE(int col, int row) + RAJA::make_tuple(col_Range, row_Range), [=] RAJA_DEVICE(int col, int row) { d_Atview(col, row) = d_Aview(row, col); }); hipErrchk( diff --git a/exercises/kernel-matrix-transpose_solution.cpp b/exercises/kernel-matrix-transpose_solution.cpp index 05aadcbaa3..ea18b22d6d 100644 --- a/exercises/kernel-matrix-transpose_solution.cpp +++ b/exercises/kernel-matrix-transpose_solution.cpp @@ -126,8 +126,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // // _raja_mattranspose_start using KERNEL_EXEC_POL = RAJA::KernelPolicy>>>; RAJA::kernel(RAJA::make_tuple(col_Range, row_Range), @@ -149,8 +148,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // one of the inner loops. // using KERNEL_EXEC_POL_OMP = RAJA::KernelPolicy>>>; RAJA::kernel(RAJA::make_tuple(col_Range, row_Range), @@ -168,12 +166,10 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) std::memset(At, 0, N_r * N_c * sizeof(int)); // _raja_mattranspose_cuda_start - using KERNEL_EXEC_POL_CUDA = - RAJA::KernelPolicy>>>>; + using KERNEL_EXEC_POL_CUDA = RAJA::KernelPolicy>>>>; RAJA::kernel(RAJA::make_tuple(col_Range, row_Range), [=] RAJA_DEVICE(int col, int row) diff --git a/exercises/kernelintro-execpols.cpp b/exercises/kernelintro-execpols.cpp index bfc3f8ddd1..df8ee78800 100644 --- a/exercises/kernelintro-execpols.cpp +++ b/exercises/kernelintro-execpols.cpp @@ -226,10 +226,9 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) std::memset(a, 0, N_tot * sizeof(double)); // _raja_tensorinit_omp_collapse_start - using EXEC_POL3 = RAJA::KernelPolicy< - RAJA::statement::Collapse, // k, j, i - RAJA::statement::Lambda<0>>>; + using EXEC_POL3 = RAJA::KernelPolicy, // k, j, i + RAJA::statement::Lambda<0>>>; RAJA::kernel(RAJA::make_tuple(RAJA::TypedRangeSegment(0, N), RAJA::TypedRangeSegment(0, N), @@ -317,13 +316,9 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) using EXEC_POL6 = RAJA::KernelPolicy, - RAJA::cuda_block_y_direct, + 1, RAJA::tile_fixed, RAJA::cuda_block_y_direct, RAJA::statement::Tile< - 0, - RAJA::tile_fixed, - RAJA::cuda_block_x_direct, + 0, RAJA::tile_fixed, RAJA::cuda_block_x_direct, RAJA::statement::For< 2, RAJA::cuda_block_z_direct, // k @@ -432,13 +427,9 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) using EXEC_POL8 = RAJA::KernelPolicy, - RAJA::hip_block_y_direct, + 1, RAJA::tile_fixed, RAJA::hip_block_y_direct, RAJA::statement::Tile< - 0, - RAJA::tile_fixed, - RAJA::hip_block_x_direct, + 0, RAJA::tile_fixed, RAJA::hip_block_x_direct, RAJA::statement::For< 2, RAJA::hip_block_z_direct, // k diff --git a/exercises/kernelintro-execpols_solution.cpp b/exercises/kernelintro-execpols_solution.cpp index 0323befabe..3b483ff4fe 100644 --- a/exercises/kernelintro-execpols_solution.cpp +++ b/exercises/kernelintro-execpols_solution.cpp @@ -235,10 +235,9 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) std::memset(a, 0, N_tot * sizeof(double)); // _raja_tensorinit_omp_collapse_start - using EXEC_POL3 = RAJA::KernelPolicy< - RAJA::statement::Collapse, // k, j, i - RAJA::statement::Lambda<0>>>; + using EXEC_POL3 = RAJA::KernelPolicy, // k, j, i + RAJA::statement::Lambda<0>>>; RAJA::kernel(RAJA::make_tuple(RAJA::TypedRangeSegment(0, N), RAJA::TypedRangeSegment(0, N), @@ -259,8 +258,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // _raja_tensorinit_omp_collapse_start using EXEC_POL4 = RAJA::KernelPolicy, // k, j + RAJA::omp_parallel_collapse_exec, RAJA::ArgList<2, 1>, // k, j RAJA::statement::For<0, RAJA::seq_exec, // i RAJA::statement::Lambda<0>>>>; @@ -332,13 +330,9 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) using EXEC_POL6 = RAJA::KernelPolicy, - RAJA::cuda_block_y_direct, + 1, RAJA::tile_fixed, RAJA::cuda_block_y_direct, RAJA::statement::Tile< - 0, - RAJA::tile_fixed, - RAJA::cuda_block_x_direct, + 0, RAJA::tile_fixed, RAJA::cuda_block_x_direct, RAJA::statement::For< 2, RAJA::cuda_block_z_direct, // k @@ -447,13 +441,9 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) using EXEC_POL8 = RAJA::KernelPolicy, - RAJA::hip_block_y_direct, + 1, RAJA::tile_fixed, RAJA::hip_block_y_direct, RAJA::statement::Tile< - 0, - RAJA::tile_fixed, - RAJA::hip_block_x_direct, + 0, RAJA::tile_fixed, RAJA::hip_block_x_direct, RAJA::statement::For< 2, RAJA::hip_block_z_direct, // k diff --git a/exercises/kernelintro-nested-loop-reorder.cpp b/exercises/kernelintro-nested-loop-reorder.cpp index db2371f897..78a12e34c4 100644 --- a/exercises/kernelintro-nested-loop-reorder.cpp +++ b/exercises/kernelintro-nested-loop-reorder.cpp @@ -105,8 +105,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) RAJA::statement::Lambda<0>>>>>; RAJA::kernel( - RAJA::make_tuple(IRange, JRange, KRange), - [=](IIDX i, JIDX j, KIDX k) + RAJA::make_tuple(IRange, JRange, KRange), [=](IIDX i, JIDX j, KIDX k) { printf(" (%d, %d, %d) \n", (int)(*i), (int)(*j), (int)(*k)); }); // _raja_kji_loops_end diff --git a/exercises/kernelintro-nested-loop-reorder_solution.cpp b/exercises/kernelintro-nested-loop-reorder_solution.cpp index 311afce13c..f4a96ba809 100644 --- a/exercises/kernelintro-nested-loop-reorder_solution.cpp +++ b/exercises/kernelintro-nested-loop-reorder_solution.cpp @@ -105,8 +105,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) RAJA::statement::Lambda<0>>>>>; RAJA::kernel( - RAJA::make_tuple(IRange, JRange, KRange), - [=](IIDX i, JIDX j, KIDX k) + RAJA::make_tuple(IRange, JRange, KRange), [=](IIDX i, JIDX j, KIDX k) { printf(" (%d, %d, %d) \n", (int)(*i), (int)(*j), (int)(*k)); }); // _raja_kji_loops_end @@ -149,8 +148,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) RAJA::statement::Lambda<0>>>>>; RAJA::kernel( - RAJA::make_tuple(IRange, JRange, KRange), - [=](IIDX i, JIDX j, KIDX k) + RAJA::make_tuple(IRange, JRange, KRange), [=](IIDX i, JIDX j, KIDX k) { printf(" (%d, %d, %d) \n", (int)(*i), (int)(*j), (int)(*k)); }); // _raja_jik_loops_end @@ -194,8 +192,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) RAJA::statement::Lambda<0>>>>>; RAJA::kernel( - RAJA::make_tuple(IRange, JRange, KRange), - [=](IIDX i, JIDX j, KIDX k) + RAJA::make_tuple(IRange, JRange, KRange), [=](IIDX i, JIDX j, KIDX k) { printf(" (%d, %d, %d) \n", (int)(*i), (int)(*j), (int)(*k)); }); // _raja_ikj_loops_end diff --git a/exercises/launch-matrix-transpose-local-array.cpp b/exercises/launch-matrix-transpose-local-array.cpp index 524457754d..9b42dd7e30 100644 --- a/exercises/launch-matrix-transpose-local-array.cpp +++ b/exercises/launch-matrix-transpose-local-array.cpp @@ -193,15 +193,11 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::tile( - ctx, - TILE_DIM, - RAJA::TypedRangeSegment(0, N_r), + ctx, TILE_DIM, RAJA::TypedRangeSegment(0, N_r), [&](RAJA::TypedRangeSegment const& row_tile) { RAJA::tile( - ctx, - TILE_DIM, - RAJA::TypedRangeSegment(0, N_c), + ctx, TILE_DIM, RAJA::TypedRangeSegment(0, N_c), [&](RAJA::TypedRangeSegment const& col_tile) { RAJA_TEAM_SHARED double Tile_Array[TILE_DIM][TILE_DIM]; @@ -214,13 +210,11 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) /// RAJA::loop_icount( - ctx, - col_tile, + ctx, col_tile, [&](int col, int tx) { RAJA::loop_icount( - ctx, - row_tile, + ctx, row_tile, [&](int row, int ty) { Atview(col, row) = Tile_Array[ty][tx]; }); }); @@ -399,39 +393,31 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::tile( - ctx, - TILE_DIM, - RAJA::TypedRangeSegment(0, N_r), + ctx, TILE_DIM, RAJA::TypedRangeSegment(0, N_r), [&](RAJA::TypedRangeSegment const& row_tile) { RAJA::tile( - ctx, - TILE_DIM, - RAJA::TypedRangeSegment(0, N_c), + ctx, TILE_DIM, RAJA::TypedRangeSegment(0, N_c), [&](RAJA::TypedRangeSegment const& col_tile) { RAJA_TEAM_SHARED double Tile_Array[TILE_DIM][TILE_DIM]; RAJA::loop_icount( - ctx, - row_tile, + ctx, row_tile, [&](int row, int ty) { RAJA::loop_icount( - ctx, - col_tile, + ctx, col_tile, [&](int col, int tx) { Tile_Array[ty][tx] = d_Aview(row, col); }); }); RAJA::loop_icount( - ctx, - col_tile, + ctx, col_tile, [&](int col, int tx) { RAJA::loop_icount( - ctx, - row_tile, + ctx, row_tile, [&](int row, int ty) { d_Atview(col, row) = Tile_Array[ty][tx]; }); }); diff --git a/exercises/launch-matrix-transpose-local-array_solution.cpp b/exercises/launch-matrix-transpose-local-array_solution.cpp index b2fd888d99..0e4abbe1bf 100644 --- a/exercises/launch-matrix-transpose-local-array_solution.cpp +++ b/exercises/launch-matrix-transpose-local-array_solution.cpp @@ -193,26 +193,20 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::tile( - ctx, - TILE_DIM, - RAJA::TypedRangeSegment(0, N_r), + ctx, TILE_DIM, RAJA::TypedRangeSegment(0, N_r), [&](RAJA::TypedRangeSegment const& row_tile) { RAJA::tile( - ctx, - TILE_DIM, - RAJA::TypedRangeSegment(0, N_c), + ctx, TILE_DIM, RAJA::TypedRangeSegment(0, N_c), [&](RAJA::TypedRangeSegment const& col_tile) { RAJA_TEAM_SHARED double Tile_Array[TILE_DIM][TILE_DIM]; RAJA::loop_icount( - ctx, - row_tile, + ctx, row_tile, [&](int row, int ty) { - RAJA::loop_icount(ctx, - col_tile, + RAJA::loop_icount(ctx, col_tile, [&](int col, int tx) { Tile_Array[ty][tx] = Aview(row, col); @@ -220,13 +214,11 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) }); RAJA::loop_icount( - ctx, - col_tile, + ctx, col_tile, [&](int col, int tx) { RAJA::loop_icount( - ctx, - row_tile, + ctx, row_tile, [&](int row, int ty) { Atview(col, row) = Tile_Array[ty][tx]; }); }); @@ -259,26 +251,20 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::tile( - ctx, - TILE_DIM, - RAJA::TypedRangeSegment(0, N_r), + ctx, TILE_DIM, RAJA::TypedRangeSegment(0, N_r), [&](RAJA::TypedRangeSegment const& row_tile) { RAJA::tile( - ctx, - TILE_DIM, - RAJA::TypedRangeSegment(0, N_c), + ctx, TILE_DIM, RAJA::TypedRangeSegment(0, N_c), [&](RAJA::TypedRangeSegment const& col_tile) { RAJA_TEAM_SHARED double Tile_Array[TILE_DIM][TILE_DIM]; RAJA::loop_icount( - ctx, - row_tile, + ctx, row_tile, [&](int row, int ty) { - RAJA::loop_icount(ctx, - col_tile, + RAJA::loop_icount(ctx, col_tile, [&](int col, int tx) { Tile_Array[ty][tx] = Aview(row, col); @@ -286,13 +272,11 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) }); RAJA::loop_icount( - ctx, - col_tile, + ctx, col_tile, [&](int col, int tx) { RAJA::loop_icount( - ctx, - row_tile, + ctx, row_tile, [&](int row, int ty) { Atview(col, row) = Tile_Array[ty][tx]; }); }); @@ -331,39 +315,31 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::tile( - ctx, - TILE_DIM, - RAJA::TypedRangeSegment(0, N_r), + ctx, TILE_DIM, RAJA::TypedRangeSegment(0, N_r), [&](RAJA::TypedRangeSegment const& row_tile) { RAJA::tile( - ctx, - TILE_DIM, - RAJA::TypedRangeSegment(0, N_c), + ctx, TILE_DIM, RAJA::TypedRangeSegment(0, N_c), [&](RAJA::TypedRangeSegment const& col_tile) { RAJA_TEAM_SHARED double Tile_Array[TILE_DIM][TILE_DIM]; RAJA::loop_icount( - ctx, - row_tile, + ctx, row_tile, [&](int row, int ty) { RAJA::loop_icount( - ctx, - col_tile, + ctx, col_tile, [&](int col, int tx) { Tile_Array[ty][tx] = Aview(row, col); }); }); RAJA::loop_icount( - ctx, - col_tile, + ctx, col_tile, [&](int col, int tx) { RAJA::loop_icount( - ctx, - row_tile, + ctx, row_tile, [&](int row, int ty) { Atview(col, row) = Tile_Array[ty][tx]; }); }); @@ -418,39 +394,31 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::tile( - ctx, - TILE_DIM, - RAJA::TypedRangeSegment(0, N_r), + ctx, TILE_DIM, RAJA::TypedRangeSegment(0, N_r), [&](RAJA::TypedRangeSegment const& row_tile) { RAJA::tile( - ctx, - TILE_DIM, - RAJA::TypedRangeSegment(0, N_c), + ctx, TILE_DIM, RAJA::TypedRangeSegment(0, N_c), [&](RAJA::TypedRangeSegment const& col_tile) { RAJA_TEAM_SHARED double Tile_Array[TILE_DIM][TILE_DIM]; RAJA::loop_icount( - ctx, - row_tile, + ctx, row_tile, [&](int row, int ty) { RAJA::loop_icount( - ctx, - col_tile, + ctx, col_tile, [&](int col, int tx) { Tile_Array[ty][tx] = d_Aview(row, col); }); }); RAJA::loop_icount( - ctx, - col_tile, + ctx, col_tile, [&](int col, int tx) { RAJA::loop_icount( - ctx, - row_tile, + ctx, row_tile, [&](int row, int ty) { d_Atview(col, row) = Tile_Array[ty][tx]; }); }); diff --git a/exercises/launch-matrix-transpose-tiled.cpp b/exercises/launch-matrix-transpose-tiled.cpp index d74afb6989..006b6a2958 100644 --- a/exercises/launch-matrix-transpose-tiled.cpp +++ b/exercises/launch-matrix-transpose-tiled.cpp @@ -175,7 +175,8 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) using launch_policy_1 = RAJA::LaunchPolicy; RAJA::launch( - RAJA::LaunchParams(), // LaunchParams may be empty when running on the cpu + RAJA::LaunchParams(), // LaunchParams may be empty when running on the + // cpu [=] RAJA_HOST_DEVICE(RAJA::LaunchContext /*ctx*/) { /* @@ -362,24 +363,18 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::tile( - ctx, - TILE_DIM, - row_Range2, + ctx, TILE_DIM, row_Range2, [&](RAJA::TypedRangeSegment const& row_tile) { RAJA::tile( - ctx, - TILE_DIM, - col_Range2, + ctx, TILE_DIM, col_Range2, [&](RAJA::TypedRangeSegment const& col_tile) { - RAJA::loop(ctx, - row_tile, + RAJA::loop(ctx, row_tile, [&](int row) { RAJA::loop( - ctx, - col_tile, + ctx, col_tile, [&](int col) { Atview(col, row) = Aview(row, col); diff --git a/exercises/launch-matrix-transpose-tiled_solution.cpp b/exercises/launch-matrix-transpose-tiled_solution.cpp index 258f595b68..6ca5ede73c 100644 --- a/exercises/launch-matrix-transpose-tiled_solution.cpp +++ b/exercises/launch-matrix-transpose-tiled_solution.cpp @@ -169,28 +169,23 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) using launch_policy_1 = RAJA::LaunchPolicy; RAJA::launch( - RAJA::LaunchParams(), // LaunchParams may be empty when running on the cpu + RAJA::LaunchParams(), // LaunchParams may be empty when running on the + // cpu [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::tile( - ctx, - TILE_DIM, - row_Range, + ctx, TILE_DIM, row_Range, [&](RAJA::TypedRangeSegment const& row_tile) { RAJA::tile( - ctx, - TILE_DIM, - col_Range, + ctx, TILE_DIM, col_Range, [&](RAJA::TypedRangeSegment const& col_tile) { - RAJA::loop(ctx, - row_tile, + RAJA::loop(ctx, row_tile, [&](int row) { RAJA::loop( - ctx, - col_tile, + ctx, col_tile, [&](int col) { Atview(col, row) = Aview(row, col); @@ -220,28 +215,23 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) using launch_policy_2 = RAJA::LaunchPolicy; RAJA::launch( - RAJA::LaunchParams(), // LaunchParams may be empty when running on the cpu + RAJA::LaunchParams(), // LaunchParams may be empty when running on the + // cpu [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::tile( - ctx, - TILE_DIM, - row_Range, + ctx, TILE_DIM, row_Range, [&](RAJA::TypedRangeSegment const& row_tile) { RAJA::tile( - ctx, - TILE_DIM, - col_Range, + ctx, TILE_DIM, col_Range, [&](RAJA::TypedRangeSegment const& col_tile) { - RAJA::loop(ctx, - row_tile, + RAJA::loop(ctx, row_tile, [&](int row) { RAJA::loop( - ctx, - col_tile, + ctx, col_tile, [&](int col) { Atview(col, row) = Aview(row, col); @@ -285,24 +275,18 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::tile( - ctx, - TILE_DIM, - row_Range, + ctx, TILE_DIM, row_Range, [&](RAJA::TypedRangeSegment const& row_tile) { RAJA::tile( - ctx, - TILE_DIM, - col_Range, + ctx, TILE_DIM, col_Range, [&](RAJA::TypedRangeSegment const& col_tile) { - RAJA::loop(ctx, - row_tile, + RAJA::loop(ctx, row_tile, [&](int row) { RAJA::loop( - ctx, - col_tile, + ctx, col_tile, [&](int col) { Atview(col, row) = Aview(row, col); @@ -353,24 +337,18 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::tile( - ctx, - TILE_DIM, - row_Range, + ctx, TILE_DIM, row_Range, [&](RAJA::TypedRangeSegment const& row_tile) { RAJA::tile( - ctx, - TILE_DIM, - col_Range, + ctx, TILE_DIM, col_Range, [&](RAJA::TypedRangeSegment const& col_tile) { - RAJA::loop(ctx, - row_tile, + RAJA::loop(ctx, row_tile, [&](int row) { RAJA::loop( - ctx, - col_tile, + ctx, col_tile, [&](int col) { d_Atview(col, row) = d_Aview(row, col); diff --git a/exercises/launch-matrix-transpose.cpp b/exercises/launch-matrix-transpose.cpp index 14c52d784b..5b1c8e2aa4 100644 --- a/exercises/launch-matrix-transpose.cpp +++ b/exercises/launch-matrix-transpose.cpp @@ -133,13 +133,11 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // host [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { - RAJA::loop(ctx, - row_Range, + RAJA::loop(ctx, row_Range, [&](int /*row*/) { RAJA::loop( - ctx, - col_Range, + ctx, col_Range, [&](int /*col*/) { /// TODO... @@ -205,13 +203,11 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::loop( - ctx, - row_Range, + ctx, row_Range, [&](int row) { RAJA::loop( - ctx, - col_Range, + ctx, col_Range, [&](int col) { Atview(col, row) = Aview(row, col); }); }); }); diff --git a/exercises/launch-matrix-transpose_solution.cpp b/exercises/launch-matrix-transpose_solution.cpp index 7bdd775683..e5e1f164d9 100644 --- a/exercises/launch-matrix-transpose_solution.cpp +++ b/exercises/launch-matrix-transpose_solution.cpp @@ -134,13 +134,11 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::loop( - ctx, - row_Range, + ctx, row_Range, [&](int row) { RAJA::loop( - ctx, - col_Range, + ctx, col_Range, [&](int col) { Atview(col, row) = Aview(row, col); }); }); }); @@ -168,13 +166,11 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::loop( - ctx, - row_Range, + ctx, row_Range, [&](int row) { RAJA::loop( - ctx, - col_Range, + ctx, col_Range, [&](int col) { Atview(col, row) = Aview(row, col); }); }); }); @@ -201,13 +197,11 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::loop( - ctx, - row_Range, + ctx, row_Range, [&](int row) { RAJA::loop( - ctx, - col_Range, + ctx, col_Range, [&](int col) { Atview(col, row) = Aview(row, col); }); }); }); diff --git a/exercises/launchintro-execpols.cpp b/exercises/launchintro-execpols.cpp index 9791caf763..1252701063 100644 --- a/exercises/launchintro-execpols.cpp +++ b/exercises/launchintro-execpols.cpp @@ -269,18 +269,15 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::loop( - ctx, - RAJA::TypedRangeSegment(0, N), + ctx, RAJA::TypedRangeSegment(0, N), [&](int k) { RAJA::loop( - ctx, - RAJA::TypedRangeSegment(0, N), + ctx, RAJA::TypedRangeSegment(0, N), [&](int j) { RAJA::loop( - ctx, - RAJA::TypedRangeSegment(0, N), + ctx, RAJA::TypedRangeSegment(0, N), [&](int i) { aView(i, j, k) = c * i * j * k; }); }); }); @@ -314,30 +311,23 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::loop( - ctx, - RAJA::TypedRangeSegment(0, N), + ctx, RAJA::TypedRangeSegment(0, N), [&](int k) { RAJA::tile( - ctx, - j_block_sz, - RAJA::TypedRangeSegment(0, N), + ctx, j_block_sz, RAJA::TypedRangeSegment(0, N), [&](RAJA::TypedRangeSegment const& j_tile) { RAJA::tile( - ctx, - i_block_sz, - RAJA::TypedRangeSegment(0, N), + ctx, i_block_sz, RAJA::TypedRangeSegment(0, N), [&](RAJA::TypedRangeSegment const& i_tile) { RAJA::loop( - ctx, - j_tile, + ctx, j_tile, [&](int j) { RAJA::loop( - ctx, - i_tile, + ctx, i_tile, [&](int i) { aView(i, j, k) = c * i * j * k; }); }); @@ -420,18 +410,15 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::loop( - ctx, - RAJA::TypedRangeSegment(0, N), + ctx, RAJA::TypedRangeSegment(0, N), [&](int k) { RAJA::loop( - ctx, - RAJA::TypedRangeSegment(0, N), + ctx, RAJA::TypedRangeSegment(0, N), [&](int j) { RAJA::loop( - ctx, - RAJA::TypedRangeSegment(0, N), + ctx, RAJA::TypedRangeSegment(0, N), [&](int i) { d_aView(i, j, k) = c * i * j * k; }); }); }); @@ -466,29 +453,22 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::loop( - ctx, - RAJA::TypedRangeSegment(0, N), + ctx, RAJA::TypedRangeSegment(0, N), [&](int k) { RAJA::tile( - ctx, - j_block_sz, - RAJA::TypedRangeSegment(0, N), + ctx, j_block_sz, RAJA::TypedRangeSegment(0, N), [&](RAJA::TypedRangeSegment const& j_tile) { RAJA::tile( - ctx, - i_block_sz, - RAJA::TypedRangeSegment(0, N), + ctx, i_block_sz, RAJA::TypedRangeSegment(0, N), [&](RAJA::TypedRangeSegment const& i_tile) { RAJA::loop( - ctx, - j_tile, + ctx, j_tile, [&](int j) { - RAJA::loop(ctx, - i_tile, + RAJA::loop(ctx, i_tile, [&](int i) { d_aView(i, j, k) = c * i * j * k; diff --git a/exercises/launchintro-execpols_solution.cpp b/exercises/launchintro-execpols_solution.cpp index 7e40599394..7cd56efed3 100644 --- a/exercises/launchintro-execpols_solution.cpp +++ b/exercises/launchintro-execpols_solution.cpp @@ -140,18 +140,15 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::loop( - ctx, - RAJA::TypedRangeSegment(0, N), + ctx, RAJA::TypedRangeSegment(0, N), [&](int k) { RAJA::loop( - ctx, - RAJA::TypedRangeSegment(0, N), + ctx, RAJA::TypedRangeSegment(0, N), [&](int j) { RAJA::loop( - ctx, - RAJA::TypedRangeSegment(0, N), + ctx, RAJA::TypedRangeSegment(0, N), [&](int i) { aView(i, j, k) = c * i * j * k; }); }); }); @@ -205,18 +202,15 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::loop( - ctx, - RAJA::TypedRangeSegment(0, N), + ctx, RAJA::TypedRangeSegment(0, N), [&](int k) { RAJA::loop( - ctx, - RAJA::TypedRangeSegment(0, N), + ctx, RAJA::TypedRangeSegment(0, N), [&](int j) { RAJA::loop( - ctx, - RAJA::TypedRangeSegment(0, N), + ctx, RAJA::TypedRangeSegment(0, N), [&](int i) { aView(i, j, k) = c * i * j * k; }); }); }); @@ -266,18 +260,15 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::loop( - ctx, - RAJA::TypedRangeSegment(0, N), + ctx, RAJA::TypedRangeSegment(0, N), [&](int k) { RAJA::loop( - ctx, - RAJA::TypedRangeSegment(0, N), + ctx, RAJA::TypedRangeSegment(0, N), [&](int j) { RAJA::loop( - ctx, - RAJA::TypedRangeSegment(0, N), + ctx, RAJA::TypedRangeSegment(0, N), [&](int i) { aView(i, j, k) = c * i * j * k; }); }); }); @@ -311,30 +302,23 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::loop( - ctx, - RAJA::TypedRangeSegment(0, N), + ctx, RAJA::TypedRangeSegment(0, N), [&](int k) { RAJA::tile( - ctx, - j_block_sz, - RAJA::TypedRangeSegment(0, N), + ctx, j_block_sz, RAJA::TypedRangeSegment(0, N), [&](RAJA::TypedRangeSegment const& j_tile) { RAJA::tile( - ctx, - i_block_sz, - RAJA::TypedRangeSegment(0, N), + ctx, i_block_sz, RAJA::TypedRangeSegment(0, N), [&](RAJA::TypedRangeSegment const& i_tile) { RAJA::loop( - ctx, - j_tile, + ctx, j_tile, [&](int j) { RAJA::loop( - ctx, - i_tile, + ctx, i_tile, [&](int i) { aView(i, j, k) = c * i * j * k; }); }); @@ -417,18 +401,15 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::loop( - ctx, - RAJA::TypedRangeSegment(0, N), + ctx, RAJA::TypedRangeSegment(0, N), [&](int k) { RAJA::loop( - ctx, - RAJA::TypedRangeSegment(0, N), + ctx, RAJA::TypedRangeSegment(0, N), [&](int j) { RAJA::loop( - ctx, - RAJA::TypedRangeSegment(0, N), + ctx, RAJA::TypedRangeSegment(0, N), [&](int i) { d_aView(i, j, k) = c * i * j * k; }); }); }); @@ -463,29 +444,22 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::loop( - ctx, - RAJA::TypedRangeSegment(0, N), + ctx, RAJA::TypedRangeSegment(0, N), [&](int k) { RAJA::tile( - ctx, - j_block_sz, - RAJA::TypedRangeSegment(0, N), + ctx, j_block_sz, RAJA::TypedRangeSegment(0, N), [&](RAJA::TypedRangeSegment const& j_tile) { RAJA::tile( - ctx, - i_block_sz, - RAJA::TypedRangeSegment(0, N), + ctx, i_block_sz, RAJA::TypedRangeSegment(0, N), [&](RAJA::TypedRangeSegment const& i_tile) { RAJA::loop( - ctx, - j_tile, + ctx, j_tile, [&](int j) { - RAJA::loop(ctx, - i_tile, + RAJA::loop(ctx, i_tile, [&](int i) { d_aView(i, j, k) = c * i * j * k; diff --git a/exercises/offset-layout-stencil.cpp b/exercises/offset-layout-stencil.cpp index 892c0b09c5..2dc54fef81 100644 --- a/exercises/offset-layout-stencil.cpp +++ b/exercises/offset-layout-stencil.cpp @@ -329,8 +329,8 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) }); // _offsetlayout_rajahip_end - hipErrchk(hipMemcpy( - output, d_output, totCells * sizeof(int), hipMemcpyDeviceToHost)); + hipErrchk(hipMemcpy(output, d_output, totCells * sizeof(int), + hipMemcpyDeviceToHost)); std::cout << "\noutput lattice:\n"; printLattice(output, totCellsInRow, totCellsInCol); diff --git a/exercises/offset-layout-stencil_solution.cpp b/exercises/offset-layout-stencil_solution.cpp index 881f188fc4..8b8ebc9f30 100644 --- a/exercises/offset-layout-stencil_solution.cpp +++ b/exercises/offset-layout-stencil_solution.cpp @@ -247,10 +247,9 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) std::memset(output, 0, totCells * sizeof(int)); // _offsetlayout_rajaomp_start - using NESTED_EXEC_POL2 = RAJA::KernelPolicy< - RAJA::statement::Collapse, // row, col - RAJA::statement::Lambda<0>>>; + using NESTED_EXEC_POL2 = RAJA::KernelPolicy, // row, col + RAJA::statement::Lambda<0>>>; RAJA::kernel(RAJA::make_tuple(col_range, row_range), [=](int col, int row) @@ -316,8 +315,8 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) hipErrchk( hipMemcpy(d_input, input, totCells * sizeof(int), hipMemcpyHostToDevice)); - hipErrchk(hipMemcpy( - d_output, output, totCells * sizeof(int), hipMemcpyHostToDevice)); + hipErrchk(hipMemcpy(d_output, output, totCells * sizeof(int), + hipMemcpyHostToDevice)); RAJA::View> d_inputView(d_input, layout); RAJA::View> d_outputView(d_output, layout); @@ -342,8 +341,8 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) }); // _offsetlayout_rajahip_end - hipErrchk(hipMemcpy( - output, d_output, totCells * sizeof(int), hipMemcpyDeviceToHost)); + hipErrchk(hipMemcpy(output, d_output, totCells * sizeof(int), + hipMemcpyDeviceToHost)); std::cout << "\noutput lattice:\n"; printLattice(output, totCellsInRow, totCellsInCol); diff --git a/exercises/permuted-layout-batch-matrix-multiply_solution.cpp b/exercises/permuted-layout-batch-matrix-multiply_solution.cpp index 1aded0e9fe..ed05769394 100644 --- a/exercises/permuted-layout-batch-matrix-multiply_solution.cpp +++ b/exercises/permuted-layout-batch-matrix-multiply_solution.cpp @@ -557,10 +557,10 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) hipMemcpy(d_A, A, N_c * N_r * N * sizeof(double), hipMemcpyHostToDevice)); hipErrchk( hipMemcpy(d_B, B, N_c * N_r * N * sizeof(double), hipMemcpyHostToDevice)); - hipErrchk(hipMemcpy( - d_A2, A2, N_c * N_r * N * sizeof(double), hipMemcpyHostToDevice)); - hipErrchk(hipMemcpy( - d_B2, B2, N_c * N_r * N * sizeof(double), hipMemcpyHostToDevice)); + hipErrchk(hipMemcpy(d_A2, A2, N_c * N_r * N * sizeof(double), + hipMemcpyHostToDevice)); + hipErrchk(hipMemcpy(d_B2, B2, N_c * N_r * N * sizeof(double), + hipMemcpyHostToDevice)); minRun = std::numeric_limits::max(); for (int i = 0; i < NITER; ++i) @@ -665,8 +665,8 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) timer.reset(); } - hipErrchk(hipMemcpy( - C2, d_C2, N_c * N_r * N * sizeof(double), hipMemcpyDeviceToHost)); + hipErrchk(hipMemcpy(C2, d_C2, N_c * N_r * N * sizeof(double), + hipMemcpyDeviceToHost)); std::cout << "\trun time : " << minRun << " seconds" << std::endl; checkResult(Cview2, N, N_r, N_c); diff --git a/exercises/scan_solution.cpp b/exercises/scan_solution.cpp index 223f7557aa..03322e45a5 100644 --- a/exercises/scan_solution.cpp +++ b/exercises/scan_solution.cpp @@ -174,8 +174,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // _scan_inclusive_omp_plus_start RAJA::inclusive_scan( - RAJA::make_span(in, N), - RAJA::make_span(out, N), + RAJA::make_span(in, N), RAJA::make_span(out, N), RAJA::operators::plus{}); // _scan_inclusive_omp_plus_end @@ -244,8 +243,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // _scan_exclusive_cuda_plus_start RAJA::exclusive_scan>( - RAJA::make_span(in, N), - RAJA::make_span(out, N), + RAJA::make_span(in, N), RAJA::make_span(out, N), RAJA::operators::plus{}); // _scan_exclusive_cuda_plus_end @@ -291,8 +289,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) hipErrchk(hipMemcpy(d_out, out, N * sizeof(int), hipMemcpyHostToDevice)); RAJA::exclusive_scan>( - RAJA::make_span(d_in, N), - RAJA::make_span(d_out, N), + RAJA::make_span(d_in, N), RAJA::make_span(d_out, N), RAJA::operators::plus{}); hipErrchk(hipMemcpy(out, d_out, N * sizeof(int), hipMemcpyDeviceToHost)); diff --git a/exercises/sort_solution.cpp b/exercises/sort_solution.cpp index 8bacb285d8..8a3daf4db1 100644 --- a/exercises/sort_solution.cpp +++ b/exercises/sort_solution.cpp @@ -254,8 +254,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // _sort_stable_pairs_omp_greater_start RAJA::stable_sort_pairs( - RAJA::make_span(out, N), - RAJA::make_span(out_vals, N), + RAJA::make_span(out, N), RAJA::make_span(out_vals, N), RAJA::operators::greater{}); // _sort_stable_pairs_omp_greater_end @@ -282,8 +281,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // _sort_pairs_cuda_greater_start RAJA::sort_pairs>( - RAJA::make_span(out, N), - RAJA::make_span(out_vals, N), + RAJA::make_span(out, N), RAJA::make_span(out_vals, N), RAJA::operators::greater{}); // _sort_pairs_cuda_greater_end @@ -332,8 +330,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) hipMemcpy(d_out_vals, out_vals, N * sizeof(int), hipMemcpyHostToDevice)); RAJA::sort_pairs>( - RAJA::make_span(d_out, N), - RAJA::make_span(d_out_vals, N), + RAJA::make_span(d_out, N), RAJA::make_span(d_out_vals, N), RAJA::operators::less{}); hipErrchk(hipMemcpy(out, d_out, N * sizeof(int), hipMemcpyDeviceToHost)); diff --git a/exercises/tutorial_halfday/ex8_tiled-matrix-transpose.cpp b/exercises/tutorial_halfday/ex8_tiled-matrix-transpose.cpp index fd24969ae6..2cdd785635 100644 --- a/exercises/tutorial_halfday/ex8_tiled-matrix-transpose.cpp +++ b/exercises/tutorial_halfday/ex8_tiled-matrix-transpose.cpp @@ -257,18 +257,14 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // to/from the tile. // using KERNEL_EXEC_POL_OMP2 = RAJA::KernelPolicy, - RAJA::seq_exec, - RAJA::statement::Tile<0, - RAJA::tile_fixed, - RAJA::seq_exec, - RAJA::statement::Collapse< - RAJA::omp_parallel_collapse_exec, - RAJA::ArgList<0, 1>, - RAJA::statement::Lambda<0>> // closes collapse - > // closes Tile 0 - > // closes Tile 1 + 1, RAJA::tile_fixed, RAJA::seq_exec, + RAJA::statement::Tile< + 0, RAJA::tile_fixed, RAJA::seq_exec, + RAJA::statement::Collapse< + RAJA::omp_parallel_collapse_exec, RAJA::ArgList<0, 1>, + RAJA::statement::Lambda<0>> // closes collapse + > // closes Tile 0 + > // closes Tile 1 >; // closes policy list RAJA::kernel(RAJA::make_tuple(col_Range, row_Range), diff --git a/exercises/tutorial_halfday/ex8_tiled-matrix-transpose_solution.cpp b/exercises/tutorial_halfday/ex8_tiled-matrix-transpose_solution.cpp index 2f6f320080..07bfdc3ce4 100644 --- a/exercises/tutorial_halfday/ex8_tiled-matrix-transpose_solution.cpp +++ b/exercises/tutorial_halfday/ex8_tiled-matrix-transpose_solution.cpp @@ -168,18 +168,13 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // using KERNEL_EXEC_POL_SEQ = RAJA::KernelPolicy, - RAJA::seq_exec, + 1, RAJA::tile_fixed, RAJA::seq_exec, RAJA::statement::Tile< - 0, - RAJA::tile_fixed, - RAJA::seq_exec, + 0, RAJA::tile_fixed, RAJA::seq_exec, RAJA::statement::For< - 1, - RAJA::seq_exec, - RAJA::statement:: - For<0, RAJA::seq_exec, RAJA::statement::Lambda<0>>>>>>; + 1, RAJA::seq_exec, + RAJA::statement::For<0, RAJA::seq_exec, + RAJA::statement::Lambda<0>>>>>>; RAJA::kernel(RAJA::make_tuple(col_Range, row_Range), [=](int col, int row) @@ -201,18 +196,13 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // using KERNEL_EXEC_POL_OMP = RAJA::KernelPolicy, - RAJA::seq_exec, + 1, RAJA::tile_fixed, RAJA::seq_exec, RAJA::statement::Tile< - 0, - RAJA::tile_fixed, - RAJA::seq_exec, + 0, RAJA::tile_fixed, RAJA::seq_exec, RAJA::statement::For< - 1, - RAJA::omp_parallel_for_exec, - RAJA::statement:: - For<0, RAJA::seq_exec, RAJA::statement::Lambda<0>>>>>>; + 1, RAJA::omp_parallel_for_exec, + RAJA::statement::For<0, RAJA::seq_exec, + RAJA::statement::Lambda<0>>>>>>; RAJA::kernel(RAJA::make_tuple(col_Range, row_Range), [=](int col, int row) @@ -235,18 +225,14 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // using KERNEL_EXEC_POL_OMP2 = RAJA::KernelPolicy, - RAJA::seq_exec, - RAJA::statement::Tile<0, - RAJA::tile_fixed, - RAJA::seq_exec, - RAJA::statement::Collapse< - RAJA::omp_parallel_collapse_exec, - RAJA::ArgList<0, 1>, - RAJA::statement::Lambda<0>> // closes collapse - > // closes Tile 0 - > // closes Tile 1 + 1, RAJA::tile_fixed, RAJA::seq_exec, + RAJA::statement::Tile< + 0, RAJA::tile_fixed, RAJA::seq_exec, + RAJA::statement::Collapse< + RAJA::omp_parallel_collapse_exec, RAJA::ArgList<0, 1>, + RAJA::statement::Lambda<0>> // closes collapse + > // closes Tile 0 + > // closes Tile 1 >; // closes policy list RAJA::kernel(RAJA::make_tuple(col_Range, row_Range), @@ -265,18 +251,12 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) using KERNEL_EXEC_POL_CUDA = RAJA::KernelPolicy, - RAJA::cuda_block_y_loop, + 1, RAJA::tile_fixed, RAJA::cuda_block_y_loop, RAJA::statement::Tile< - 0, - RAJA::tile_fixed, - RAJA::cuda_block_x_loop, + 0, RAJA::tile_fixed, RAJA::cuda_block_x_loop, RAJA::statement::For< - 1, - RAJA::cuda_thread_y_direct, - RAJA::statement::For<0, - RAJA::cuda_thread_x_direct, + 1, RAJA::cuda_thread_y_direct, + RAJA::statement::For<0, RAJA::cuda_thread_x_direct, RAJA::statement::Lambda<0>>>>>>>; RAJA::kernel(RAJA::make_tuple(col_Range, row_Range), diff --git a/exercises/tutorial_halfday/ex9_matrix-transpose-local-array_solution.cpp b/exercises/tutorial_halfday/ex9_matrix-transpose-local-array_solution.cpp index f8acc88e31..8921291c23 100644 --- a/exercises/tutorial_halfday/ex9_matrix-transpose-local-array_solution.cpp +++ b/exercises/tutorial_halfday/ex9_matrix-transpose-local-array_solution.cpp @@ -210,33 +210,22 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) std::memset(At, 0, N_r * N_c * sizeof(int)); using SEQ_EXEC_POL = RAJA::KernelPolicy, - RAJA::seq_exec, + 1, RAJA::tile_fixed, RAJA::seq_exec, RAJA::statement::Tile< - 0, - RAJA::tile_fixed, - RAJA::seq_exec, + 0, RAJA::tile_fixed, RAJA::seq_exec, RAJA::statement::InitLocalMem< - RAJA::cpu_tile_mem, - RAJA::ParamList<2>, + RAJA::cpu_tile_mem, RAJA::ParamList<2>, RAJA::statement::ForICount< - 1, - RAJA::statement::Param<1>, - RAJA::seq_exec, - RAJA::statement::ForICount<0, - RAJA::statement::Param<0>, + 1, RAJA::statement::Param<1>, RAJA::seq_exec, + RAJA::statement::ForICount<0, RAJA::statement::Param<0>, RAJA::seq_exec, RAJA::statement::Lambda<0>>>, RAJA::statement::ForICount< - 0, - RAJA::statement::Param<0>, - RAJA::seq_exec, - RAJA::statement::ForICount<1, - RAJA::statement::Param<1>, + 0, RAJA::statement::Param<0>, RAJA::seq_exec, + RAJA::statement::ForICount<1, RAJA::statement::Param<1>, RAJA::seq_exec, RAJA::statement::Lambda<1>>> @@ -264,33 +253,22 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) std::memset(At, 0, N_r * N_c * sizeof(int)); using OPENMP_EXEC_POL = RAJA::KernelPolicy, - RAJA::omp_parallel_for_exec, + 1, RAJA::tile_fixed, RAJA::omp_parallel_for_exec, RAJA::statement::Tile< - 0, - RAJA::tile_fixed, - RAJA::seq_exec, + 0, RAJA::tile_fixed, RAJA::seq_exec, RAJA::statement::InitLocalMem< - RAJA::cpu_tile_mem, - RAJA::ParamList<2>, + RAJA::cpu_tile_mem, RAJA::ParamList<2>, RAJA::statement::ForICount< - 1, - RAJA::statement::Param<1>, - RAJA::seq_exec, - RAJA::statement::ForICount<0, - RAJA::statement::Param<0>, + 1, RAJA::statement::Param<1>, RAJA::seq_exec, + RAJA::statement::ForICount<0, RAJA::statement::Param<0>, RAJA::seq_exec, RAJA::statement::Lambda<0>>>, RAJA::statement::ForICount< - 0, - RAJA::statement::Param<0>, - RAJA::seq_exec, - RAJA::statement::ForICount<1, - RAJA::statement::Param<1>, + 0, RAJA::statement::Param<0>, RAJA::seq_exec, + RAJA::statement::ForICount<1, RAJA::statement::Param<1>, RAJA::seq_exec, RAJA::statement::Lambda<1>>>>>>>; @@ -319,35 +297,24 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) using CUDA_EXEC_POL = RAJA::KernelPolicy, - RAJA::cuda_block_y_loop, + 1, RAJA::tile_fixed, RAJA::cuda_block_y_loop, RAJA::statement::Tile< - 0, - RAJA::tile_fixed, - RAJA::cuda_block_x_loop, + 0, RAJA::tile_fixed, RAJA::cuda_block_x_loop, RAJA::statement::InitLocalMem< - RAJA::cuda_shared_mem, - RAJA::ParamList<2>, + RAJA::cuda_shared_mem, RAJA::ParamList<2>, RAJA::statement::ForICount< - 1, - RAJA::statement::Param<1>, - RAJA::cuda_thread_y_direct, - RAJA::statement::ForICount<0, - RAJA::statement::Param<0>, + 1, RAJA::statement::Param<1>, RAJA::cuda_thread_y_direct, + RAJA::statement::ForICount<0, RAJA::statement::Param<0>, RAJA::cuda_thread_x_direct, RAJA::statement::Lambda<0>>>, RAJA::statement::CudaSyncThreads, RAJA::statement::ForICount< - 0, - RAJA::statement::Param<0>, - RAJA::cuda_thread_y_direct, - RAJA::statement::ForICount<1, - RAJA::statement::Param<1>, + 0, RAJA::statement::Param<0>, RAJA::cuda_thread_y_direct, + RAJA::statement::ForICount<1, RAJA::statement::Param<1>, RAJA::cuda_thread_x_direct, RAJA::statement::Lambda<1>>>, diff --git a/exercises/vertexsum-indexset.cpp b/exercises/vertexsum-indexset.cpp index e534b4e737..e8474ed0fe 100644 --- a/exercises/vertexsum-indexset.cpp +++ b/exercises/vertexsum-indexset.cpp @@ -376,8 +376,8 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) int* d_e2v_map = memoryManager::allocate_gpu(4 * Nelem_tot); hipMemcpy(d_areae, areae, Nelem_tot * sizeof(double), hipMemcpyHostToDevice); - hipMemcpy( - d_e2v_map, e2v_map, 4 * Nelem_tot * sizeof(int), hipMemcpyHostToDevice); + hipMemcpy(d_e2v_map, e2v_map, 4 * Nelem_tot * sizeof(int), + hipMemcpyHostToDevice); std::memset(areav, 0, Nvert_tot * sizeof(double)); hipMemcpy(d_areav, areav, Nvert_tot * sizeof(double), hipMemcpyHostToDevice); diff --git a/exercises/vertexsum-indexset_solution.cpp b/exercises/vertexsum-indexset_solution.cpp index ac81c53f4a..366d3fa8b4 100644 --- a/exercises/vertexsum-indexset_solution.cpp +++ b/exercises/vertexsum-indexset_solution.cpp @@ -368,8 +368,8 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) int* d_e2v_map = memoryManager::allocate_gpu(4 * Nelem_tot); hipMemcpy(d_areae, areae, Nelem_tot * sizeof(double), hipMemcpyHostToDevice); - hipMemcpy( - d_e2v_map, e2v_map, 4 * Nelem_tot * sizeof(int), hipMemcpyHostToDevice); + hipMemcpy(d_e2v_map, e2v_map, 4 * Nelem_tot * sizeof(int), + hipMemcpyHostToDevice); std::memset(areav, 0, Nvert_tot * sizeof(double)); hipMemcpy(d_areav, areav, Nvert_tot * sizeof(double), hipMemcpyHostToDevice); diff --git a/include/RAJA/index/IndexSet.hpp b/include/RAJA/index/IndexSet.hpp index 4d1adcefd8..ba1863fa80 100644 --- a/include/RAJA/index/IndexSet.hpp +++ b/include/RAJA/index/IndexSet.hpp @@ -326,8 +326,7 @@ class TypedIndexSet : public TypedIndexSet RAJA_INLINE void push_back(Tnew&& val) { push_internal(new typename std::decay::type(std::forward(val)), - PUSH_BACK, - PUSH_COPY); + PUSH_BACK, PUSH_COPY); } //! Add copy of segment to front end of index set. @@ -335,8 +334,7 @@ class TypedIndexSet : public TypedIndexSet RAJA_INLINE void push_front(Tnew&& val) { push_internal(new typename std::decay::type(std::forward(val)), - PUSH_FRONT, - PUSH_COPY); + PUSH_FRONT, PUSH_COPY); } //! Return total length -- sum of lengths of all segments @@ -373,8 +371,8 @@ class TypedIndexSet : public TypedIndexSet { if (getSegmentTypes()[segid] != T0_TypeId) { - PARENT::segmentCall( - segid, std::forward(body), std::forward(args)...); + PARENT::segmentCall(segid, std::forward(body), + std::forward(args)...); return; } Index_type offset = getSegmentOffsets()[segid]; diff --git a/include/RAJA/index/IndexSetUtils.hpp b/include/RAJA/index/IndexSetUtils.hpp index 4b6303d113..d750d6536b 100644 --- a/include/RAJA/index/IndexSetUtils.hpp +++ b/include/RAJA/index/IndexSetUtils.hpp @@ -65,8 +65,8 @@ template RAJA_INLINE void getIndices(CONTAINER_T& con, const SEGMENT_T& seg) { CONTAINER_T tcon; - forall( - seg, [&](typename CONTAINER_T::value_type idx) { tcon.push_back(idx); }); + forall(seg, [&](typename CONTAINER_T::value_type idx) + { tcon.push_back(idx); }); con = tcon; } diff --git a/include/RAJA/index/RangeSegment.hpp b/include/RAJA/index/RangeSegment.hpp index 4aafd63593..f70b3b4b30 100644 --- a/include/RAJA/index/RangeSegment.hpp +++ b/include/RAJA/index/RangeSegment.hpp @@ -501,8 +501,8 @@ struct TypedRangeStrideSegment end = end < m_end[0] ? m_end[0] : end; } - return TypedRangeStrideSegment{ - stripIndexType(start), stripIndexType(end), m_begin.get_stride()}; + return TypedRangeStrideSegment{stripIndexType(start), stripIndexType(end), + m_begin.get_stride()}; } /*! diff --git a/include/RAJA/internal/RAJAVec.hpp b/include/RAJA/internal/RAJAVec.hpp index c0aa6bd69f..682c59479e 100644 --- a/include/RAJA/internal/RAJAVec.hpp +++ b/include/RAJA/internal/RAJAVec.hpp @@ -452,8 +452,8 @@ class RAJAVec { for (; m_size < new_size; ++m_size) { - allocator_traits_type::construct( - m_allocator, m_data + m_size, std::forward(os)...); + allocator_traits_type::construct(m_allocator, m_data + m_size, + std::forward(os)...); } } @@ -464,8 +464,8 @@ class RAJAVec { for (; m_size < new_size; ++m_size) { - allocator_traits_type::construct( - m_allocator, m_data + m_size, o_data[m_size]); + allocator_traits_type::construct(m_allocator, m_data + m_size, + o_data[m_size]); } } @@ -476,8 +476,8 @@ class RAJAVec { for (; m_size < new_size; ++m_size) { - allocator_traits_type::construct( - m_allocator, m_data + m_size, std::move(o_data[m_size])); + allocator_traits_type::construct(m_allocator, m_data + m_size, + std::move(o_data[m_size])); } } @@ -503,16 +503,16 @@ class RAJAVec if (m_size > 0) { size_type i = m_size; - allocator_traits_type::construct( - m_allocator, m_data + i, std::move(m_data[i - 1])); + allocator_traits_type::construct(m_allocator, m_data + i, + std::move(m_data[i - 1])); for (--i; i > 0; --i) { m_data[i] = std::move(m_data[i - 1]); } allocator_traits_type::destroy(m_allocator, m_data); } - allocator_traits_type::construct( - m_allocator, m_data, std::forward(os)...); + allocator_traits_type::construct(m_allocator, m_data, + std::forward(os)...); m_size++; } @@ -523,8 +523,8 @@ class RAJAVec void emplace_back_private(Os&&... os) { reserve(m_size + 1); - allocator_traits_type::construct( - m_allocator, m_data + m_size, std::forward(os)...); + allocator_traits_type::construct(m_allocator, m_data + m_size, + std::forward(os)...); m_size++; } @@ -587,8 +587,8 @@ class RAJAVec { for (size_type i = 0; i < m_size; ++i) { - allocator_traits_type::construct( - m_allocator, tdata + i, std::move(m_data[i])); + allocator_traits_type::construct(m_allocator, tdata + i, + std::move(m_data[i])); allocator_traits_type::destroy(m_allocator, m_data + i); } allocator_traits_type::deallocate(m_allocator, m_data, m_capacity); diff --git a/include/RAJA/pattern/WorkGroup/Dispatcher.hpp b/include/RAJA/pattern/WorkGroup/Dispatcher.hpp index f9a138e6c6..a515c27fc3 100644 --- a/include/RAJA/pattern/WorkGroup/Dispatcher.hpp +++ b/include/RAJA/pattern/WorkGroup/Dispatcher.hpp @@ -182,8 +182,7 @@ struct Dispatcher}, - invoker_type{&s_host_invoke}, - destroyer_type{&s_destroy}, + invoker_type{&s_host_invoke}, destroyer_type{&s_destroy}, sizeof(T)}; } /// @@ -205,8 +204,7 @@ struct Dispatcher}, invoker_type{std::forward(createOnDevice)( DeviceInvokerFactory{})}, - destroyer_type{&s_destroy}, - sizeof(T)}; + destroyer_type{&s_destroy}, sizeof(T)}; } mover_type move_construct_destroy; @@ -375,10 +373,8 @@ struct Dispatcher s_base_impl; static host_impl_type s_host_impl; - return {mover_type{&s_base_impl}, - host_invoker_type{&s_host_impl}, - destroyer_type{&s_base_impl}, - sizeof(T)}; + return {mover_type{&s_base_impl}, host_invoker_type{&s_host_impl}, + destroyer_type{&s_base_impl}, sizeof(T)}; } /// /// create a Dispatcher that can be used on the device for objects of type T @@ -399,10 +395,8 @@ struct Dispatcher s_base_impl; static device_impl_type* s_device_impl_ptr{std::forward( createOnDevice)(DeviceImplTypeFactory{})}; - return {mover_type{&s_base_impl}, - device_invoker_type{s_device_impl_ptr}, - destroyer_type{&s_base_impl}, - sizeof(T)}; + return {mover_type{&s_base_impl}, device_invoker_type{s_device_impl_ptr}, + destroyer_type{&s_base_impl}, sizeof(T)}; } mover_type move_construct_destroy; @@ -665,9 +659,7 @@ struct Dispatcher(args)...); } @@ -696,9 +688,7 @@ struct Dispatcher(args)...); } @@ -781,8 +771,8 @@ struct Dispatcher(callable_indices{}, callable_types{}); static_assert(id != id_type(-1), "T must be in direct_dispatch types"); - return { - mover_type{id}, host_invoker_type{id}, destroyer_type{id}, sizeof(T)}; + return {mover_type{id}, host_invoker_type{id}, destroyer_type{id}, + sizeof(T)}; } /// /// create a Dispatcher that can be used on the device for objects of type T @@ -799,8 +789,8 @@ struct Dispatcher(callable_indices{}, callable_types{}); static_assert(id != id_type(-1), "T must be in direct_dispatch types"); - return { - mover_type{id}, device_invoker_type{id}, destroyer_type{id}, sizeof(T)}; + return {mover_type{id}, device_invoker_type{id}, destroyer_type{id}, + sizeof(T)}; } mover_type move_construct_destroy; diff --git a/include/RAJA/pattern/WorkGroup/WorkRunner.hpp b/include/RAJA/pattern/WorkGroup/WorkRunner.hpp index fc40a72b22..363f692710 100644 --- a/include/RAJA/pattern/WorkGroup/WorkRunner.hpp +++ b/include/RAJA/pattern/WorkGroup/WorkRunner.hpp @@ -126,9 +126,7 @@ struct HoldForall RAJA_INLINE void operator()(resource_type r, Args... args) const { - wrap::forall(r, - ExecutionPolicy(), - m_segment, + wrap::forall(r, ExecutionPolicy(), m_segment, HoldBodyArgs{m_body, std::forward(args)...}); } @@ -222,8 +220,7 @@ struct WorkRunnerForallOrdered_base storage.template emplace( get_Dispatcher(dispatcher_exec_policy{}), - std::forward(seg), - std::forward(loop)); + std::forward(seg), std::forward(loop)); } // clear any state so ready to be destroyed or reused diff --git a/include/RAJA/pattern/WorkGroup/WorkStorage.hpp b/include/RAJA/pattern/WorkGroup/WorkStorage.hpp index 6b0bea8689..3bf3030b89 100644 --- a/include/RAJA/pattern/WorkGroup/WorkStorage.hpp +++ b/include/RAJA/pattern/WorkGroup/WorkStorage.hpp @@ -419,8 +419,7 @@ class WorkStorage value_type::move_destroy(value_ptr, other_value_and_size.ptr); allocator_traits_type::deallocate( - rhs.m_aloc, - reinterpret_cast(other_value_and_size.ptr), + rhs.m_aloc, reinterpret_cast(other_value_and_size.ptr), other_value_and_size.size); return pointer_and_size{value_ptr, other_value_and_size.size}; @@ -431,8 +430,7 @@ class WorkStorage { value_type::destroy(value_and_size_ptr.ptr); allocator_traits_type::deallocate( - m_aloc, - reinterpret_cast(value_and_size_ptr.ptr), + m_aloc, reinterpret_cast(value_and_size_ptr.ptr), value_and_size_ptr.size); } }; @@ -591,8 +589,8 @@ class WorkStorage array_clear(); if (m_array_begin != nullptr) { - allocator_traits_type::deallocate( - m_aloc, m_array_begin, storage_capacity()); + allocator_traits_type::deallocate(m_aloc, m_array_begin, + storage_capacity()); m_array_begin = nullptr; m_array_end = nullptr; m_array_cap = nullptr; @@ -684,8 +682,8 @@ class WorkStorage if (m_array_begin != nullptr) { - allocator_traits_type::deallocate( - m_aloc, m_array_begin, storage_capacity()); + allocator_traits_type::deallocate(m_aloc, m_array_begin, + storage_capacity()); } m_array_begin = new_array_begin; @@ -896,8 +894,8 @@ class WorkStorage(p), - std::forward(c), - std::forward(loop_body), - std::forward(f_params)); + return forall_impl( + r, std::forward(p), std::forward(c), + std::forward(loop_body), std::forward(f_params)); } template (p), - std::forward(c), - std::forward(loop_body), - expt::get_empty_forall_param_pack()); + return forall_impl( + r, std::forward(p), std::forward(c), + std::forward(loop_body), expt::get_empty_forall_param_pack()); } @@ -231,14 +227,11 @@ RAJA_INLINE resources::EventProxy forall_Icount(Res r, using std::distance; using std::end; auto range = RangeSegment(0, distance(begin(c), end(c))); - detail::icount_adapter adapted( - c, loop_body, icount); + detail::icount_adapter adapted(c, loop_body, + icount); using policy::sequential::forall_impl; RAJA_FORCEINLINE_RECURSIVE - return forall_impl(r, - std::forward(p), - range, - adapted, + return forall_impl(r, std::forward(p), range, adapted, std::forward(f_params)); } @@ -267,18 +260,13 @@ RAJA_INLINE resources::EventProxy // no need for icount variant here auto segIterRes = resources::get_resource::type::get_default(); - wrap::forall(segIterRes, - SegmentIterPolicy(), - iset, + wrap::forall(segIterRes, SegmentIterPolicy(), iset, [=, &r](int segID) { iset.segmentCall( segID, detail::CallForallIcount(iset.getStartingIcount(segID)), - SegmentExecPolicy(), - loop_body, - r, - f_params); + SegmentExecPolicy(), loop_body, r, f_params); }); return RAJA::resources::EventProxy(r); } @@ -298,17 +286,11 @@ RAJA_INLINE resources::EventProxy { auto segIterRes = resources::get_resource::type::get_default(); - wrap::forall(segIterRes, - SegmentIterPolicy(), - iset, + wrap::forall(segIterRes, SegmentIterPolicy(), iset, [=, &r](int segID) { - iset.segmentCall(segID, - detail::CallForall{}, - SegmentExecPolicy(), - loop_body, - r, - f_params); + iset.segmentCall(segID, detail::CallForall{}, + SegmentExecPolicy(), loop_body, r, f_params); }); return RAJA::resources::EventProxy(r); } @@ -363,11 +345,8 @@ forall_Icount(ExecutionPolicy&& p, Res r, IdxSet&& c, Params&&... params) util::callPreLaunchPlugins(context); RAJA::resources::EventProxy e = - wrap::forall_Icount(r, - std::forward(p), - std::forward(c), - std::move(body), - f_params); + wrap::forall_Icount(r, std::forward(p), + std::forward(c), std::move(body), f_params); util::callPostLaunchPlugins(context); return e; @@ -382,9 +361,7 @@ RAJA_INLINE resources::EventProxy { auto r = Res::get_default(); return ::RAJA::policy_by_value_interface::forall_Icount( - std::forward(p), - r, - std::forward(c), + std::forward(p), r, std::forward(c), std::forward(loop_body)); } @@ -423,11 +400,9 @@ RAJA_INLINE util::callPreLaunchPlugins(context); - resources::EventProxy e = wrap::forall(r, - std::forward(p), - std::forward(c), - std::move(body), - f_params); + resources::EventProxy e = + wrap::forall(r, std::forward(p), std::forward(c), + std::move(body), f_params); util::callPostLaunchPlugins(context); return e; @@ -444,9 +419,7 @@ RAJA_INLINE { auto r = Res::get_default(); return ::RAJA::policy_by_value_interface::forall( - std::forward(p), - r, - std::forward(c), + std::forward(p), r, std::forward(c), std::forward(loop_body)); } @@ -473,8 +446,7 @@ RAJA_INLINE concepts::enable_if_t, auto r = Res::get_default(); // plugins handled in multipolicy policy_invoker - return forall_impl(r, - std::forward(p), + return forall_impl(r, std::forward(p), std::forward(c), std::forward(loop_body)); } @@ -522,13 +494,9 @@ RAJA_INLINE concepts::enable_if_t, util::callPreLaunchPlugins(context); - resources::EventProxy e = - wrap::forall_Icount(r, - std::forward(p), - std::forward(c), - icount, - std::move(body), - f_params); + resources::EventProxy e = wrap::forall_Icount( + r, std::forward(p), std::forward(c), icount, + std::move(body), f_params); util::callPostLaunchPlugins(context); return e; @@ -551,10 +519,7 @@ forall_Icount(ExecutionPolicy&& p, { auto r = Res::get_default(); return ::RAJA::policy_by_value_interface::forall_Icount( - std::forward(p), - r, - std::forward(c), - icount, + std::forward(p), r, std::forward(c), icount, std::forward(loop_body)); } @@ -595,11 +560,9 @@ forall(ExecutionPolicy&& p, Res r, Container&& c, Params&&... params) util::callPreLaunchPlugins(context); - resources::EventProxy e = wrap::forall(r, - std::forward(p), - std::forward(c), - std::move(body), - f_params); + resources::EventProxy e = + wrap::forall(r, std::forward(p), + std::forward(c), std::move(body), f_params); util::callPostLaunchPlugins(context); return e; @@ -619,9 +582,7 @@ forall(ExecutionPolicy&& p, Container&& c, LoopBody&& loop_body) { auto r = Res::get_default(); return ::RAJA::policy_by_value_interface::forall( - std::forward(p), - r, - std::forward(c), + std::forward(p), r, std::forward(c), std::forward(loop_body)); } @@ -640,16 +601,16 @@ template < RAJA_INLINE resources::EventProxy forall(Args&&... args) { Res r = Res::get_default(); - return ::RAJA::policy_by_value_interface::forall( - ExecutionPolicy(), r, std::forward(args)...); + return ::RAJA::policy_by_value_interface::forall(ExecutionPolicy(), r, + std::forward(args)...); } template RAJA_INLINE concepts::enable_if_t, type_traits::is_resource> forall(Res r, Args&&... args) { - return ::RAJA::policy_by_value_interface::forall( - ExecutionPolicy(), r, std::forward(args)...); + return ::RAJA::policy_by_value_interface::forall(ExecutionPolicy(), r, + std::forward(args)...); } /*! @@ -713,8 +674,8 @@ CallForallIcount::operator()(T const& segment, ForallParams f_params) const { // go through wrap to unwrap icount - return wrap::forall_Icount( - r, ExecutionPolicy(), segment, start, body, f_params); + return wrap::forall_Icount(r, ExecutionPolicy(), segment, start, body, + f_params); } } // namespace detail @@ -765,8 +726,8 @@ struct dynamic_helper return {r}; } - return dynamic_helper::invoke_forall( - r, pol, seg, body); + return dynamic_helper::invoke_forall(r, pol, seg, + body); } }; diff --git a/include/RAJA/pattern/kernel.hpp b/include/RAJA/pattern/kernel.hpp index 0c4473e620..488caff1e1 100644 --- a/include/RAJA/pattern/kernel.hpp +++ b/include/RAJA/pattern/kernel.hpp @@ -119,10 +119,8 @@ RAJA_INLINE resources::EventProxy using param_tuple_t = camp::decay; - using loop_data_t = internal::LoopData...>; + using loop_data_t = internal::LoopData...>; util::callPreCapturePlugins(context); @@ -133,8 +131,7 @@ RAJA_INLINE resources::EventProxy // and only copied to provide thread-private instances. loop_data_t loop_data( make_wrapped_tuple(std::forward(segments)), - std::forward(params), - resource, + std::forward(params), resource, std::forward(bodies)...); util::callPostCapturePlugins(context); @@ -160,9 +157,7 @@ RAJA_INLINE resources::EventProxy kernel_resource(SegmentTuple&& segments, Resource resource, Bodies&&... bodies) { return RAJA::kernel_param_resource( - std::forward(segments), - RAJA::make_tuple(), - resource, + std::forward(segments), RAJA::make_tuple(), resource, std::forward(bodies)...); } @@ -175,10 +170,8 @@ kernel_param(SegmentTuple&& segments, ParamTuple&& params, Bodies&&... bodies) { auto res = resources::get_default_resource(); return RAJA::kernel_param_resource( - std::forward(segments), - std::forward(params), - res, - std::forward(bodies)...); + std::forward(segments), std::forward(params), + res, std::forward(bodies)...); } template @@ -187,9 +180,7 @@ RAJA_INLINE resources::EventProxy> { auto res = resources::get_default_resource(); return RAJA::kernel_param_resource( - std::forward(segments), - RAJA::make_tuple(), - res, + std::forward(segments), RAJA::make_tuple(), res, std::forward(bodies)...); } diff --git a/include/RAJA/pattern/kernel/For.hpp b/include/RAJA/pattern/kernel/For.hpp index 6206780e85..f4c54c1146 100644 --- a/include/RAJA/pattern/kernel/For.hpp +++ b/include/RAJA/pattern/kernel/For.hpp @@ -110,10 +110,7 @@ struct StatementExecutor< auto r = data.res; - forall_impl(r, - ExecPolicy{}, - TypedRangeSegment(0, len), - for_wrapper, + forall_impl(r, ExecPolicy{}, TypedRangeSegment(0, len), for_wrapper, RAJA::expt::get_empty_forall_param_pack()); } }; diff --git a/include/RAJA/pattern/kernel/ForICount.hpp b/include/RAJA/pattern/kernel/ForICount.hpp index 31525a9374..345cacb4d5 100644 --- a/include/RAJA/pattern/kernel/ForICount.hpp +++ b/include/RAJA/pattern/kernel/ForICount.hpp @@ -119,10 +119,7 @@ struct StatementExecutor< auto r = resources::get_resource::type::get_default(); - forall_impl(r, - ExecPolicy{}, - TypedRangeSegment(0, len), - for_wrapper, + forall_impl(r, ExecPolicy{}, TypedRangeSegment(0, len), for_wrapper, RAJA::expt::get_empty_forall_param_pack()); } }; diff --git a/include/RAJA/pattern/kernel/Hyperplane.hpp b/include/RAJA/pattern/kernel/Hyperplane.hpp index 8686eb47f0..0f866b63e7 100644 --- a/include/RAJA/pattern/kernel/Hyperplane.hpp +++ b/include/RAJA/pattern/kernel/Hyperplane.hpp @@ -125,8 +125,7 @@ struct StatementExecutor, + ExecPolicy, ArgList, HyperplaneInner, EnclosedStmts...>>; // Create a For-loop wrapper for the outer loop @@ -145,11 +144,8 @@ struct StatementExecutor::type::get_default(); - forall_impl(r, - HpExecPolicy{}, - TypedRangeSegment(0, hp_len), - outer_wrapper, - RAJA::expt::get_empty_forall_param_pack()); + forall_impl(r, HpExecPolicy{}, TypedRangeSegment(0, hp_len), + outer_wrapper, RAJA::expt::get_empty_forall_param_pack()); } }; diff --git a/include/RAJA/pattern/kernel/InitLocalMem.hpp b/include/RAJA/pattern/kernel/InitLocalMem.hpp index b8cb6208d3..6450f697bc 100644 --- a/include/RAJA/pattern/kernel/InitLocalMem.hpp +++ b/include/RAJA/pattern/kernel/InitLocalMem.hpp @@ -81,8 +81,7 @@ struct StatementExecutor::param_tuple_t>::value_type; + Pos, typename camp::decay::param_tuple_t>::value_type; // Initialize memory #ifdef RAJA_COMPILER_MSVC diff --git a/include/RAJA/pattern/kernel/Tile.hpp b/include/RAJA/pattern/kernel/Tile.hpp index 501a9d5c15..3e92146ff0 100644 --- a/include/RAJA/pattern/kernel/Tile.hpp +++ b/include/RAJA/pattern/kernel/Tile.hpp @@ -162,9 +162,9 @@ struct IterableTiler RAJA_HOST_DEVICE RAJA_INLINE iterator operator+(const difference_type& rhs) const { - return iterator(itiler, - block_id + rhs >= itiler.num_blocks ? itiler.num_blocks - : block_id + rhs); + return iterator(itiler, block_id + rhs >= itiler.num_blocks + ? itiler.num_blocks + : block_id + rhs); } RAJA_HOST_DEVICE @@ -250,10 +250,7 @@ struct StatementExecutor< // Loop over tiles, executing enclosed statement list auto r = resources::get_resource::type::get_default(); - forall_impl(r, - EPol{}, - tiled_iterable, - tile_wrapper, + forall_impl(r, EPol{}, tiled_iterable, tile_wrapper, RAJA::expt::get_empty_forall_param_pack()); // Set range back to original values @@ -291,10 +288,7 @@ struct StatementExecutor< // Loop over tiles, executing enclosed statement list auto r = resources::get_resource::type::get_default(); - forall_impl(r, - EPol{}, - tiled_iterable, - tile_wrapper, + forall_impl(r, EPol{}, tiled_iterable, tile_wrapper, RAJA::expt::get_empty_forall_param_pack()); // Set range back to original values diff --git a/include/RAJA/pattern/kernel/TileTCount.hpp b/include/RAJA/pattern/kernel/TileTCount.hpp index eadf8dc2d2..733e0a838e 100644 --- a/include/RAJA/pattern/kernel/TileTCount.hpp +++ b/include/RAJA/pattern/kernel/TileTCount.hpp @@ -130,10 +130,7 @@ struct StatementExecutor< // Loop over tiles, executing enclosed statement list auto r = resources::get_resource::type::get_default(); - forall_impl(r, - EPol{}, - tiled_iterable, - tile_wrapper, + forall_impl(r, EPol{}, tiled_iterable, tile_wrapper, RAJA::expt::get_empty_forall_param_pack()); // Set range back to original values diff --git a/include/RAJA/pattern/kernel/internal/StatementList.hpp b/include/RAJA/pattern/kernel/internal/StatementList.hpp index ac88ffe3cf..fcd04b75be 100644 --- a/include/RAJA/pattern/kernel/internal/StatementList.hpp +++ b/include/RAJA/pattern/kernel/internal/StatementList.hpp @@ -61,9 +61,7 @@ struct StatementListExecutor StatementExecutor::exec(std::forward(data)); // call our next statement - StatementListExecutor::exec(std::forward(data)); } }; diff --git a/include/RAJA/pattern/launch/launch_core.hpp b/include/RAJA/pattern/launch/launch_core.hpp index 78e2a8f9f7..7f610bd175 100644 --- a/include/RAJA/pattern/launch/launch_core.hpp +++ b/include/RAJA/pattern/launch/launch_core.hpp @@ -265,8 +265,8 @@ void launch(LaunchParams const& launch_params, using Res = typename resources::get_resource< typename LAUNCH_POLICY::host_policy_t>::type; - launch_t::exec( - Res::get_default(), launch_params, kernel_name, p_body, reducers); + launch_t::exec(Res::get_default(), launch_params, kernel_name, p_body, + reducers); util::callPostLaunchPlugins(context); } @@ -306,8 +306,8 @@ void launch(LaunchParams const& launch_params, using Res = typename resources::get_resource< typename LAUNCH_POLICY::host_policy_t>::type; - launch_t::exec( - Res::get_default(), launch_params, kernel_name, p_body, reducers); + launch_t::exec(Res::get_default(), launch_params, kernel_name, p_body, + reducers); util::callPostLaunchPlugins(context); } @@ -370,9 +370,7 @@ void launch(ExecPlace place, using Res = typename resources::get_resource< typename POLICY_LIST::host_policy_t>::type; launch>( - Res::get_default(), - launch_params, - kernel_name, + Res::get_default(), launch_params, kernel_name, std::forward(rest_of_launch_args)...); break; } @@ -382,9 +380,7 @@ void launch(ExecPlace place, using Res = typename resources::get_resource< typename POLICY_LIST::device_policy_t>::type; launch>( - Res::get_default(), - launch_params, - kernel_name, + Res::get_default(), launch_params, kernel_name, std::forward(rest_of_launch_args)...); break; } @@ -413,9 +409,7 @@ void launch(ExecPlace place, using Res = typename resources::get_resource< typename POLICY_LIST::host_policy_t>::type; launch>( - Res::get_default(), - launch_params, - kernel_name, + Res::get_default(), launch_params, kernel_name, std::forward(rest_of_launch_args)...); break; } @@ -425,9 +419,7 @@ void launch(ExecPlace place, using Res = typename resources::get_resource< typename POLICY_LIST::device_policy_t>::type; launch>( - Res::get_default(), - launch_params, - kernel_name, + Res::get_default(), launch_params, kernel_name, std::forward(rest_of_launch_args)...); break; } @@ -664,8 +656,8 @@ RAJA_HOST_DEVICE RAJA_INLINE void loop_icount(CONTEXT const& ctx, SEGMENT const& segment, BODY const& body) { - LoopICountExecute, SEGMENT>::exec( - ctx, segment, body); + LoopICountExecute, SEGMENT>::exec(ctx, segment, + body); } namespace expt @@ -682,8 +674,8 @@ RAJA_HOST_DEVICE RAJA_INLINE void loop(CONTEXT const& ctx, BODY const& body) { - LoopExecute, SEGMENT>::exec( - ctx, segment0, segment1, body); + LoopExecute, SEGMENT>::exec(ctx, segment0, segment1, + body); } RAJA_SUPPRESS_HD_WARN @@ -721,8 +713,8 @@ RAJA_HOST_DEVICE RAJA_INLINE void tile(CONTEXT const& ctx, BODY const& body) { - TileExecute, SEGMENT>::exec( - ctx, tile_size, segment, body); + TileExecute, SEGMENT>::exec(ctx, tile_size, segment, + body); } template , SEGMENT>::exec( - ctx, tile_size, segment, body); + TileTCountExecute, SEGMENT>::exec(ctx, tile_size, + segment, body); } namespace expt diff --git a/include/RAJA/pattern/params/forall.hpp b/include/RAJA/pattern/params/forall.hpp index 62e0d53aba..9a204936ac 100644 --- a/include/RAJA/pattern/params/forall.hpp +++ b/include/RAJA/pattern/params/forall.hpp @@ -153,9 +153,7 @@ struct ParamMultiplexer static void constexpr init(ForallParamPack& f_params, Args&&... args) { - FP::detail_init(EXEC_POL(), - typename FP::params_seq(), - f_params, + FP::detail_init(EXEC_POL(), typename FP::params_seq(), f_params, std::forward(args)...); } template & f_params, Args&&... args) { - FP::detail_combine(EXEC_POL(), - typename FP::params_seq(), - f_params, + FP::detail_combine(EXEC_POL(), typename FP::params_seq(), f_params, std::forward(args)...); } template & f_params, Args&&... args) { - FP::detail_resolve(EXEC_POL(), - typename FP::params_seq(), - f_params, + FP::detail_resolve(EXEC_POL(), typename FP::params_seq(), f_params, std::forward(args)...); } }; @@ -365,10 +359,10 @@ constexpr concepts::enable_if> check_invocable(LAMBDA&&, const camp::list&) { #if !defined(RAJA_ENABLE_HIP) - static_assert(is_invocable::type, - EXPECTED_ARGS...>::value, - "LAMBDA Not invocable w/ EXPECTED_ARGS."); + static_assert( + is_invocable::type, + EXPECTED_ARGS...>::value, + "LAMBDA Not invocable w/ EXPECTED_ARGS."); #endif } @@ -449,8 +443,7 @@ RAJA_HOST_DEVICE constexpr auto invoke_body(Params&& params, Fn&& f, Ts&&... extra) { return detail::invoke_with_order( - camp::forward(params), - camp::forward(f), + camp::forward(params), camp::forward(f), typename camp::decay::lambda_arg_seq(), camp::forward(extra)...); } diff --git a/include/RAJA/pattern/scan.hpp b/include/RAJA/pattern/scan.hpp index b25ab5d018..01ef0fd1ba 100644 --- a/include/RAJA/pattern/scan.hpp +++ b/include/RAJA/pattern/scan.hpp @@ -73,8 +73,8 @@ RAJA_INLINE { return resources::EventProxy(r); } - return impl::scan::inclusive_inplace( - r, std::forward(p), begin(c), end(c), binop); + return impl::scan::inclusive_inplace(r, std::forward(p), begin(c), + end(c), binop); } /// template < @@ -137,8 +137,8 @@ RAJA_INLINE { return resources::EventProxy(r); } - return impl::scan::exclusive_inplace( - r, std::forward(p), begin(c), end(c), binop, value); + return impl::scan::exclusive_inplace(r, std::forward(p), begin(c), + end(c), binop, value); } /// template (r); } - return impl::scan::inclusive( - r, std::forward(p), begin(in), end(in), begin(out), binop); + return impl::scan::inclusive(r, std::forward(p), begin(in), + end(in), begin(out), binop); } /// template (p), - r, - std::forward(in), - std::forward(out), - binop); + std::forward(p), r, std::forward(in), + std::forward(out), binop); } /*! @@ -292,13 +289,8 @@ RAJA_INLINE { return resources::EventProxy(r); } - return impl::scan::exclusive(r, - std::forward(p), - begin(in), - end(in), - begin(out), - binop, - value); + return impl::scan::exclusive(r, std::forward(p), begin(in), + end(in), begin(out), binop, value); } /// template (p), - r, - std::forward(in), - std::forward(out), - binop, - value); + std::forward(p), r, std::forward(in), + std::forward(out), binop, value); } } // namespace policy_by_value_interface diff --git a/include/RAJA/pattern/sort.hpp b/include/RAJA/pattern/sort.hpp index c02348fbf1..684964ef6e 100644 --- a/include/RAJA/pattern/sort.hpp +++ b/include/RAJA/pattern/sort.hpp @@ -73,8 +73,8 @@ sort(ExecPolicy&& p, Res r, Container&& c, Compare comp = Compare{}) if (N > 1) { - return impl::sort::unstable( - r, std::forward(p), begin_it, end_it, comp); + return impl::sort::unstable(r, std::forward(p), begin_it, + end_it, comp); } else { @@ -139,8 +139,8 @@ stable_sort(ExecPolicy&& p, Res r, Container&& c, Compare comp = Compare{}) if (N > 1) { - return impl::sort::stable( - r, std::forward(p), begin_it, end_it, comp); + return impl::sort::stable(r, std::forward(p), begin_it, end_it, + comp); } else { @@ -214,8 +214,8 @@ sort_pairs(ExecPolicy&& p, if (N > 1) { - return impl::sort::unstable_pairs( - r, std::forward(p), begin_key, end_key, begin(vals), comp); + return impl::sort::unstable_pairs(r, std::forward(p), begin_key, + end_key, begin(vals), comp); } else { @@ -243,11 +243,8 @@ sort_pairs(ExecPolicy&& p, { Res r = Res::get_default(); return ::RAJA::policy_by_value_interface::sort_pairs( - std::forward(p), - r, - std::forward(keys), - std::forward(vals), - comp); + std::forward(p), r, std::forward(keys), + std::forward(vals), comp); } /*! @@ -298,8 +295,8 @@ stable_sort_pairs(ExecPolicy&& p, if (N > 1) { - return impl::sort::stable_pairs( - r, std::forward(p), begin_key, end_key, begin(vals), comp); + return impl::sort::stable_pairs(r, std::forward(p), begin_key, + end_key, begin(vals), comp); } else { @@ -327,11 +324,8 @@ stable_sort_pairs(ExecPolicy&& p, { Res r = Res::get_default(); return ::RAJA::policy_by_value_interface::stable_sort_pairs( - std::forward(p), - r, - std::forward(keys), - std::forward(vals), - comp); + std::forward(p), r, std::forward(keys), + std::forward(vals), comp); } } // namespace policy_by_value_interface @@ -362,8 +356,8 @@ concepts::enable_if_t, type_traits::is_resource> sort(Res r, Args&&... args) { - return ::RAJA::policy_by_value_interface::sort( - ExecPolicy(), r, std::forward(args)...); + return ::RAJA::policy_by_value_interface::sort(ExecPolicy(), r, + std::forward(args)...); } /*! diff --git a/include/RAJA/pattern/tensor/TensorIndex.hpp b/include/RAJA/pattern/tensor/TensorIndex.hpp index c79035789f..9f5c09f152 100644 --- a/include/RAJA/pattern/tensor/TensorIndex.hpp +++ b/include/RAJA/pattern/tensor/TensorIndex.hpp @@ -65,11 +65,8 @@ class TensorIndex value_type(-1)>> static_all() { - return StaticTensorIndex>(); + return StaticTensorIndex>(); } RAJA_INLINE diff --git a/include/RAJA/pattern/tensor/internal/ET/MultiplyOperator.hpp b/include/RAJA/pattern/tensor/internal/ET/MultiplyOperator.hpp index eb70821915..3f5adc2f43 100644 --- a/include/RAJA/pattern/tensor/internal/ET/MultiplyOperator.hpp +++ b/include/RAJA/pattern/tensor/internal/ET/MultiplyOperator.hpp @@ -54,8 +54,7 @@ struct MultiplyOperator RAJA_HOST_DEVICE static void print_ast() { - printf("Elemental(%d,%d)", - (int)s_num_dims, + printf("Elemental(%d,%d)", (int)s_num_dims, (int)RIGHT_OPERAND_TYPE::s_num_dims); } @@ -519,16 +518,14 @@ struct MultiplyOperator< { using LeftType = - StaticTensorTile, camp::int_seq>; // evaluate both sides of operator auto left = et_left.eval(LeftType()); using RightType = - StaticTensorTile, camp::int_seq>; @@ -536,8 +533,7 @@ struct MultiplyOperator< // accumulate product auto temp = left.right_multiply_vector_accumulate(right, result); - MultiplyBridge>:: multiply_into_result(result, tile, et_left, et_right); result += temp; @@ -546,15 +542,13 @@ struct MultiplyOperator< { using LeftType = - StaticTensorTile, camp::int_seq>; auto left = et_left.eval(LeftType()); using RightType = - StaticTensorTile, camp::int_seq>; auto right = et_right.eval(RightType()); @@ -606,16 +600,14 @@ struct MultiplyOperator< { using LeftType = - StaticTensorTile, camp::int_seq>; // evaluate both sides of operator auto left = et_left.eval(LeftType()); using RightType = - StaticTensorTile, camp::int_seq>; @@ -629,15 +621,13 @@ struct MultiplyOperator< { using LeftType = - StaticTensorTile, camp::int_seq>; auto left = et_left.eval(LeftType()); using RightType = - StaticTensorTile, camp::int_seq>; auto right = et_right.eval(RightType()); @@ -683,8 +673,7 @@ struct MultiplyOperator< const size_t iter_count = (k_size / tile_size) + ((k_size % tile_size != 0) ? 1 : 0); - MultiplyBridge>:: multiply_into_result(result, tile, et_left, et_right); } diff --git a/include/RAJA/pattern/tensor/internal/ET/TensorDivide.hpp b/include/RAJA/pattern/tensor/internal/ET/TensorDivide.hpp index f4e5b3eec3..cb93f08e09 100644 --- a/include/RAJA/pattern/tensor/internal/ET/TensorDivide.hpp +++ b/include/RAJA/pattern/tensor/internal/ET/TensorDivide.hpp @@ -211,8 +211,8 @@ struct DivideOperator< return numerator.divide(right.eval(tile)); } - return numerator.divide_nm( - right.eval(tile), tile.m_size[0], tile.m_size[1]); + return numerator.divide_nm(right.eval(tile), tile.m_size[0], + tile.m_size[1]); } }; @@ -257,8 +257,8 @@ struct DivideOperator< } else { - return left.eval(tile).divide_nm( - denominator, tile.m_size[0], tile.m_size[1]); + return left.eval(tile).divide_nm(denominator, tile.m_size[0], + tile.m_size[1]); } } }; @@ -301,8 +301,8 @@ struct DivideOperator< } else { - return left.eval(tile).divide_nm( - right.eval(tile), tile.m_size[0], tile.m_size[1]); + return left.eval(tile).divide_nm(right.eval(tile), tile.m_size[0], + tile.m_size[1]); } } }; diff --git a/include/RAJA/pattern/tensor/internal/ET/TensorMultiply.hpp b/include/RAJA/pattern/tensor/internal/ET/TensorMultiply.hpp index 14f0645e53..ed4689ef6e 100644 --- a/include/RAJA/pattern/tensor/internal/ET/TensorMultiply.hpp +++ b/include/RAJA/pattern/tensor/internal/ET/TensorMultiply.hpp @@ -119,8 +119,7 @@ class TensorMultiply normalize_operand_t> operator+(ADD const& add) const { - return TensorMultiplyAdd>( m_left_operand, m_right_operand, normalizeOperand(add)); } diff --git a/include/RAJA/pattern/tensor/internal/ET/TensorMultiplyAdd.hpp b/include/RAJA/pattern/tensor/internal/ET/TensorMultiplyAdd.hpp index 4ebda615b2..fc118ca4e5 100644 --- a/include/RAJA/pattern/tensor/internal/ET/TensorMultiplyAdd.hpp +++ b/include/RAJA/pattern/tensor/internal/ET/TensorMultiplyAdd.hpp @@ -91,8 +91,8 @@ class TensorMultiplyAdd m_right_operand, m_add_operand)) { - return multiply_op::multiply_add( - tile, m_left_operand, m_right_operand, m_add_operand); + return multiply_op::multiply_add(tile, m_left_operand, m_right_operand, + m_add_operand); } diff --git a/include/RAJA/pattern/tensor/internal/MatrixMatrixMultiply.hpp b/include/RAJA/pattern/tensor/internal/MatrixMatrixMultiply.hpp index a8685ea3cd..c9b7be60dc 100644 --- a/include/RAJA/pattern/tensor/internal/MatrixMatrixMultiply.hpp +++ b/include/RAJA/pattern/tensor/internal/MatrixMatrixMultiply.hpp @@ -297,14 +297,12 @@ struct MatrixMatrixMultiplyHelper< RAJA_UNROLL for (camp::idx_t c_reg = 0; - c_reg < N_SIZE / result_type::s_major_dim_per_register; - ++c_reg) + c_reg < N_SIZE / result_type::s_major_dim_per_register; ++c_reg) { RAJA_UNROLL for (camp::idx_t c_segment = 0; - c_segment < result_type::s_major_dim_per_register; - ++c_segment) + c_segment < result_type::s_major_dim_per_register; ++c_segment) { register_type c_tmp; diff --git a/include/RAJA/pattern/tensor/internal/MatrixRegisterImpl.hpp b/include/RAJA/pattern/tensor/internal/MatrixRegisterImpl.hpp index cf4ca1cce5..58a8e21868 100644 --- a/include/RAJA/pattern/tensor/internal/MatrixRegisterImpl.hpp +++ b/include/RAJA/pattern/tensor/internal/MatrixRegisterImpl.hpp @@ -291,11 +291,8 @@ class TensorRegister::result_type matrix_multiply(RMAT const& mat) const { - typename RAJA::internal::expt::MatrixMatrixMultiplyHelper::result_type - res(0); + typename RAJA::internal::expt::MatrixMatrixMultiplyHelper< + self_type, RMAT>::result_type res(0); RAJA::internal::expt::MatrixMatrixMultiplyHelper::multiply( *this, mat, res); return res; @@ -1843,11 +1814,10 @@ class TensorRegister::result_type const& C) const { - typename RAJA::internal::expt::MatrixMatrixMultiplyHelper::result_type - res(C); - RAJA::internal::expt::MatrixMatrixMultiplyHelper:: - multiply_accumulate(*this, B, res); + typename RAJA::internal::expt::MatrixMatrixMultiplyHelper< + self_type, RMAT>::result_type res(C); + RAJA::internal::expt::MatrixMatrixMultiplyHelper< + self_type, RMAT>::multiply_accumulate(*this, B, res); return res; } @@ -1858,8 +1828,8 @@ class TensorRegister:: - multiply_accumulate(*this, B, acc); + RAJA::internal::expt::MatrixMatrixMultiplyHelper< + self_type, RMAT>::multiply_accumulate(*this, B, acc); } diff --git a/include/RAJA/pattern/tensor/internal/RegisterBase.hpp b/include/RAJA/pattern/tensor/internal/RegisterBase.hpp index 2e22449c1d..7d125238c6 100644 --- a/include/RAJA/pattern/tensor/internal/RegisterBase.hpp +++ b/include/RAJA/pattern/tensor/internal/RegisterBase.hpp @@ -264,9 +264,8 @@ class RegisterBase> camp::idx_t stride_inner, camp::idx_t stride_outer) { - getThis()->gather( - ptr, - self_type::s_segmented_offsets(segbits, stride_inner, stride_outer)); + getThis()->gather(ptr, self_type::s_segmented_offsets(segbits, stride_inner, + stride_outer)); return *getThis(); } @@ -385,9 +384,8 @@ class RegisterBase> camp::idx_t stride_inner, camp::idx_t stride_outer) const { - getThis()->scatter( - ptr, - self_type::s_segmented_offsets(segbits, stride_inner, stride_outer)); + getThis()->scatter(ptr, self_type::s_segmented_offsets( + segbits, stride_inner, stride_outer)); return *getThis(); } diff --git a/include/RAJA/pattern/tensor/internal/TensorRef.hpp b/include/RAJA/pattern/tensor/internal/TensorRef.hpp index d98c0ccf65..4288c1c333 100644 --- a/include/RAJA/pattern/tensor/internal/TensorRef.hpp +++ b/include/RAJA/pattern/tensor/internal/TensorRef.hpp @@ -450,8 +450,7 @@ struct TensorRef RAJA_INLINE void print() const { - printf("TensorRef: dims=%d, m_pointer=%p, m_stride=[", - (int)NUM_DIMS, + printf("TensorRef: dims=%d, m_pointer=%p, m_stride=[", (int)NUM_DIMS, m_pointer); for (camp::idx_t i = 0; i < NUM_DIMS; ++i) @@ -527,8 +526,7 @@ struct StaticTensorRef policies, Selector s) -> MultiPolicy { - return detail::make_multi_policy( - camp::make_idx_seq_t{}, s, policies); + return detail::make_multi_policy(camp::make_idx_seq_t{}, + s, policies); } namespace detail @@ -193,8 +193,7 @@ struct policy_invoker : public policy_invoker } else { - NextInvoker::invoke(offset, - std::forward(iter), + NextInvoker::invoke(offset, std::forward(iter), std::forward(loop_body)); } } diff --git a/include/RAJA/policy/atomic_builtin.hpp b/include/RAJA/policy/atomic_builtin.hpp index 045088fb33..6350e1e6fb 100644 --- a/include/RAJA/policy/atomic_builtin.hpp +++ b/include/RAJA/policy/atomic_builtin.hpp @@ -427,8 +427,8 @@ template ::value, bool> = true> RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicCAS(T* acc, T compare, T value) { - __atomic_compare_exchange_n( - acc, &compare, value, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED); + __atomic_compare_exchange_n(acc, &compare, value, false, __ATOMIC_RELAXED, + __ATOMIC_RELAXED); return compare; } @@ -554,10 +554,9 @@ RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicCAS(T* acc, T compare, T value) { using R = builtin_useReinterpret_t; - return RAJA::util::reinterp_A_as_B( - builtin_atomicCAS(reinterpret_cast(acc), - RAJA::util::reinterp_A_as_B(compare), - RAJA::util::reinterp_A_as_B(value))); + return RAJA::util::reinterp_A_as_B(builtin_atomicCAS( + reinterpret_cast(acc), RAJA::util::reinterp_A_as_B(compare), + RAJA::util::reinterp_A_as_B(value))); } @@ -731,8 +730,7 @@ template RAJA_DEVICE_HIP RAJA_INLINE T atomicMin(builtin_atomic, T* acc, T value) { return detail::builtin_atomicCAS_loop( - acc, - [value](T old) { return value < old ? value : old; }, + acc, [value](T old) { return value < old ? value : old; }, [value](T current) { return current <= value; }); } @@ -740,8 +738,7 @@ template RAJA_DEVICE_HIP RAJA_INLINE T atomicMax(builtin_atomic, T* acc, T value) { return detail::builtin_atomicCAS_loop( - acc, - [value](T old) { return old < value ? value : old; }, + acc, [value](T old) { return old < value ? value : old; }, [value](T current) { return value <= current; }); } @@ -755,8 +752,7 @@ template RAJA_DEVICE_HIP RAJA_INLINE T atomicInc(builtin_atomic, T* acc, T value) { return detail::builtin_atomicCAS_loop( - acc, - [value](T old) + acc, [value](T old) { return value <= old ? static_cast(0) : old + static_cast(1); }); } diff --git a/include/RAJA/policy/cuda/MemUtils_CUDA.hpp b/include/RAJA/policy/cuda/MemUtils_CUDA.hpp index 671b92a7b9..294f532de9 100644 --- a/include/RAJA/policy/cuda/MemUtils_CUDA.hpp +++ b/include/RAJA/policy/cuda/MemUtils_CUDA.hpp @@ -147,8 +147,8 @@ struct DevicePinnedAllocator cudaErrchk(cudaMallocManaged(&ptr, nbytes, cudaMemAttachGlobal)); cudaErrchk( cudaMemAdvise(ptr, nbytes, cudaMemAdviseSetPreferredLocation, device)); - cudaErrchk(cudaMemAdvise( - ptr, nbytes, cudaMemAdviseSetAccessedBy, cudaCpuDeviceId)); + cudaErrchk(cudaMemAdvise(ptr, nbytes, cudaMemAdviseSetAccessedBy, + cudaCpuDeviceId)); return ptr; } @@ -445,11 +445,9 @@ cuda_occupancy_max_blocks_threads(const void* func, data.func_dynamic_shmem_per_block = func_dynamic_shmem_per_block; - cudaErrchk( - cudaOccupancyMaxPotentialBlockSize(&data.func_max_blocks_per_device, - &data.func_max_threads_per_block, - func, - func_dynamic_shmem_per_block)); + cudaErrchk(cudaOccupancyMaxPotentialBlockSize( + &data.func_max_blocks_per_device, &data.func_max_threads_per_block, + func, func_dynamic_shmem_per_block)); } return data; @@ -477,9 +475,7 @@ cuda_occupancy_max_blocks(const void* func, size_t func_dynamic_shmem_per_block) data.func_threads_per_block = func_threads_per_block; cudaErrchk(cudaOccupancyMaxActiveBlocksPerMultiprocessor( - &data.func_max_blocks_per_sm, - func, - func_threads_per_block, + &data.func_max_blocks_per_sm, func, func_threads_per_block, func_dynamic_shmem_per_block)); } @@ -503,9 +499,7 @@ cuda_occupancy_max_blocks(const void* func, data.func_threads_per_block = func_threads_per_block; cudaErrchk(cudaOccupancyMaxActiveBlocksPerMultiprocessor( - &data.func_max_blocks_per_sm, - func, - func_threads_per_block, + &data.func_max_blocks_per_sm, func, func_threads_per_block, func_dynamic_shmem_per_block)); } diff --git a/include/RAJA/policy/cuda/WorkGroup/WorkRunner.hpp b/include/RAJA/policy/cuda/WorkGroup/WorkRunner.hpp index cb31f32f86..b031481713 100644 --- a/include/RAJA/policy/cuda/WorkGroup/WorkRunner.hpp +++ b/include/RAJA/policy/cuda/WorkGroup/WorkRunner.hpp @@ -328,8 +328,7 @@ struct WorkRunner< storage.template emplace( get_Dispatcher(dispatcher_exec_policy{}), - std::forward(iter), - std::forward(loop_body)); + std::forward(iter), std::forward(loop_body)); } } @@ -342,17 +341,14 @@ struct WorkRunner< { using Iterator = camp::decay; using IndexType = camp::decay; + std::end(storage)))>; using value_type = typename WorkContainer::value_type; per_run_storage run_storage{}; - auto func = cuda_unordered_y_block_global; + auto func = + cuda_unordered_y_block_global; // // Compute the requested iteration space size @@ -376,8 +372,7 @@ struct WorkRunner< cuda_dim_t gridSize{ static_cast((average_iterations + block_size - 1) / block_size), - static_cast(num_loops), - 1}; + static_cast(num_loops), 1}; RAJA_FT_BEGIN; @@ -391,8 +386,8 @@ struct WorkRunner< // Launch the kernel // void* func_args[] = {(void*)&begin, (void*)&args...}; - RAJA::cuda::launch( - (const void*)func, gridSize, blockSize, func_args, shmem, r, Async); + RAJA::cuda::launch((const void*)func, gridSize, blockSize, func_args, + shmem, r, Async); } RAJA_FT_END; diff --git a/include/RAJA/policy/cuda/atomic.hpp b/include/RAJA/policy/cuda/atomic.hpp index 628ccc4715..768f4297a6 100644 --- a/include/RAJA/policy/cuda/atomic.hpp +++ b/include/RAJA/policy/cuda/atomic.hpp @@ -298,10 +298,9 @@ RAJA_INLINE __device__ T cuda_atomicCAS(T* acc, T compare, T value) { using R = cuda_useReinterpretCAS_t; - return RAJA::util::reinterp_A_as_B( - cuda_atomicCAS(reinterpret_cast(acc), - RAJA::util::reinterp_A_as_B(compare), - RAJA::util::reinterp_A_as_B(value))); + return RAJA::util::reinterp_A_as_B(cuda_atomicCAS( + reinterpret_cast(acc), RAJA::util::reinterp_A_as_B(compare), + RAJA::util::reinterp_A_as_B(value))); } /*! @@ -474,8 +473,7 @@ template < RAJA_INLINE __device__ T cuda_atomicMin(T* acc, T value) { return cuda_atomicCAS_loop( - acc, - [value](T old) { return value < old ? value : old; }, + acc, [value](T old) { return value < old ? value : old; }, [value](T current) { return current <= value; }); } @@ -498,8 +496,7 @@ template < RAJA_INLINE __device__ T cuda_atomicMax(T* acc, T value) { return cuda_atomicCAS_loop( - acc, - [value](T old) { return old < value ? value : old; }, + acc, [value](T old) { return old < value ? value : old; }, [value](T current) { return value <= current; }); } @@ -529,11 +526,9 @@ RAJA_INLINE __device__ T cuda_atomicInc(T* acc, T value) { // See: // http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomicinc - return cuda_atomicCAS_loop(acc, - [value](T old) { - return value <= old ? static_cast(0) - : old + static_cast(1); - }); + return cuda_atomicCAS_loop( + acc, [value](T old) + { return value <= old ? static_cast(0) : old + static_cast(1); }); } template < diff --git a/include/RAJA/policy/cuda/forall.hpp b/include/RAJA/policy/cuda/forall.hpp index 137cdc591e..746c21f2ca 100644 --- a/include/RAJA/policy/cuda/forall.hpp +++ b/include/RAJA/policy/cuda/forall.hpp @@ -630,17 +630,13 @@ forall_impl(resources::Cuda cuda_res, using LOOP_BODY = camp::decay; using IndexType = camp::decay; - using EXEC_POL = ::RAJA::policy::cuda::cuda_exec_explicit; - using UniqueMarker = ::camp:: - list; - using DimensionCalculator = impl::ForallDimensionCalculator; + using EXEC_POL = ::RAJA::policy::cuda::cuda_exec_explicit< + IterationMapping, IterationGetter, Concretizer, BlocksPerSM, Async>; + using UniqueMarker = ::camp::list; + using DimensionCalculator = + impl::ForallDimensionCalculator; // // Compute the requested iteration space size @@ -653,12 +649,9 @@ forall_impl(resources::Cuda cuda_res, if (len > 0) { - auto func = - reinterpret_cast(&impl::forall_cuda_kernel); + auto func = reinterpret_cast( + &impl::forall_cuda_kernel); // // Setup shared memory buffers @@ -677,20 +670,16 @@ forall_impl(resources::Cuda cuda_res, // // Privatize the loop_body, using make_launch_body to setup reductions // - LOOP_BODY body = - RAJA::cuda::make_launch_body(func, - dims.blocks, - dims.threads, - shmem, - cuda_res, - std::forward(loop_body)); + LOOP_BODY body = RAJA::cuda::make_launch_body( + func, dims.blocks, dims.threads, shmem, cuda_res, + std::forward(loop_body)); // // Launch the kernels // void* args[] = {(void*)&body, (void*)&begin, (void*)&len}; - RAJA::cuda::launch( - func, dims.blocks, dims.threads, args, shmem, cuda_res, Async); + RAJA::cuda::launch(func, dims.blocks, dims.threads, args, shmem, cuda_res, + Async); } RAJA_FT_END; @@ -727,21 +716,14 @@ forall_impl(resources::Cuda cuda_res, using LOOP_BODY = camp::decay; using IndexType = camp::decay; - using EXEC_POL = ::RAJA::policy::cuda::cuda_exec_explicit; - using UniqueMarker = ::camp::list, - LOOP_BODY, - Iterator, - ForallParam>; - using DimensionCalculator = impl::ForallDimensionCalculator; + using EXEC_POL = ::RAJA::policy::cuda::cuda_exec_explicit< + IterationMapping, IterationGetter, Concretizer, BlocksPerSM, Async>; + using UniqueMarker = + ::camp::list, + LOOP_BODY, Iterator, ForallParam>; + using DimensionCalculator = + impl::ForallDimensionCalculator; // // Compute the requested iteration space size @@ -755,12 +737,8 @@ forall_impl(resources::Cuda cuda_res, { auto func = reinterpret_cast( - &impl::forallp_cuda_kernel>); + &impl::forallp_cuda_kernel>); // // Setup shared memory buffers @@ -786,21 +764,17 @@ forall_impl(resources::Cuda cuda_res, // // Privatize the loop_body, using make_launch_body to setup reductions // - LOOP_BODY body = - RAJA::cuda::make_launch_body(func, - dims.blocks, - dims.threads, - shmem, - cuda_res, - std::forward(loop_body)); + LOOP_BODY body = RAJA::cuda::make_launch_body( + func, dims.blocks, dims.threads, shmem, cuda_res, + std::forward(loop_body)); // // Launch the kernels // - void* args[] = { - (void*)&body, (void*)&begin, (void*)&len, (void*)&f_params}; - RAJA::cuda::launch( - func, dims.blocks, dims.threads, args, shmem, cuda_res, Async); + void* args[] = {(void*)&body, (void*)&begin, (void*)&len, + (void*)&f_params}; + RAJA::cuda::launch(func, dims.blocks, dims.threads, args, shmem, cuda_res, + Async); RAJA::expt::ParamMultiplexer::resolve(f_params, launch_info); } @@ -851,15 +825,12 @@ RAJA_INLINE resources::EventProxy forall_impl( int num_seg = iset.getNumSegments(); for (int isi = 0; isi < num_seg; ++isi) { - iset.segmentCall(r, - isi, - detail::CallForall(), - ::RAJA::policy::cuda::cuda_exec_explicit(), - loop_body); + iset.segmentCall( + r, isi, detail::CallForall(), + ::RAJA::policy::cuda::cuda_exec_explicit(), + loop_body); } // iterate over segments of index set if (!Async) RAJA::cuda::synchronize(r); diff --git a/include/RAJA/policy/cuda/intrinsics.hpp b/include/RAJA/policy/cuda/intrinsics.hpp index 990af56784..2d8aa65cba 100644 --- a/include/RAJA/policy/cuda/intrinsics.hpp +++ b/include/RAJA/policy/cuda/intrinsics.hpp @@ -93,8 +93,8 @@ struct AccessorDeviceScopeUseBlockFence template static RAJA_DEVICE RAJA_INLINE T get(T* in_ptr, size_t idx) { - using ArrayType = RAJA::detail:: - AsIntegerArray; + using ArrayType = RAJA::detail::AsIntegerArray; using integer_type = typename ArrayType::integer_type; ArrayType u; @@ -112,8 +112,8 @@ struct AccessorDeviceScopeUseBlockFence template static RAJA_DEVICE RAJA_INLINE void set(T* in_ptr, size_t idx, T val) { - using ArrayType = RAJA::detail:: - AsIntegerArray; + using ArrayType = RAJA::detail::AsIntegerArray; using integer_type = typename ArrayType::integer_type; ArrayType u; @@ -153,9 +153,9 @@ constexpr size_t max_shfl_int_type_size = sizeof(unsigned int); template RAJA_DEVICE RAJA_INLINE T shfl_xor_sync(T var, int laneMask) { - RAJA::detail:: - AsIntegerArray - u; + RAJA::detail::AsIntegerArray + u; u.set_value(var); for (size_t i = 0; i < u.array_size(); ++i) @@ -172,9 +172,9 @@ RAJA_DEVICE RAJA_INLINE T shfl_xor_sync(T var, int laneMask) template RAJA_DEVICE RAJA_INLINE T shfl_sync(T var, int srcLane) { - RAJA::detail:: - AsIntegerArray - u; + RAJA::detail::AsIntegerArray + u; u.set_value(var); for (size_t i = 0; i < u.array_size(); ++i) @@ -448,10 +448,8 @@ RAJA_DEVICE RAJA_INLINE T block_reduce(T val, T identity) // Partial placement new: Should call new(tmpsd) here but recasting memory // to avoid calling constructor/destructor in shared memory. RAJA::detail::SoAArray* sd = - reinterpret_cast< - RAJA::detail::SoAArray*>( - tmpsd); + reinterpret_cast*>(tmpsd); // write per warp values to shared memory if (warpId == 0) diff --git a/include/RAJA/policy/cuda/kernel/CudaKernel.hpp b/include/RAJA/policy/cuda/kernel/CudaKernel.hpp index abe4458413..c14b3c97f7 100644 --- a/include/RAJA/policy/cuda/kernel/CudaKernel.hpp +++ b/include/RAJA/policy/cuda/kernel/CudaKernel.hpp @@ -258,8 +258,8 @@ struct CudaKernelLauncherGetter executor_t>)>; static constexpr type get() noexcept { - return &internal:: - CudaKernelLauncherFixed; + return &internal::CudaKernelLauncherFixed; } }; @@ -567,8 +567,8 @@ struct StatementExecutor< // int recommended_blocks; int recommended_threads; - launch_t::recommended_blocks_threads( - shmem, recommended_blocks, recommended_threads); + launch_t::recommended_blocks_threads(shmem, recommended_blocks, + recommended_threads); // @@ -586,8 +586,7 @@ struct StatementExecutor< if (recommended_threads >= get_size(launch_dims.min_dims.threads)) { - fit_threads = fitCudaDims(recommended_threads, - launch_dims.dims.threads, + fit_threads = fitCudaDims(recommended_threads, launch_dims.dims.threads, launch_dims.min_dims.threads); } @@ -598,8 +597,7 @@ struct StatementExecutor< get_size(fit_threads) != recommended_threads) { - fit_threads = fitCudaDims(max_threads, - launch_dims.dims.threads, + fit_threads = fitCudaDims(max_threads, launch_dims.dims.threads, launch_dims.min_dims.threads); } @@ -631,8 +629,8 @@ struct StatementExecutor< use_blocks = max_blocks; } - launch_dims.dims.blocks = fitCudaDims( - use_blocks, launch_dims.dims.blocks, launch_dims.min_dims.blocks); + launch_dims.dims.blocks = fitCudaDims(use_blocks, launch_dims.dims.blocks, + launch_dims.min_dims.blocks); // // make sure that we fit @@ -656,23 +654,16 @@ struct StatementExecutor< // of the launch_dims and potential changes to shmem here that is // currently an unresolved issue. // - auto cuda_data = RAJA::cuda::make_launch_body(func, - launch_dims.dims.blocks, - launch_dims.dims.threads, - shmem, - res, - data); + auto cuda_data = RAJA::cuda::make_launch_body( + func, launch_dims.dims.blocks, launch_dims.dims.threads, shmem, res, + data); // // Launch the kernel // void* args[] = {(void*)&cuda_data}; - RAJA::cuda::launch(func, - launch_dims.dims.blocks, - launch_dims.dims.threads, - args, - shmem, - res, + RAJA::cuda::launch(func, launch_dims.dims.blocks, + launch_dims.dims.threads, args, shmem, res, launch_t::async); } } diff --git a/include/RAJA/policy/cuda/kernel/InitLocalMem.hpp b/include/RAJA/policy/cuda/kernel/InitLocalMem.hpp index 4c024c3023..feda56fd39 100644 --- a/include/RAJA/policy/cuda/kernel/InitLocalMem.hpp +++ b/include/RAJA/policy/cuda/kernel/InitLocalMem.hpp @@ -60,11 +60,9 @@ struct CudaStatementExecutor::param_tuple_t>::value_type; - const camp::idx_t NumElem = - camp::tuple_element_t::param_tuple_t>:: - layout_type::s_size; + Pos, typename camp::decay::param_tuple_t>::value_type; + const camp::idx_t NumElem = camp::tuple_element_t< + Pos, typename camp::decay::param_tuple_t>::layout_type::s_size; __shared__ varType Array[NumElem]; camp::get(data.param_tuple).set_data(&Array[0]); @@ -78,11 +76,9 @@ struct CudaStatementExecutor::param_tuple_t>::value_type; - const camp::idx_t NumElem = - camp::tuple_element_t::param_tuple_t>:: - layout_type::s_size; + Pos, typename camp::decay::param_tuple_t>::value_type; + const camp::idx_t NumElem = camp::tuple_element_t< + Pos, typename camp::decay::param_tuple_t>::layout_type::s_size; __shared__ varType Array[NumElem]; camp::get(data.param_tuple).set_data(&Array[0]); @@ -146,11 +142,9 @@ struct CudaStatementExecutor::param_tuple_t>::value_type; - const camp::idx_t NumElem = - camp::tuple_element_t::param_tuple_t>:: - layout_type::s_size; + Pos, typename camp::decay::param_tuple_t>::value_type; + const camp::idx_t NumElem = camp::tuple_element_t< + Pos, typename camp::decay::param_tuple_t>::layout_type::s_size; varType Array[NumElem]; camp::get(data.param_tuple).set_data(&Array[0]); @@ -164,11 +158,9 @@ struct CudaStatementExecutor::param_tuple_t>::value_type; - const camp::idx_t NumElem = - camp::tuple_element_t::param_tuple_t>:: - layout_type::s_size; + Pos, typename camp::decay::param_tuple_t>::value_type; + const camp::idx_t NumElem = camp::tuple_element_t< + Pos, typename camp::decay::param_tuple_t>::layout_type::s_size; varType Array[NumElem]; camp::get(data.param_tuple).set_data(&Array[0]); diff --git a/include/RAJA/policy/cuda/kernel/internal.hpp b/include/RAJA/policy/cuda/kernel/internal.hpp index 837d8f6442..1e302f86e2 100644 --- a/include/RAJA/policy/cuda/kernel/internal.hpp +++ b/include/RAJA/policy/cuda/kernel/internal.hpp @@ -193,8 +193,8 @@ struct CudaStatementListExecutor, Types> static inline LaunchDims calculateDimensions(Data const& data) { // Compute this statements launch dimensions - return CudaStatementListExecutorHelper<0, num_stmts, enclosed_stmts_t>:: - calculateDimensions(data); + return CudaStatementListExecutorHelper< + 0, num_stmts, enclosed_stmts_t>::calculateDimensions(data); } }; diff --git a/include/RAJA/policy/cuda/launch.hpp b/include/RAJA/policy/cuda/launch.hpp index 77fe2f325c..406cf57432 100644 --- a/include/RAJA/policy/cuda/launch.hpp +++ b/include/RAJA/policy/cuda/launch.hpp @@ -116,26 +116,16 @@ struct LaunchExecute< // // Privatize the loop_body, using make_launch_body to setup reductions // - BODY body = - RAJA::cuda::make_launch_body(func, - gridSize, - blockSize, - shared_mem_size, - cuda_res, - std::forward(body_in)); + BODY body = RAJA::cuda::make_launch_body( + func, gridSize, blockSize, shared_mem_size, cuda_res, + std::forward(body_in)); // // Launch the kernel // void* args[] = {(void*)&body}; - RAJA::cuda::launch(func, - gridSize, - blockSize, - args, - shared_mem_size, - cuda_res, - async, - kernel_name); + RAJA::cuda::launch(func, gridSize, blockSize, args, shared_mem_size, + cuda_res, async, kernel_name); } RAJA_FT_END; @@ -195,9 +185,7 @@ struct LaunchExecute< { using EXEC_POL = RAJA::policy::cuda::cuda_launch_explicit_t< - async, - named_usage::unspecified, - named_usage::unspecified>; + async, named_usage::unspecified, named_usage::unspecified>; RAJA::expt::ParamMultiplexer::init(launch_reducers, launch_info); @@ -205,26 +193,16 @@ struct LaunchExecute< // // Privatize the loop_body, using make_launch_body to setup reductions // - BODY body = - RAJA::cuda::make_launch_body(func, - gridSize, - blockSize, - shared_mem_size, - cuda_res, - std::forward(body_in)); + BODY body = RAJA::cuda::make_launch_body( + func, gridSize, blockSize, shared_mem_size, cuda_res, + std::forward(body_in)); // // Launch the kernel // void* args[] = {(void*)&body, (void*)&launch_reducers}; - RAJA::cuda::launch(func, - gridSize, - blockSize, - args, - shared_mem_size, - cuda_res, - async, - kernel_name); + RAJA::cuda::launch(func, gridSize, blockSize, args, shared_mem_size, + cuda_res, async, kernel_name); RAJA::expt::ParamMultiplexer::resolve(launch_reducers, launch_info); @@ -331,26 +309,16 @@ struct LaunchExecute< // // Privatize the loop_body, using make_launch_body to setup reductions // - BODY body = - RAJA::cuda::make_launch_body(func, - gridSize, - blockSize, - shared_mem_size, - cuda_res, - std::forward(body_in)); + BODY body = RAJA::cuda::make_launch_body( + func, gridSize, blockSize, shared_mem_size, cuda_res, + std::forward(body_in)); // // Launch the kernel // void* args[] = {(void*)&body}; - RAJA::cuda::launch(func, - gridSize, - blockSize, - args, - shared_mem_size, - cuda_res, - async, - kernel_name); + RAJA::cuda::launch(func, gridSize, blockSize, args, shared_mem_size, + cuda_res, async, kernel_name); } RAJA_FT_END; @@ -376,9 +344,7 @@ struct LaunchExecute< using BODY = camp::decay; auto func = reinterpret_cast( - &launch_new_reduce_global_fcn_fixed>); resources::Cuda cuda_res = res.get(); @@ -414,34 +380,25 @@ struct LaunchExecute< launch_info.res = cuda_res; { - using EXEC_POL = RAJA::policy::cuda:: - cuda_launch_explicit_t; + using EXEC_POL = + RAJA::policy::cuda::cuda_launch_explicit_t; RAJA::expt::ParamMultiplexer::init(launch_reducers, launch_info); // // Privatize the loop_body, using make_launch_body to setup reductions // - BODY body = - RAJA::cuda::make_launch_body(func, - gridSize, - blockSize, - shared_mem_size, - cuda_res, - std::forward(body_in)); + BODY body = RAJA::cuda::make_launch_body( + func, gridSize, blockSize, shared_mem_size, cuda_res, + std::forward(body_in)); // // Launch the kernel // void* args[] = {(void*)&body, (void*)&launch_reducers}; - RAJA::cuda::launch(func, - gridSize, - blockSize, - args, - shared_mem_size, - cuda_res, - async, - kernel_name); + RAJA::cuda::launch(func, gridSize, blockSize, args, shared_mem_size, + cuda_res, async, kernel_name); RAJA::expt::ParamMultiplexer::resolve(launch_reducers, launch_info); @@ -550,8 +507,7 @@ struct LoopExecute< if (i0 < len0 && i1 < len1 && i2 < len2) { - body(*(segment0.begin() + i0), - *(segment1.begin() + i1), + body(*(segment0.begin() + i0), *(segment1.begin() + i1), *(segment2.begin() + i2)); } } @@ -673,8 +629,7 @@ struct LoopExecute< for (diff_t i2 = i2_init; i2 < len2; i2 += i2_stride) { - body(*(segment0.begin() + i0), - *(segment1.begin() + i1), + body(*(segment0.begin() + i0), *(segment1.begin() + i1), *(segment2.begin() + i2)); } } @@ -774,12 +729,8 @@ struct LoopICountExecute< if (i0 < len0 && i1 < len1 && i2 < len2) { - body(*(segment0.begin() + i0), - *(segment1.begin() + i1), - *(segment2.begin() + i2), - i0, - i1, - i2); + body(*(segment0.begin() + i0), *(segment1.begin() + i1), + *(segment2.begin() + i2), i0, i1, i2); } } }; @@ -900,12 +851,8 @@ struct LoopICountExecute< for (diff_t i2 = i2_init; i2 < len2; i2 += i2_stride) { - body(*(segment0.begin() + i0), - *(segment1.begin() + i1), - *(segment2.begin() + i2), - i0, - i1, - i2); + body(*(segment0.begin() + i0), *(segment1.begin() + i1), + *(segment2.begin() + i2), i0, i1, i2); } } } diff --git a/include/RAJA/policy/cuda/multi_reduce.hpp b/include/RAJA/policy/cuda/multi_reduce.hpp index bc57cce32a..af8cf382d5 100644 --- a/include/RAJA/policy/cuda/multi_reduce.hpp +++ b/include/RAJA/policy/cuda/multi_reduce.hpp @@ -114,8 +114,7 @@ block_multi_reduce_init_shmem(int num_bins, int numThreads = blockDim.x * blockDim.y * blockDim.z; for (int shmem_offset = threadId; - shmem_offset < shared_replication * num_bins; - shmem_offset += numThreads) + shmem_offset < shared_replication * num_bins; shmem_offset += numThreads) { shared_mem[shmem_offset] = identity; } @@ -219,8 +218,8 @@ struct MultiReduceGridAtomicHostInit_TallyData m_tally_bins(get_tally_bins(m_num_bins)), m_tally_replication(get_tally_replication()) { - m_tally_mem = create_tally( - container, identity, m_num_bins, m_tally_bins, m_tally_replication); + m_tally_mem = create_tally(container, identity, m_num_bins, m_tally_bins, + m_tally_replication); } MultiReduceGridAtomicHostInit_TallyData() = delete; @@ -246,8 +245,8 @@ struct MultiReduceGridAtomicHostInit_TallyData m_num_bins = new_num_bins; m_tally_bins = get_tally_bins(m_num_bins); m_tally_replication = get_tally_replication(); - m_tally_mem = create_tally( - container, identity, m_num_bins, m_tally_bins, m_tally_replication); + m_tally_mem = create_tally(container, identity, m_num_bins, m_tally_bins, + m_tally_replication); } else { @@ -256,8 +255,8 @@ struct MultiReduceGridAtomicHostInit_TallyData int bin = 0; for (auto const& value : container) { - m_tally_mem[GetTallyOffset{}( - bin, m_tally_bins, tally_rep, m_tally_replication)] = value; + m_tally_mem[GetTallyOffset{}(bin, m_tally_bins, tally_rep, + m_tally_replication)] = value; ++bin; } } @@ -265,8 +264,8 @@ struct MultiReduceGridAtomicHostInit_TallyData { for (int bin = 0; bin < m_num_bins; ++bin) { - m_tally_mem[GetTallyOffset{}( - bin, m_tally_bins, tally_rep, m_tally_replication)] = identity; + m_tally_mem[GetTallyOffset{}(bin, m_tally_bins, tally_rep, + m_tally_replication)] = identity; } } } @@ -394,8 +393,8 @@ struct MultiReduceGridAtomicHostInit_TallyData { for (int bin = num_bins; bin > 0; --bin) { - int tally_offset = GetTallyOffset{}( - bin - 1, tally_bins, tally_rep - 1, tally_replication); + int tally_offset = GetTallyOffset{}(bin - 1, tally_bins, tally_rep - 1, + tally_replication); tally_mem[tally_offset].~T(); } } @@ -411,7 +410,8 @@ struct MultiReduceGridAtomicHostInit_TallyData T m_identity; int m_num_bins; int m_tally_bins; - int m_tally_replication; // power of 2, at least the max number of omp threads + int m_tally_replication; // power of 2, at least the max number of omp + // threads }; @@ -452,14 +452,8 @@ struct MultiReduceGridAtomicHostInit_Data void combine_device(int bin, T value) { impl::block_multi_reduce_combine_global_atomic( - m_num_bins, - m_identity, - bin, - value, - m_tally_mem, - GetTallyOffset{}, - m_tally_replication, - m_tally_bins); + m_num_bins, m_identity, bin, value, m_tally_mem, GetTallyOffset{}, + m_tally_replication, m_tally_bins); } //! combine value on host, combine a value into the tally @@ -573,8 +567,8 @@ struct MultiReduceBlockThenGridAtomicHostInit_Data T* shared_mem = get_shared_mem(); if (shared_mem != nullptr) { - impl::block_multi_reduce_init_shmem( - m_num_bins, m_identity, shared_mem, m_shared_replication); + impl::block_multi_reduce_init_shmem(m_num_bins, m_identity, shared_mem, + m_shared_replication); } } @@ -586,15 +580,9 @@ struct MultiReduceBlockThenGridAtomicHostInit_Data if (shared_mem != nullptr) { impl::grid_multi_reduce_shmem_to_global_atomic( - m_num_bins, - m_identity, - shared_mem, - GetSharedOffset{}, - m_shared_replication, - m_tally_mem, - GetTallyOffset{}, - m_tally_replication, - m_tally_bins); + m_num_bins, m_identity, shared_mem, GetSharedOffset{}, + m_shared_replication, m_tally_mem, GetTallyOffset{}, + m_tally_replication, m_tally_bins); } } @@ -607,25 +595,14 @@ struct MultiReduceBlockThenGridAtomicHostInit_Data if (shared_mem != nullptr) { impl::block_multi_reduce_combine_shmem_atomic( - m_num_bins, - m_identity, - bin, - value, - shared_mem, - GetSharedOffset{}, + m_num_bins, m_identity, bin, value, shared_mem, GetSharedOffset{}, m_shared_replication); } else { impl::block_multi_reduce_combine_global_atomic( - m_num_bins, - m_identity, - bin, - value, - m_tally_mem, - GetTallyOffset{}, - m_tally_replication, - m_tally_bins); + m_num_bins, m_identity, bin, value, m_tally_mem, GetTallyOffset{}, + m_tally_replication, m_tally_bins); } } diff --git a/include/RAJA/policy/cuda/policy.hpp b/include/RAJA/policy/cuda/policy.hpp index d7412c4d51..a0232701aa 100644 --- a/include/RAJA/policy/cuda/policy.hpp +++ b/include/RAJA/policy/cuda/policy.hpp @@ -591,8 +591,7 @@ struct CudaDims { if (num_blocks() != 0) { - return {(blocks.x ? blocks.x : 1), - (blocks.y ? blocks.y : 1), + return {(blocks.x ? blocks.x : 1), (blocks.y ? blocks.y : 1), (blocks.z ? blocks.z : 1)}; } else @@ -606,8 +605,7 @@ struct CudaDims { if (num_threads() != 0) { - return {(threads.x ? threads.x : 1), - (threads.y ? threads.y : 1), + return {(threads.x ? threads.x : 1), (threads.y ? threads.y : 1), (threads.z ? threads.z : 1)}; } else diff --git a/include/RAJA/policy/cuda/raja_cudaerrchk.hpp b/include/RAJA/policy/cuda/raja_cudaerrchk.hpp index 989b328155..2c2c7b4496 100644 --- a/include/RAJA/policy/cuda/raja_cudaerrchk.hpp +++ b/include/RAJA/policy/cuda/raja_cudaerrchk.hpp @@ -66,10 +66,7 @@ cudaAssert(cudaError_t code, const char* file, int line, bool abort = true) } else { - fprintf(stderr, - "CUDAassert: %s %s %d\n", - cudaGetErrorString(code), - file, + fprintf(stderr, "CUDAassert: %s %s %d\n", cudaGetErrorString(code), file, line); } } diff --git a/include/RAJA/policy/cuda/reduce.hpp b/include/RAJA/policy/cuda/reduce.hpp index 2e3f13519e..82c1443d9b 100644 --- a/include/RAJA/policy/cuda/reduce.hpp +++ b/include/RAJA/policy/cuda/reduce.hpp @@ -263,16 +263,15 @@ RAJA_DEVICE RAJA_INLINE T block_reduce(T val, T identity) { // Need to separate declaration and initialization for clang-cuda - __shared__ unsigned char tmpsd[sizeof( - RAJA::detail:: - SoAArray)]; + __shared__ unsigned char + tmpsd[sizeof(RAJA::detail::SoAArray< + T, RAJA::policy::cuda::device_constants.MAX_WARPS>)]; // Partial placement new: Should call new(tmpsd) here but recasting memory // to avoid calling constructor/destructor in shared memory. RAJA::detail::SoAArray* sd = reinterpret_cast*>(tmpsd); + T, RAJA::policy::cuda::device_constants.MAX_WARPS>*>(tmpsd); // write per warp values to shared memory if (warpId == 0) @@ -730,9 +729,10 @@ struct ReduceLastBlock_Data { T temp = value; - size_t replicationId = impl:: - grid_reduce_last_block( - temp, identity, device, device_count); + size_t replicationId = + impl::grid_reduce_last_block(temp, identity, device, + device_count); if (replicationId != replication) { output[replicationId] = temp; @@ -917,11 +917,10 @@ struct ReduceAtomicDeviceInit_Data { T temp = value; - size_t replicationId = impl::grid_reduce_atomic_device_init( - temp, identity, device, device_count); + size_t replicationId = + impl::grid_reduce_atomic_device_init( + temp, identity, device, device_count); if (replicationId != replication) { output[replicationId] = temp; diff --git a/include/RAJA/policy/cuda/scan.hpp b/include/RAJA/policy/cuda/scan.hpp index 42aa31b2cb..4415c8ab10 100644 --- a/include/RAJA/policy/cuda/scan.hpp +++ b/include/RAJA/policy/cuda/scan.hpp @@ -67,24 +67,16 @@ RAJA_INLINE resources::EventProxy void* d_temp_storage = nullptr; size_t temp_storage_bytes = 0; cudaErrchk(::cub::DeviceScan::InclusiveScan(d_temp_storage, - temp_storage_bytes, - begin, - begin, - binary_op, - len, - stream)); + temp_storage_bytes, begin, begin, + binary_op, len, stream)); // Allocate temporary storage d_temp_storage = cuda::device_mempool_type::getInstance().malloc( temp_storage_bytes); // Run cudaErrchk(::cub::DeviceScan::InclusiveScan(d_temp_storage, - temp_storage_bytes, - begin, - begin, - binary_op, - len, - stream)); + temp_storage_bytes, begin, begin, + binary_op, len, stream)); // Free temporary storage cuda::device_mempool_type::getInstance().free(d_temp_storage); @@ -124,26 +116,16 @@ RAJA_INLINE resources::EventProxy void* d_temp_storage = nullptr; size_t temp_storage_bytes = 0; cudaErrchk(::cub::DeviceScan::ExclusiveScan(d_temp_storage, - temp_storage_bytes, - begin, - begin, - binary_op, - init, - len, - stream)); + temp_storage_bytes, begin, begin, + binary_op, init, len, stream)); // Allocate temporary storage d_temp_storage = cuda::device_mempool_type::getInstance().malloc( temp_storage_bytes); // Run cudaErrchk(::cub::DeviceScan::ExclusiveScan(d_temp_storage, - temp_storage_bytes, - begin, - begin, - binary_op, - init, - len, - stream)); + temp_storage_bytes, begin, begin, + binary_op, init, len, stream)); // Free temporary storage cuda::device_mempool_type::getInstance().free(d_temp_storage); @@ -232,26 +214,16 @@ RAJA_INLINE resources::EventProxy void* d_temp_storage = nullptr; size_t temp_storage_bytes = 0; cudaErrchk(::cub::DeviceScan::ExclusiveScan(d_temp_storage, - temp_storage_bytes, - begin, - out, - binary_op, - init, - len, - stream)); + temp_storage_bytes, begin, out, + binary_op, init, len, stream)); // Allocate temporary storage d_temp_storage = cuda::device_mempool_type::getInstance().malloc( temp_storage_bytes); // Run cudaErrchk(::cub::DeviceScan::ExclusiveScan(d_temp_storage, - temp_storage_bytes, - begin, - out, - binary_op, - init, - len, - stream)); + temp_storage_bytes, begin, out, + binary_op, init, len, stream)); // Free temporary storage cuda::device_mempool_type::getInstance().free(d_temp_storage); diff --git a/include/RAJA/policy/cuda/sort.hpp b/include/RAJA/policy/cuda/sort.hpp index 3eadda61f2..7bcb7a2440 100644 --- a/include/RAJA/policy/cuda/sort.hpp +++ b/include/RAJA/policy/cuda/sort.hpp @@ -127,12 +127,8 @@ stable(resources::Cuda cuda_res, void* d_temp_storage = nullptr; size_t temp_storage_bytes = 0; cudaErrchk(::cub::DeviceRadixSort::SortKeys(d_temp_storage, - temp_storage_bytes, - d_keys, - len, - begin_bit, - end_bit, - stream)); + temp_storage_bytes, d_keys, len, + begin_bit, end_bit, stream)); // Allocate temporary storage d_temp_storage = cuda::device_mempool_type::getInstance().malloc( @@ -140,12 +136,8 @@ stable(resources::Cuda cuda_res, // Run cudaErrchk(::cub::DeviceRadixSort::SortKeys(d_temp_storage, - temp_storage_bytes, - d_keys, - len, - begin_bit, - end_bit, - stream)); + temp_storage_bytes, d_keys, len, + begin_bit, end_bit, stream)); // Free temporary storage cuda::device_mempool_type::getInstance().free(d_temp_storage); @@ -153,8 +145,8 @@ stable(resources::Cuda cuda_res, { // copy - cudaErrchk(cudaMemcpyAsync( - begin, d_out, len * sizeof(R), cudaMemcpyDefault, stream)); + cudaErrchk(cudaMemcpyAsync(begin, d_out, len * sizeof(R), cudaMemcpyDefault, + stream)); } cuda::device_mempool_type::getInstance().free(d_out); @@ -204,26 +196,18 @@ stable(resources::Cuda cuda_res, // Determine temporary device storage requirements void* d_temp_storage = nullptr; size_t temp_storage_bytes = 0; - cudaErrchk(::cub::DeviceRadixSort::SortKeysDescending(d_temp_storage, - temp_storage_bytes, - d_keys, - len, - begin_bit, - end_bit, - stream)); + cudaErrchk(::cub::DeviceRadixSort::SortKeysDescending( + d_temp_storage, temp_storage_bytes, d_keys, len, begin_bit, end_bit, + stream)); // Allocate temporary storage d_temp_storage = cuda::device_mempool_type::getInstance().malloc( temp_storage_bytes); // Run - cudaErrchk(::cub::DeviceRadixSort::SortKeysDescending(d_temp_storage, - temp_storage_bytes, - d_keys, - len, - begin_bit, - end_bit, - stream)); + cudaErrchk(::cub::DeviceRadixSort::SortKeysDescending( + d_temp_storage, temp_storage_bytes, d_keys, len, begin_bit, end_bit, + stream)); // Free temporary storage cuda::device_mempool_type::getInstance().free(d_temp_storage); @@ -231,8 +215,8 @@ stable(resources::Cuda cuda_res, { // copy - cudaErrchk(cudaMemcpyAsync( - begin, d_out, len * sizeof(R), cudaMemcpyDefault, stream)); + cudaErrchk(cudaMemcpyAsync(begin, d_out, len * sizeof(R), cudaMemcpyDefault, + stream)); } cuda::device_mempool_type::getInstance().free(d_out); @@ -438,28 +422,18 @@ stable_pairs(resources::Cuda cuda_res, // Determine temporary device storage requirements void* d_temp_storage = nullptr; size_t temp_storage_bytes = 0; - cudaErrchk(::cub::DeviceRadixSort::SortPairs(d_temp_storage, - temp_storage_bytes, - d_keys, - d_vals, - len, - begin_bit, - end_bit, - stream)); + cudaErrchk(::cub::DeviceRadixSort::SortPairs( + d_temp_storage, temp_storage_bytes, d_keys, d_vals, len, begin_bit, + end_bit, stream)); // Allocate temporary storage d_temp_storage = cuda::device_mempool_type::getInstance().malloc( temp_storage_bytes); // Run - cudaErrchk(::cub::DeviceRadixSort::SortPairs(d_temp_storage, - temp_storage_bytes, - d_keys, - d_vals, - len, - begin_bit, - end_bit, - stream)); + cudaErrchk(::cub::DeviceRadixSort::SortPairs( + d_temp_storage, temp_storage_bytes, d_keys, d_vals, len, begin_bit, + end_bit, stream)); // Free temporary storage cuda::device_mempool_type::getInstance().free(d_temp_storage); @@ -467,15 +441,15 @@ stable_pairs(resources::Cuda cuda_res, { // copy keys - cudaErrchk(cudaMemcpyAsync( - keys_begin, d_keys_out, len * sizeof(K), cudaMemcpyDefault, stream)); + cudaErrchk(cudaMemcpyAsync(keys_begin, d_keys_out, len * sizeof(K), + cudaMemcpyDefault, stream)); } if (d_vals.Current() == d_vals_out) { // copy vals - cudaErrchk(cudaMemcpyAsync( - vals_begin, d_vals_out, len * sizeof(V), cudaMemcpyDefault, stream)); + cudaErrchk(cudaMemcpyAsync(vals_begin, d_vals_out, len * sizeof(V), + cudaMemcpyDefault, stream)); } cuda::device_mempool_type::getInstance().free(d_keys_out); @@ -533,28 +507,18 @@ stable_pairs(resources::Cuda cuda_res, // Determine temporary device storage requirements void* d_temp_storage = nullptr; size_t temp_storage_bytes = 0; - cudaErrchk(::cub::DeviceRadixSort::SortPairsDescending(d_temp_storage, - temp_storage_bytes, - d_keys, - d_vals, - len, - begin_bit, - end_bit, - stream)); + cudaErrchk(::cub::DeviceRadixSort::SortPairsDescending( + d_temp_storage, temp_storage_bytes, d_keys, d_vals, len, begin_bit, + end_bit, stream)); // Allocate temporary storage d_temp_storage = cuda::device_mempool_type::getInstance().malloc( temp_storage_bytes); // Run - cudaErrchk(::cub::DeviceRadixSort::SortPairsDescending(d_temp_storage, - temp_storage_bytes, - d_keys, - d_vals, - len, - begin_bit, - end_bit, - stream)); + cudaErrchk(::cub::DeviceRadixSort::SortPairsDescending( + d_temp_storage, temp_storage_bytes, d_keys, d_vals, len, begin_bit, + end_bit, stream)); // Free temporary storage cuda::device_mempool_type::getInstance().free(d_temp_storage); @@ -562,15 +526,15 @@ stable_pairs(resources::Cuda cuda_res, { // copy keys - cudaErrchk(cudaMemcpyAsync( - keys_begin, d_keys_out, len * sizeof(K), cudaMemcpyDefault, stream)); + cudaErrchk(cudaMemcpyAsync(keys_begin, d_keys_out, len * sizeof(K), + cudaMemcpyDefault, stream)); } if (d_vals.Current() == d_vals_out) { // copy vals - cudaErrchk(cudaMemcpyAsync( - vals_begin, d_vals_out, len * sizeof(V), cudaMemcpyDefault, stream)); + cudaErrchk(cudaMemcpyAsync(vals_begin, d_vals_out, len * sizeof(V), + cudaMemcpyDefault, stream)); } cuda::device_mempool_type::getInstance().free(d_keys_out); diff --git a/include/RAJA/policy/desul/atomic.hpp b/include/RAJA/policy/desul/atomic.hpp index 45f0b344fe..39595fb63c 100644 --- a/include/RAJA/policy/desul/atomic.hpp +++ b/include/RAJA/policy/desul/atomic.hpp @@ -30,56 +30,56 @@ RAJA_SUPPRESS_HD_WARN template RAJA_HOST_DEVICE RAJA_INLINE T atomicLoad(AtomicPolicy, T* acc) { - return desul::atomic_load( - acc, raja_default_desul_order{}, raja_default_desul_scope{}); + return desul::atomic_load(acc, raja_default_desul_order{}, + raja_default_desul_scope{}); } RAJA_SUPPRESS_HD_WARN template RAJA_HOST_DEVICE RAJA_INLINE void atomicStore(AtomicPolicy, T* acc, T value) { - desul::atomic_store( - acc, value, raja_default_desul_order{}, raja_default_desul_scope{}); + desul::atomic_store(acc, value, raja_default_desul_order{}, + raja_default_desul_scope{}); } RAJA_SUPPRESS_HD_WARN template RAJA_HOST_DEVICE RAJA_INLINE T atomicAdd(AtomicPolicy, T* acc, T value) { - return desul::atomic_fetch_add( - acc, value, raja_default_desul_order{}, raja_default_desul_scope{}); + return desul::atomic_fetch_add(acc, value, raja_default_desul_order{}, + raja_default_desul_scope{}); } RAJA_SUPPRESS_HD_WARN template RAJA_HOST_DEVICE RAJA_INLINE T atomicSub(AtomicPolicy, T* acc, T value) { - return desul::atomic_fetch_sub( - acc, value, raja_default_desul_order{}, raja_default_desul_scope{}); + return desul::atomic_fetch_sub(acc, value, raja_default_desul_order{}, + raja_default_desul_scope{}); } RAJA_SUPPRESS_HD_WARN template RAJA_HOST_DEVICE RAJA_INLINE T atomicMin(AtomicPolicy, T* acc, T value) { - return desul::atomic_fetch_min( - acc, value, raja_default_desul_order{}, raja_default_desul_scope{}); + return desul::atomic_fetch_min(acc, value, raja_default_desul_order{}, + raja_default_desul_scope{}); } RAJA_SUPPRESS_HD_WARN template RAJA_HOST_DEVICE RAJA_INLINE T atomicMax(AtomicPolicy, T* acc, T value) { - return desul::atomic_fetch_max( - acc, value, raja_default_desul_order{}, raja_default_desul_scope{}); + return desul::atomic_fetch_max(acc, value, raja_default_desul_order{}, + raja_default_desul_scope{}); } RAJA_SUPPRESS_HD_WARN template RAJA_HOST_DEVICE RAJA_INLINE T atomicInc(AtomicPolicy, T* acc) { - return desul::atomic_fetch_inc( - acc, raja_default_desul_order{}, raja_default_desul_scope{}); + return desul::atomic_fetch_inc(acc, raja_default_desul_order{}, + raja_default_desul_scope{}); } RAJA_SUPPRESS_HD_WARN @@ -88,16 +88,16 @@ RAJA_HOST_DEVICE RAJA_INLINE T atomicInc(AtomicPolicy, T* acc, T val) { // See: // http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomicinc - return desul::atomic_fetch_inc_mod( - acc, val, raja_default_desul_order{}, raja_default_desul_scope{}); + return desul::atomic_fetch_inc_mod(acc, val, raja_default_desul_order{}, + raja_default_desul_scope{}); } RAJA_SUPPRESS_HD_WARN template RAJA_HOST_DEVICE RAJA_INLINE T atomicDec(AtomicPolicy, T* acc) { - return desul::atomic_fetch_dec( - acc, raja_default_desul_order{}, raja_default_desul_scope{}); + return desul::atomic_fetch_dec(acc, raja_default_desul_order{}, + raja_default_desul_scope{}); } RAJA_SUPPRESS_HD_WARN @@ -106,40 +106,40 @@ RAJA_HOST_DEVICE RAJA_INLINE T atomicDec(AtomicPolicy, T* acc, T val) { // See: // http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomicdec - return desul::atomic_fetch_dec_mod( - acc, val, raja_default_desul_order{}, raja_default_desul_scope{}); + return desul::atomic_fetch_dec_mod(acc, val, raja_default_desul_order{}, + raja_default_desul_scope{}); } RAJA_SUPPRESS_HD_WARN template RAJA_HOST_DEVICE RAJA_INLINE T atomicAnd(AtomicPolicy, T* acc, T value) { - return desul::atomic_fetch_and( - acc, value, raja_default_desul_order{}, raja_default_desul_scope{}); + return desul::atomic_fetch_and(acc, value, raja_default_desul_order{}, + raja_default_desul_scope{}); } RAJA_SUPPRESS_HD_WARN template RAJA_HOST_DEVICE RAJA_INLINE T atomicOr(AtomicPolicy, T* acc, T value) { - return desul::atomic_fetch_or( - acc, value, raja_default_desul_order{}, raja_default_desul_scope{}); + return desul::atomic_fetch_or(acc, value, raja_default_desul_order{}, + raja_default_desul_scope{}); } RAJA_SUPPRESS_HD_WARN template RAJA_HOST_DEVICE RAJA_INLINE T atomicXor(AtomicPolicy, T* acc, T value) { - return desul::atomic_fetch_xor( - acc, value, raja_default_desul_order{}, raja_default_desul_scope{}); + return desul::atomic_fetch_xor(acc, value, raja_default_desul_order{}, + raja_default_desul_scope{}); } RAJA_SUPPRESS_HD_WARN template RAJA_HOST_DEVICE RAJA_INLINE T atomicExchange(AtomicPolicy, T* acc, T value) { - return desul::atomic_exchange( - acc, value, raja_default_desul_order{}, raja_default_desul_scope{}); + return desul::atomic_exchange(acc, value, raja_default_desul_order{}, + raja_default_desul_scope{}); } RAJA_SUPPRESS_HD_WARN @@ -147,9 +147,7 @@ template RAJA_HOST_DEVICE RAJA_INLINE T atomicCAS(AtomicPolicy, T* acc, T compare, T value) { - return desul::atomic_compare_exchange(acc, - compare, - value, + return desul::atomic_compare_exchange(acc, compare, value, raja_default_desul_order{}, raja_default_desul_scope{}); } diff --git a/include/RAJA/policy/hip/MemUtils_HIP.hpp b/include/RAJA/policy/hip/MemUtils_HIP.hpp index 83c37c5480..6f130d3670 100644 --- a/include/RAJA/policy/hip/MemUtils_HIP.hpp +++ b/include/RAJA/policy/hip/MemUtils_HIP.hpp @@ -79,8 +79,8 @@ struct PinnedAllocator void* malloc(size_t nbytes) { void* ptr; - hipErrchk(hipHostMalloc( - &ptr, nbytes, hipHostMallocMapped | hipHostMallocNonCoherent)); + hipErrchk(hipHostMalloc(&ptr, nbytes, + hipHostMallocMapped | hipHostMallocNonCoherent)); return ptr; } @@ -441,11 +441,9 @@ hip_occupancy_max_blocks_threads(const void* func, data.func_dynamic_shmem_per_block = func_dynamic_shmem_per_block; #ifdef RAJA_ENABLE_HIP_OCCUPANCY_CALCULATOR - hipErrchk( - hipOccupancyMaxPotentialBlockSize(&data.func_max_blocks_per_device, - &data.func_max_threads_per_block, - func, - func_dynamic_shmem_per_block)); + hipErrchk(hipOccupancyMaxPotentialBlockSize( + &data.func_max_blocks_per_device, &data.func_max_threads_per_block, + func, func_dynamic_shmem_per_block)); #else RAJA_UNUSED_VAR(func); hipDeviceProp_t& prop = hip::device_prop(); @@ -480,9 +478,7 @@ hip_occupancy_max_blocks(const void* func, size_t func_dynamic_shmem_per_block) #ifdef RAJA_ENABLE_HIP_OCCUPANCY_CALCULATOR hipErrchk(hipOccupancyMaxActiveBlocksPerMultiprocessor( - &data.func_max_blocks_per_sm, - func, - func_threads_per_block, + &data.func_max_blocks_per_sm, func, func_threads_per_block, func_dynamic_shmem_per_block)); #else RAJA_UNUSED_VAR(func); @@ -516,9 +512,7 @@ hip_occupancy_max_blocks(const void* func, #ifdef RAJA_ENABLE_HIP_OCCUPANCY_CALCULATOR hipErrchk(hipOccupancyMaxActiveBlocksPerMultiprocessor( - &data.func_max_blocks_per_sm, - func, - func_threads_per_block, + &data.func_max_blocks_per_sm, func, func_threads_per_block, func_dynamic_shmem_per_block)); #else RAJA_UNUSED_VAR(func); diff --git a/include/RAJA/policy/hip/WorkGroup/WorkRunner.hpp b/include/RAJA/policy/hip/WorkGroup/WorkRunner.hpp index c9b56efbb1..afcbcb949a 100644 --- a/include/RAJA/policy/hip/WorkGroup/WorkRunner.hpp +++ b/include/RAJA/policy/hip/WorkGroup/WorkRunner.hpp @@ -318,8 +318,7 @@ struct WorkRunner< storage.template emplace( get_Dispatcher(dispatcher_exec_policy{}), - std::forward(iter), - std::forward(loop_body)); + std::forward(iter), std::forward(loop_body)); } } @@ -332,16 +331,13 @@ struct WorkRunner< { using Iterator = camp::decay; using IndexType = camp::decay; + std::end(storage)))>; using value_type = typename WorkContainer::value_type; per_run_storage run_storage{}; - auto func = hip_unordered_y_block_global; + auto func = hip_unordered_y_block_global; // // Compute the requested iteration space size @@ -365,8 +361,7 @@ struct WorkRunner< hip_dim_t gridSize{ static_cast((average_iterations + block_size - 1) / block_size), - static_cast(num_loops), - 1}; + static_cast(num_loops), 1}; RAJA_FT_BEGIN; @@ -380,8 +375,8 @@ struct WorkRunner< // Launch the kernel // void* func_args[] = {(void*)&begin, (void*)&args...}; - RAJA::hip::launch( - (const void*)func, gridSize, blockSize, func_args, shmem, r, Async); + RAJA::hip::launch((const void*)func, gridSize, blockSize, func_args, + shmem, r, Async); } RAJA_FT_END; diff --git a/include/RAJA/policy/hip/atomic.hpp b/include/RAJA/policy/hip/atomic.hpp index 5ae1eb659d..ba306f1eaa 100644 --- a/include/RAJA/policy/hip/atomic.hpp +++ b/include/RAJA/policy/hip/atomic.hpp @@ -351,10 +351,9 @@ RAJA_INLINE __device__ T hip_atomicCAS(T* acc, T compare, T value) { using R = hip_useReinterpretCommon_t; - return RAJA::util::reinterp_A_as_B( - hip_atomicCAS(reinterpret_cast(acc), - RAJA::util::reinterp_A_as_B(compare), - RAJA::util::reinterp_A_as_B(value))); + return RAJA::util::reinterp_A_as_B(hip_atomicCAS( + reinterpret_cast(acc), RAJA::util::reinterp_A_as_B(compare), + RAJA::util::reinterp_A_as_B(value))); } @@ -553,8 +552,7 @@ template < RAJA_INLINE __device__ T hip_atomicMin(T* acc, T value) { return hip_atomicCAS_loop( - acc, - [value](T old) { return value < old ? value : old; }, + acc, [value](T old) { return value < old ? value : old; }, [value](T current) { return current <= value; }); } @@ -578,8 +576,7 @@ template < RAJA_INLINE __device__ T hip_atomicMax(T* acc, T value) { return hip_atomicCAS_loop( - acc, - [value](T old) { return old < value ? value : old; }, + acc, [value](T old) { return old < value ? value : old; }, [value](T current) { return value <= current; }); } @@ -598,11 +595,9 @@ RAJA_INLINE __device__ T hip_atomicMax(T* acc, T value) template RAJA_INLINE __device__ T hip_atomicInc(T* acc, T value) { - return hip_atomicCAS_loop(acc, - [value](T old) { - return value <= old ? static_cast(0) - : old + static_cast(1); - }); + return hip_atomicCAS_loop( + acc, [value](T old) + { return value <= old ? static_cast(0) : old + static_cast(1); }); } diff --git a/include/RAJA/policy/hip/forall.hpp b/include/RAJA/policy/hip/forall.hpp index 7f2a227c72..b5430e3470 100644 --- a/include/RAJA/policy/hip/forall.hpp +++ b/include/RAJA/policy/hip/forall.hpp @@ -620,14 +620,14 @@ forall_impl( using LOOP_BODY = camp::decay; using IndexType = camp::decay; - using EXEC_POL = ::RAJA::policy::hip:: - hip_exec; - using UniqueMarker = ::camp:: - list; - using DimensionCalculator = impl::ForallDimensionCalculator; + using EXEC_POL = + ::RAJA::policy::hip::hip_exec; + using UniqueMarker = ::camp::list; + using DimensionCalculator = + impl::ForallDimensionCalculator; // // Compute the requested iteration space size @@ -660,20 +660,16 @@ forall_impl( // // Privatize the loop_body, using make_launch_body to setup reductions // - LOOP_BODY body = - RAJA::hip::make_launch_body(func, - dims.blocks, - dims.threads, - shmem, - hip_res, - std::forward(loop_body)); + LOOP_BODY body = RAJA::hip::make_launch_body( + func, dims.blocks, dims.threads, shmem, hip_res, + std::forward(loop_body)); // // Launch the kernels // void* args[] = {(void*)&body, (void*)&begin, (void*)&len}; - RAJA::hip::launch( - func, dims.blocks, dims.threads, args, shmem, hip_res, Async); + RAJA::hip::launch(func, dims.blocks, dims.threads, args, shmem, hip_res, + Async); } RAJA_FT_END; @@ -707,14 +703,14 @@ forall_impl( using LOOP_BODY = camp::decay; using IndexType = camp::decay; - using EXEC_POL = ::RAJA::policy::hip:: - hip_exec; - using UniqueMarker = ::camp:: - list; - using DimensionCalculator = impl::ForallDimensionCalculator; + using EXEC_POL = + ::RAJA::policy::hip::hip_exec; + using UniqueMarker = ::camp::list; + using DimensionCalculator = + impl::ForallDimensionCalculator; // // Compute the requested iteration space size @@ -728,10 +724,7 @@ forall_impl( { auto func = reinterpret_cast( - &impl::forallp_hip_kernel>); // @@ -758,21 +751,17 @@ forall_impl( // // Privatize the loop_body, using make_launch_body to setup reductions // - LOOP_BODY body = - RAJA::hip::make_launch_body(func, - dims.blocks, - dims.threads, - shmem, - hip_res, - std::forward(loop_body)); + LOOP_BODY body = RAJA::hip::make_launch_body( + func, dims.blocks, dims.threads, shmem, hip_res, + std::forward(loop_body)); // // Launch the kernels // - void* args[] = { - (void*)&body, (void*)&begin, (void*)&len, (void*)&f_params}; - RAJA::hip::launch( - func, dims.blocks, dims.threads, args, shmem, hip_res, Async); + void* args[] = {(void*)&body, (void*)&begin, (void*)&len, + (void*)&f_params}; + RAJA::hip::launch(func, dims.blocks, dims.threads, args, shmem, hip_res, + Async); RAJA::expt::ParamMultiplexer::resolve(f_params, launch_info); } @@ -821,11 +810,9 @@ RAJA_INLINE resources::EventProxy forall_impl( for (int isi = 0; isi < num_seg; ++isi) { iset.segmentCall( - r, - isi, - detail::CallForall(), - ::RAJA::policy::hip:: - hip_exec(), + r, isi, detail::CallForall(), + ::RAJA::policy::hip::hip_exec(), loop_body); } // iterate over segments of index set diff --git a/include/RAJA/policy/hip/intrinsics.hpp b/include/RAJA/policy/hip/intrinsics.hpp index 59d726e131..3c4360685b 100644 --- a/include/RAJA/policy/hip/intrinsics.hpp +++ b/include/RAJA/policy/hip/intrinsics.hpp @@ -93,8 +93,8 @@ struct AccessorDeviceScopeUseBlockFence template static RAJA_DEVICE RAJA_INLINE T get(T* in_ptr, size_t idx) { - using ArrayType = RAJA::detail:: - AsIntegerArray; + using ArrayType = RAJA::detail::AsIntegerArray; using integer_type = typename ArrayType::integer_type; ArrayType u; @@ -105,8 +105,8 @@ struct AccessorDeviceScopeUseBlockFence { #if defined(RAJA_USE_HIP_INTRINSICS) && \ RAJA_INTERNAL_CLANG_HAS_BUILTIN(__hip_atomic_load) - u.array[i] = __hip_atomic_load( - &ptr[i], __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + u.array[i] = __hip_atomic_load(&ptr[i], __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_AGENT); #else u.array[i] = atomicAdd(&ptr[i], integer_type(0)); #endif @@ -118,8 +118,8 @@ struct AccessorDeviceScopeUseBlockFence template static RAJA_DEVICE RAJA_INLINE void set(T* in_ptr, size_t idx, T val) { - using ArrayType = RAJA::detail:: - AsIntegerArray; + using ArrayType = RAJA::detail::AsIntegerArray; using integer_type = typename ArrayType::integer_type; ArrayType u; @@ -130,8 +130,8 @@ struct AccessorDeviceScopeUseBlockFence { #if defined(RAJA_USE_HIP_INTRINSICS) && \ RAJA_INTERNAL_CLANG_HAS_BUILTIN(__hip_atomic_store) - __hip_atomic_store( - &ptr[i], u.array[i], __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + __hip_atomic_store(&ptr[i], u.array[i], __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_AGENT); #else atomicExch(&ptr[i], u.array[i]); #endif @@ -181,9 +181,9 @@ constexpr size_t max_shfl_int_type_size = sizeof(unsigned int); template RAJA_DEVICE RAJA_INLINE T shfl_xor_sync(T var, int laneMask) { - RAJA::detail:: - AsIntegerArray - u; + RAJA::detail::AsIntegerArray + u; u.set_value(var); for (size_t i = 0; i < u.array_size(); ++i) @@ -196,9 +196,9 @@ RAJA_DEVICE RAJA_INLINE T shfl_xor_sync(T var, int laneMask) template RAJA_DEVICE RAJA_INLINE T shfl_sync(T var, int srcLane) { - RAJA::detail:: - AsIntegerArray - u; + RAJA::detail::AsIntegerArray + u; u.set_value(var); for (size_t i = 0; i < u.array_size(); ++i) @@ -348,10 +348,8 @@ RAJA_DEVICE RAJA_INLINE T block_reduce(T val, T identity) __shared__ unsigned char tmpsd[sizeof( RAJA::detail::SoAArray)]; RAJA::detail::SoAArray* sd = - reinterpret_cast< - RAJA::detail::SoAArray*>( - tmpsd); + reinterpret_cast*>(tmpsd); // write per warp values to shared memory if (warpId == 0) diff --git a/include/RAJA/policy/hip/kernel/HipKernel.hpp b/include/RAJA/policy/hip/kernel/HipKernel.hpp index f4c636abbe..8e892c3718 100644 --- a/include/RAJA/policy/hip/kernel/HipKernel.hpp +++ b/include/RAJA/policy/hip/kernel/HipKernel.hpp @@ -527,8 +527,8 @@ struct StatementExecutor< // int recommended_blocks; int recommended_threads; - launch_t::recommended_blocks_threads( - shmem, recommended_blocks, recommended_threads); + launch_t::recommended_blocks_threads(shmem, recommended_blocks, + recommended_threads); // @@ -546,8 +546,7 @@ struct StatementExecutor< if (recommended_threads >= get_size(launch_dims.min_dims.threads)) { - fit_threads = fitHipDims(recommended_threads, - launch_dims.dims.threads, + fit_threads = fitHipDims(recommended_threads, launch_dims.dims.threads, launch_dims.min_dims.threads); } @@ -558,8 +557,7 @@ struct StatementExecutor< get_size(fit_threads) != recommended_threads) { - fit_threads = fitHipDims(max_threads, - launch_dims.dims.threads, + fit_threads = fitHipDims(max_threads, launch_dims.dims.threads, launch_dims.min_dims.threads); } @@ -591,8 +589,8 @@ struct StatementExecutor< use_blocks = max_blocks; } - launch_dims.dims.blocks = fitHipDims( - use_blocks, launch_dims.dims.blocks, launch_dims.min_dims.blocks); + launch_dims.dims.blocks = fitHipDims(use_blocks, launch_dims.dims.blocks, + launch_dims.min_dims.blocks); // // make sure that we fit @@ -616,23 +614,16 @@ struct StatementExecutor< // of the launch_dims and potential changes to shmem here that is // currently an unresolved issue. // - auto hip_data = RAJA::hip::make_launch_body(func, - launch_dims.dims.blocks, - launch_dims.dims.threads, - shmem, - res, - data); + auto hip_data = RAJA::hip::make_launch_body( + func, launch_dims.dims.blocks, launch_dims.dims.threads, shmem, res, + data); // // Launch the kernel // void* args[] = {(void*)&hip_data}; - RAJA::hip::launch(func, - launch_dims.dims.blocks, - launch_dims.dims.threads, - args, - shmem, - res, + RAJA::hip::launch(func, launch_dims.dims.blocks, + launch_dims.dims.threads, args, shmem, res, launch_t::async); } } diff --git a/include/RAJA/policy/hip/kernel/InitLocalMem.hpp b/include/RAJA/policy/hip/kernel/InitLocalMem.hpp index 503c89cab1..bd263fb705 100644 --- a/include/RAJA/policy/hip/kernel/InitLocalMem.hpp +++ b/include/RAJA/policy/hip/kernel/InitLocalMem.hpp @@ -60,11 +60,9 @@ struct HipStatementExecutor::param_tuple_t>::value_type; - const camp::idx_t NumElem = - camp::tuple_element_t::param_tuple_t>:: - layout_type::s_size; + Pos, typename camp::decay::param_tuple_t>::value_type; + const camp::idx_t NumElem = camp::tuple_element_t< + Pos, typename camp::decay::param_tuple_t>::layout_type::s_size; __shared__ varType Array[NumElem]; camp::get(data.param_tuple).set_data(&Array[0]); @@ -78,11 +76,9 @@ struct HipStatementExecutor::param_tuple_t>::value_type; - const camp::idx_t NumElem = - camp::tuple_element_t::param_tuple_t>:: - layout_type::s_size; + Pos, typename camp::decay::param_tuple_t>::value_type; + const camp::idx_t NumElem = camp::tuple_element_t< + Pos, typename camp::decay::param_tuple_t>::layout_type::s_size; __shared__ varType Array[NumElem]; camp::get(data.param_tuple).set_data(&Array[0]); @@ -146,11 +142,9 @@ struct HipStatementExecutor::param_tuple_t>::value_type; - const camp::idx_t NumElem = - camp::tuple_element_t::param_tuple_t>:: - layout_type::s_size; + Pos, typename camp::decay::param_tuple_t>::value_type; + const camp::idx_t NumElem = camp::tuple_element_t< + Pos, typename camp::decay::param_tuple_t>::layout_type::s_size; varType Array[NumElem]; camp::get(data.param_tuple).set_data(&Array[0]); @@ -164,11 +158,9 @@ struct HipStatementExecutor::param_tuple_t>::value_type; - const camp::idx_t NumElem = - camp::tuple_element_t::param_tuple_t>:: - layout_type::s_size; + Pos, typename camp::decay::param_tuple_t>::value_type; + const camp::idx_t NumElem = camp::tuple_element_t< + Pos, typename camp::decay::param_tuple_t>::layout_type::s_size; varType Array[NumElem]; camp::get(data.param_tuple).set_data(&Array[0]); diff --git a/include/RAJA/policy/hip/kernel/internal.hpp b/include/RAJA/policy/hip/kernel/internal.hpp index a4dc2fc49b..a0d1218b85 100644 --- a/include/RAJA/policy/hip/kernel/internal.hpp +++ b/include/RAJA/policy/hip/kernel/internal.hpp @@ -193,8 +193,8 @@ struct HipStatementListExecutor, Types> static inline LaunchDims calculateDimensions(Data const& data) { // Compute this statements launch dimensions - return HipStatementListExecutorHelper<0, num_stmts, enclosed_stmts_t>:: - calculateDimensions(data); + return HipStatementListExecutorHelper< + 0, num_stmts, enclosed_stmts_t>::calculateDimensions(data); } }; diff --git a/include/RAJA/policy/hip/launch.hpp b/include/RAJA/policy/hip/launch.hpp index e6c7ccdc86..1aa27db7eb 100644 --- a/include/RAJA/policy/hip/launch.hpp +++ b/include/RAJA/policy/hip/launch.hpp @@ -113,25 +113,16 @@ struct LaunchExecute< // // Privatize the loop_body, using make_launch_body to setup reductions // - BODY body = RAJA::hip::make_launch_body(func, - gridSize, - blockSize, - shared_mem_size, - hip_res, + BODY body = RAJA::hip::make_launch_body(func, gridSize, blockSize, + shared_mem_size, hip_res, std::forward(body_in)); // // Launch the kernel // void* args[] = {(void*)&body}; - RAJA::hip::launch(func, - gridSize, - blockSize, - args, - shared_mem_size, - hip_res, - async, - kernel_name); + RAJA::hip::launch(func, gridSize, blockSize, args, shared_mem_size, + hip_res, async, kernel_name); } RAJA_FT_END; @@ -199,25 +190,16 @@ struct LaunchExecute< // // Privatize the loop_body, using make_launch_body to setup reductions // - BODY body = RAJA::hip::make_launch_body(func, - gridSize, - blockSize, - shared_mem_size, - hip_res, + BODY body = RAJA::hip::make_launch_body(func, gridSize, blockSize, + shared_mem_size, hip_res, std::forward(body_in)); // // Launch the kernel // void* args[] = {(void*)&body, (void*)&launch_reducers}; - RAJA::hip::launch(func, - gridSize, - blockSize, - args, - shared_mem_size, - hip_res, - async, - kernel_name); + RAJA::hip::launch(func, gridSize, blockSize, args, shared_mem_size, + hip_res, async, kernel_name); RAJA::expt::ParamMultiplexer::resolve(launch_reducers, launch_info); @@ -318,25 +300,16 @@ struct LaunchExecute> // // Privatize the loop_body, using make_launch_body to setup reductions // - BODY body = RAJA::hip::make_launch_body(func, - gridSize, - blockSize, - shared_mem_size, - hip_res, + BODY body = RAJA::hip::make_launch_body(func, gridSize, blockSize, + shared_mem_size, hip_res, std::forward(body_in)); // // Launch the kernel // void* args[] = {(void*)&body}; - RAJA::hip::launch(func, - gridSize, - blockSize, - args, - shared_mem_size, - hip_res, - async, - kernel_name); + RAJA::hip::launch(func, gridSize, blockSize, args, shared_mem_size, + hip_res, async, kernel_name); } RAJA_FT_END; @@ -361,8 +334,7 @@ struct LaunchExecute> using BODY = camp::decay; auto func = reinterpret_cast( - &launch_new_reduce_global_fcn_fixed>); resources::Hip hip_res = res.get(); @@ -405,25 +377,16 @@ struct LaunchExecute> // // Privatize the loop_body, using make_launch_body to setup reductions // - BODY body = RAJA::hip::make_launch_body(func, - gridSize, - blockSize, - shared_mem_size, - hip_res, + BODY body = RAJA::hip::make_launch_body(func, gridSize, blockSize, + shared_mem_size, hip_res, std::forward(body_in)); // // Launch the kernel // void* args[] = {(void*)&body, (void*)&launch_reducers}; - RAJA::hip::launch(func, - gridSize, - blockSize, - args, - shared_mem_size, - hip_res, - async, - kernel_name); + RAJA::hip::launch(func, gridSize, blockSize, args, shared_mem_size, + hip_res, async, kernel_name); RAJA::expt::ParamMultiplexer::resolve(launch_reducers, launch_info); @@ -533,8 +496,7 @@ struct LoopExecute< if (i0 < len0 && i1 < len1 && i2 < len2) { - body(*(segment0.begin() + i0), - *(segment1.begin() + i1), + body(*(segment0.begin() + i0), *(segment1.begin() + i1), *(segment2.begin() + i2)); } } @@ -656,8 +618,7 @@ struct LoopExecute< for (diff_t i2 = i2_init; i2 < len2; i2 += i2_stride) { - body(*(segment0.begin() + i0), - *(segment1.begin() + i1), + body(*(segment0.begin() + i0), *(segment1.begin() + i1), *(segment2.begin() + i2)); } } @@ -757,12 +718,8 @@ struct LoopICountExecute< if (i0 < len0 && i1 < len1 && i2 < len2) { - body(*(segment0.begin() + i0), - *(segment1.begin() + i1), - *(segment2.begin() + i2), - i0, - i1, - i2); + body(*(segment0.begin() + i0), *(segment1.begin() + i1), + *(segment2.begin() + i2), i0, i1, i2); } } }; @@ -883,12 +840,8 @@ struct LoopICountExecute< for (diff_t i2 = i2_init; i2 < len2; i2 += i2_stride) { - body(*(segment0.begin() + i0), - *(segment1.begin() + i1), - *(segment2.begin() + i2), - i0, - i1, - i2); + body(*(segment0.begin() + i0), *(segment1.begin() + i1), + *(segment2.begin() + i2), i0, i1, i2); } } } diff --git a/include/RAJA/policy/hip/multi_reduce.hpp b/include/RAJA/policy/hip/multi_reduce.hpp index 4467b0ab24..a74a2aaa51 100644 --- a/include/RAJA/policy/hip/multi_reduce.hpp +++ b/include/RAJA/policy/hip/multi_reduce.hpp @@ -114,8 +114,7 @@ block_multi_reduce_init_shmem(int num_bins, int numThreads = blockDim.x * blockDim.y * blockDim.z; for (int shmem_offset = threadId; - shmem_offset < shared_replication * num_bins; - shmem_offset += numThreads) + shmem_offset < shared_replication * num_bins; shmem_offset += numThreads) { shared_mem[shmem_offset] = identity; } @@ -219,8 +218,8 @@ struct MultiReduceGridAtomicHostInit_TallyData m_tally_bins(get_tally_bins(m_num_bins)), m_tally_replication(get_tally_replication()) { - m_tally_mem = create_tally( - container, identity, m_num_bins, m_tally_bins, m_tally_replication); + m_tally_mem = create_tally(container, identity, m_num_bins, m_tally_bins, + m_tally_replication); } MultiReduceGridAtomicHostInit_TallyData() = delete; @@ -246,8 +245,8 @@ struct MultiReduceGridAtomicHostInit_TallyData m_num_bins = new_num_bins; m_tally_bins = get_tally_bins(m_num_bins); m_tally_replication = get_tally_replication(); - m_tally_mem = create_tally( - container, identity, m_num_bins, m_tally_bins, m_tally_replication); + m_tally_mem = create_tally(container, identity, m_num_bins, m_tally_bins, + m_tally_replication); } else { @@ -256,8 +255,8 @@ struct MultiReduceGridAtomicHostInit_TallyData int bin = 0; for (auto const& value : container) { - m_tally_mem[GetTallyOffset{}( - bin, m_tally_bins, tally_rep, m_tally_replication)] = value; + m_tally_mem[GetTallyOffset{}(bin, m_tally_bins, tally_rep, + m_tally_replication)] = value; ++bin; } } @@ -265,8 +264,8 @@ struct MultiReduceGridAtomicHostInit_TallyData { for (int bin = 0; bin < m_num_bins; ++bin) { - m_tally_mem[GetTallyOffset{}( - bin, m_tally_bins, tally_rep, m_tally_replication)] = identity; + m_tally_mem[GetTallyOffset{}(bin, m_tally_bins, tally_rep, + m_tally_replication)] = identity; } } } @@ -394,8 +393,8 @@ struct MultiReduceGridAtomicHostInit_TallyData { for (int bin = num_bins; bin > 0; --bin) { - int tally_offset = GetTallyOffset{}( - bin - 1, tally_bins, tally_rep - 1, tally_replication); + int tally_offset = GetTallyOffset{}(bin - 1, tally_bins, tally_rep - 1, + tally_replication); tally_mem[tally_offset].~T(); } } @@ -411,7 +410,8 @@ struct MultiReduceGridAtomicHostInit_TallyData T m_identity; int m_num_bins; int m_tally_bins; - int m_tally_replication; // power of 2, at least the max number of omp threads + int m_tally_replication; // power of 2, at least the max number of omp + // threads }; @@ -452,14 +452,8 @@ struct MultiReduceGridAtomicHostInit_Data void combine_device(int bin, T value) { impl::block_multi_reduce_combine_global_atomic( - m_num_bins, - m_identity, - bin, - value, - m_tally_mem, - GetTallyOffset{}, - m_tally_replication, - m_tally_bins); + m_num_bins, m_identity, bin, value, m_tally_mem, GetTallyOffset{}, + m_tally_replication, m_tally_bins); } //! combine value on host, combine a value into the tally @@ -573,8 +567,8 @@ struct MultiReduceBlockThenGridAtomicHostInit_Data T* shared_mem = get_shared_mem(); if (shared_mem != nullptr) { - impl::block_multi_reduce_init_shmem( - m_num_bins, m_identity, shared_mem, m_shared_replication); + impl::block_multi_reduce_init_shmem(m_num_bins, m_identity, shared_mem, + m_shared_replication); } } @@ -586,15 +580,9 @@ struct MultiReduceBlockThenGridAtomicHostInit_Data if (shared_mem != nullptr) { impl::grid_multi_reduce_shmem_to_global_atomic( - m_num_bins, - m_identity, - shared_mem, - GetSharedOffset{}, - m_shared_replication, - m_tally_mem, - GetTallyOffset{}, - m_tally_replication, - m_tally_bins); + m_num_bins, m_identity, shared_mem, GetSharedOffset{}, + m_shared_replication, m_tally_mem, GetTallyOffset{}, + m_tally_replication, m_tally_bins); } } @@ -607,25 +595,14 @@ struct MultiReduceBlockThenGridAtomicHostInit_Data if (shared_mem != nullptr) { impl::block_multi_reduce_combine_shmem_atomic( - m_num_bins, - m_identity, - bin, - value, - shared_mem, - GetSharedOffset{}, + m_num_bins, m_identity, bin, value, shared_mem, GetSharedOffset{}, m_shared_replication); } else { impl::block_multi_reduce_combine_global_atomic( - m_num_bins, - m_identity, - bin, - value, - m_tally_mem, - GetTallyOffset{}, - m_tally_replication, - m_tally_bins); + m_num_bins, m_identity, bin, value, m_tally_mem, GetTallyOffset{}, + m_tally_replication, m_tally_bins); } } diff --git a/include/RAJA/policy/hip/policy.hpp b/include/RAJA/policy/hip/policy.hpp index 6547933d0b..415d1199c8 100644 --- a/include/RAJA/policy/hip/policy.hpp +++ b/include/RAJA/policy/hip/policy.hpp @@ -580,8 +580,7 @@ struct HipDims { if (num_blocks() != 0) { - return {(blocks.x ? blocks.x : 1), - (blocks.y ? blocks.y : 1), + return {(blocks.x ? blocks.x : 1), (blocks.y ? blocks.y : 1), (blocks.z ? blocks.z : 1)}; } else @@ -595,8 +594,7 @@ struct HipDims { if (num_threads() != 0) { - return {(threads.x ? threads.x : 1), - (threads.y ? threads.y : 1), + return {(threads.x ? threads.x : 1), (threads.y ? threads.y : 1), (threads.z ? threads.z : 1)}; } else diff --git a/include/RAJA/policy/hip/raja_hiperrchk.hpp b/include/RAJA/policy/hip/raja_hiperrchk.hpp index a018509164..5e0cabed0d 100644 --- a/include/RAJA/policy/hip/raja_hiperrchk.hpp +++ b/include/RAJA/policy/hip/raja_hiperrchk.hpp @@ -65,8 +65,8 @@ hipAssert(hipError_t code, const char* file, int line, bool abort = true) } else { - fprintf( - stderr, "HIPassert: %s %s %d\n", hipGetErrorString(code), file, line); + fprintf(stderr, "HIPassert: %s %s %d\n", hipGetErrorString(code), file, + line); } } } diff --git a/include/RAJA/policy/hip/reduce.hpp b/include/RAJA/policy/hip/reduce.hpp index 2927b6bc70..bdd03bd9fc 100644 --- a/include/RAJA/policy/hip/reduce.hpp +++ b/include/RAJA/policy/hip/reduce.hpp @@ -265,8 +265,7 @@ RAJA_DEVICE RAJA_INLINE T block_reduce(T val, T identity) // to avoid calling constructor/destructor in shared memory. RAJA::detail::SoAArray* sd = reinterpret_cast*>(tmpsd); + T, RAJA::policy::hip::device_constants.MAX_WARPS>*>(tmpsd); // write per warp values to shared memory if (warpId == 0) @@ -721,10 +720,11 @@ struct ReduceLastBlock_Data RAJA_DEVICE void grid_reduce(T* output) { - T temp = value; - size_t replicationId = impl:: - grid_reduce_last_block( - temp, identity, device, device_count); + T temp = value; + size_t replicationId = + impl::grid_reduce_last_block(temp, identity, device, + device_count); if (replicationId != replication) { output[replicationId] = temp; @@ -910,11 +910,10 @@ struct ReduceAtomicDeviceInit_Data { T temp = value; - size_t replicationId = impl::grid_reduce_atomic_device_init( - temp, identity, device, device_count); + size_t replicationId = + impl::grid_reduce_atomic_device_init( + temp, identity, device, device_count); if (replicationId != replication) { output[replicationId] = temp; diff --git a/include/RAJA/policy/hip/scan.hpp b/include/RAJA/policy/hip/scan.hpp index b9a3adbc7e..391d7d606e 100644 --- a/include/RAJA/policy/hip/scan.hpp +++ b/include/RAJA/policy/hip/scan.hpp @@ -68,20 +68,11 @@ RAJA_INLINE resources::EventProxy inclusive_inplace( void* d_temp_storage = nullptr; size_t temp_storage_bytes = 0; #if defined(__HIPCC__) - hipErrchk(::rocprim::inclusive_scan(d_temp_storage, - temp_storage_bytes, - begin, - begin, - len, - binary_op, - stream)); + hipErrchk(::rocprim::inclusive_scan(d_temp_storage, temp_storage_bytes, begin, + begin, len, binary_op, stream)); #elif defined(__CUDACC__) - hipErrchk(::cub::DeviceScan::InclusiveScan(d_temp_storage, - temp_storage_bytes, - begin, - begin, - binary_op, - len, + hipErrchk(::cub::DeviceScan::InclusiveScan(d_temp_storage, temp_storage_bytes, + begin, begin, binary_op, len, stream)); #endif @@ -91,20 +82,11 @@ RAJA_INLINE resources::EventProxy inclusive_inplace( temp_storage_bytes); // Run #if defined(__HIPCC__) - hipErrchk(::rocprim::inclusive_scan(d_temp_storage, - temp_storage_bytes, - begin, - begin, - len, - binary_op, - stream)); + hipErrchk(::rocprim::inclusive_scan(d_temp_storage, temp_storage_bytes, begin, + begin, len, binary_op, stream)); #elif defined(__CUDACC__) - hipErrchk(::cub::DeviceScan::InclusiveScan(d_temp_storage, - temp_storage_bytes, - begin, - begin, - binary_op, - len, + hipErrchk(::cub::DeviceScan::InclusiveScan(d_temp_storage, temp_storage_bytes, + begin, begin, binary_op, len, stream)); #endif // Free temporary storage @@ -142,22 +124,11 @@ RAJA_INLINE resources::EventProxy exclusive_inplace( void* d_temp_storage = nullptr; size_t temp_storage_bytes = 0; #if defined(__HIPCC__) - hipErrchk(::rocprim::exclusive_scan(d_temp_storage, - temp_storage_bytes, - begin, - begin, - init, - len, - binary_op, - stream)); + hipErrchk(::rocprim::exclusive_scan(d_temp_storage, temp_storage_bytes, begin, + begin, init, len, binary_op, stream)); #elif defined(__CUDACC__) - hipErrchk(::cub::DeviceScan::ExclusiveScan(d_temp_storage, - temp_storage_bytes, - begin, - begin, - binary_op, - init, - len, + hipErrchk(::cub::DeviceScan::ExclusiveScan(d_temp_storage, temp_storage_bytes, + begin, begin, binary_op, init, len, stream)); #endif // Allocate temporary storage @@ -166,22 +137,11 @@ RAJA_INLINE resources::EventProxy exclusive_inplace( temp_storage_bytes); // Run #if defined(__HIPCC__) - hipErrchk(::rocprim::exclusive_scan(d_temp_storage, - temp_storage_bytes, - begin, - begin, - init, - len, - binary_op, - stream)); + hipErrchk(::rocprim::exclusive_scan(d_temp_storage, temp_storage_bytes, begin, + begin, init, len, binary_op, stream)); #elif defined(__CUDACC__) - hipErrchk(::cub::DeviceScan::ExclusiveScan(d_temp_storage, - temp_storage_bytes, - begin, - begin, - binary_op, - init, - len, + hipErrchk(::cub::DeviceScan::ExclusiveScan(d_temp_storage, temp_storage_bytes, + begin, begin, binary_op, init, len, stream)); #endif // Free temporary storage @@ -219,8 +179,8 @@ RAJA_INLINE resources::EventProxy void* d_temp_storage = nullptr; size_t temp_storage_bytes = 0; #if defined(__HIPCC__) - hipErrchk(::rocprim::inclusive_scan( - d_temp_storage, temp_storage_bytes, begin, out, len, binary_op, stream)); + hipErrchk(::rocprim::inclusive_scan(d_temp_storage, temp_storage_bytes, begin, + out, len, binary_op, stream)); #elif defined(__CUDACC__) hipErrchk(::cub::DeviceScan::InclusiveScan( d_temp_storage, temp_storage_bytes, begin, out, binary_op, len, stream)); @@ -231,8 +191,8 @@ RAJA_INLINE resources::EventProxy temp_storage_bytes); // Run #if defined(__HIPCC__) - hipErrchk(::rocprim::inclusive_scan( - d_temp_storage, temp_storage_bytes, begin, out, len, binary_op, stream)); + hipErrchk(::rocprim::inclusive_scan(d_temp_storage, temp_storage_bytes, begin, + out, len, binary_op, stream)); #elif defined(__CUDACC__) hipErrchk(::cub::DeviceScan::InclusiveScan( d_temp_storage, temp_storage_bytes, begin, out, binary_op, len, stream)); @@ -274,22 +234,11 @@ RAJA_INLINE resources::EventProxy void* d_temp_storage = nullptr; size_t temp_storage_bytes = 0; #if defined(__HIPCC__) - hipErrchk(::rocprim::exclusive_scan(d_temp_storage, - temp_storage_bytes, - begin, - out, - init, - len, - binary_op, - stream)); + hipErrchk(::rocprim::exclusive_scan(d_temp_storage, temp_storage_bytes, begin, + out, init, len, binary_op, stream)); #elif defined(__CUDACC__) - hipErrchk(::cub::DeviceScan::ExclusiveScan(d_temp_storage, - temp_storage_bytes, - begin, - out, - binary_op, - init, - len, + hipErrchk(::cub::DeviceScan::ExclusiveScan(d_temp_storage, temp_storage_bytes, + begin, out, binary_op, init, len, stream)); #endif // Allocate temporary storage @@ -298,22 +247,11 @@ RAJA_INLINE resources::EventProxy temp_storage_bytes); // Run #if defined(__HIPCC__) - hipErrchk(::rocprim::exclusive_scan(d_temp_storage, - temp_storage_bytes, - begin, - out, - init, - len, - binary_op, - stream)); + hipErrchk(::rocprim::exclusive_scan(d_temp_storage, temp_storage_bytes, begin, + out, init, len, binary_op, stream)); #elif defined(__CUDACC__) - hipErrchk(::cub::DeviceScan::ExclusiveScan(d_temp_storage, - temp_storage_bytes, - begin, - out, - binary_op, - init, - len, + hipErrchk(::cub::DeviceScan::ExclusiveScan(d_temp_storage, temp_storage_bytes, + begin, out, binary_op, init, len, stream)); #endif // Free temporary storage diff --git a/include/RAJA/policy/hip/sort.hpp b/include/RAJA/policy/hip/sort.hpp index 1f729da96e..e04d0eb13d 100644 --- a/include/RAJA/policy/hip/sort.hpp +++ b/include/RAJA/policy/hip/sort.hpp @@ -103,9 +103,8 @@ stable(resources::Hip hip_res, concepts::any_of< camp::is_same>>, - camp::is_same>>>>:: - value, + camp::is_same>>>>::value, "RAJA stable_sort is only implemented for pointers to " "arithmetic types and RAJA::operators::less and " "RAJA::operators::greater."); @@ -150,21 +149,13 @@ stable(resources::Hip hip_res, void* d_temp_storage = nullptr; size_t temp_storage_bytes = 0; #if defined(__HIPCC__) - hipErrchk(::rocprim::radix_sort_keys(d_temp_storage, - temp_storage_bytes, - d_keys, - len, - begin_bit, - end_bit, + hipErrchk(::rocprim::radix_sort_keys(d_temp_storage, temp_storage_bytes, + d_keys, len, begin_bit, end_bit, stream)); #elif defined(__CUDACC__) cudaErrchk(::cub::DeviceRadixSort::SortKeys(d_temp_storage, - temp_storage_bytes, - d_keys, - len, - begin_bit, - end_bit, - stream)); + temp_storage_bytes, d_keys, len, + begin_bit, end_bit, stream)); #endif // Allocate temporary storage d_temp_storage = @@ -173,21 +164,13 @@ stable(resources::Hip hip_res, // Run #if defined(__HIPCC__) - hipErrchk(::rocprim::radix_sort_keys(d_temp_storage, - temp_storage_bytes, - d_keys, - len, - begin_bit, - end_bit, + hipErrchk(::rocprim::radix_sort_keys(d_temp_storage, temp_storage_bytes, + d_keys, len, begin_bit, end_bit, stream)); #elif defined(__CUDACC__) cudaErrchk(::cub::DeviceRadixSort::SortKeys(d_temp_storage, - temp_storage_bytes, - d_keys, - len, - begin_bit, - end_bit, - stream)); + temp_storage_bytes, d_keys, len, + begin_bit, end_bit, stream)); #endif // Free temporary storage hip::device_mempool_type::getInstance().free(d_temp_storage); @@ -196,8 +179,8 @@ stable(resources::Hip hip_res, { // copy - hipErrchk(hipMemcpyAsync( - begin, d_out, len * sizeof(R), hipMemcpyDefault, stream)); + hipErrchk(hipMemcpyAsync(begin, d_out, len * sizeof(R), hipMemcpyDefault, + stream)); } hip::device_mempool_type::getInstance().free(d_out); @@ -244,21 +227,13 @@ stable(resources::Hip hip_res, void* d_temp_storage = nullptr; size_t temp_storage_bytes = 0; #if defined(__HIPCC__) - hipErrchk(::rocprim::radix_sort_keys_desc(d_temp_storage, - temp_storage_bytes, - d_keys, - len, - begin_bit, - end_bit, + hipErrchk(::rocprim::radix_sort_keys_desc(d_temp_storage, temp_storage_bytes, + d_keys, len, begin_bit, end_bit, stream)); #elif defined(__CUDACC__) - cudaErrchk(::cub::DeviceRadixSort::SortKeysDescending(d_temp_storage, - temp_storage_bytes, - d_keys, - len, - begin_bit, - end_bit, - stream)); + cudaErrchk(::cub::DeviceRadixSort::SortKeysDescending( + d_temp_storage, temp_storage_bytes, d_keys, len, begin_bit, end_bit, + stream)); #endif // Allocate temporary storage d_temp_storage = @@ -267,21 +242,13 @@ stable(resources::Hip hip_res, // Run #if defined(__HIPCC__) - hipErrchk(::rocprim::radix_sort_keys_desc(d_temp_storage, - temp_storage_bytes, - d_keys, - len, - begin_bit, - end_bit, + hipErrchk(::rocprim::radix_sort_keys_desc(d_temp_storage, temp_storage_bytes, + d_keys, len, begin_bit, end_bit, stream)); #elif defined(__CUDACC__) - cudaErrchk(::cub::DeviceRadixSort::SortKeysDescending(d_temp_storage, - temp_storage_bytes, - d_keys, - len, - begin_bit, - end_bit, - stream)); + cudaErrchk(::cub::DeviceRadixSort::SortKeysDescending( + d_temp_storage, temp_storage_bytes, d_keys, len, begin_bit, end_bit, + stream)); #endif // Free temporary storage hip::device_mempool_type::getInstance().free(d_temp_storage); @@ -290,8 +257,8 @@ stable(resources::Hip hip_res, { // copy - hipErrchk(hipMemcpyAsync( - begin, d_out, len * sizeof(R), hipMemcpyDefault, stream)); + hipErrchk(hipMemcpyAsync(begin, d_out, len * sizeof(R), hipMemcpyDefault, + stream)); } hip::device_mempool_type::getInstance().free(d_out); @@ -335,9 +302,8 @@ unstable(resources::Hip hip_res, concepts::any_of< camp::is_same>>, - camp::is_same>>>>:: - value, + camp::is_same>>>>::value, "RAJA sort is only implemented for pointers to arithmetic " "types and RAJA::operators::less and RAJA::operators::greater."); @@ -480,23 +446,13 @@ stable_pairs( void* d_temp_storage = nullptr; size_t temp_storage_bytes = 0; #if defined(__HIPCC__) - hipErrchk(::rocprim::radix_sort_pairs(d_temp_storage, - temp_storage_bytes, - d_keys, - d_vals, - len, - begin_bit, - end_bit, + hipErrchk(::rocprim::radix_sort_pairs(d_temp_storage, temp_storage_bytes, + d_keys, d_vals, len, begin_bit, end_bit, stream)); #elif defined(__CUDACC__) - cudaErrchk(::cub::DeviceRadixSort::SortPairs(d_temp_storage, - temp_storage_bytes, - d_keys, - d_vals, - len, - begin_bit, - end_bit, - stream)); + cudaErrchk(::cub::DeviceRadixSort::SortPairs( + d_temp_storage, temp_storage_bytes, d_keys, d_vals, len, begin_bit, + end_bit, stream)); #endif // Allocate temporary storage d_temp_storage = @@ -505,23 +461,13 @@ stable_pairs( // Run #if defined(__HIPCC__) - hipErrchk(::rocprim::radix_sort_pairs(d_temp_storage, - temp_storage_bytes, - d_keys, - d_vals, - len, - begin_bit, - end_bit, + hipErrchk(::rocprim::radix_sort_pairs(d_temp_storage, temp_storage_bytes, + d_keys, d_vals, len, begin_bit, end_bit, stream)); #elif defined(__CUDACC__) - cudaErrchk(::cub::DeviceRadixSort::SortPairs(d_temp_storage, - temp_storage_bytes, - d_keys, - d_vals, - len, - begin_bit, - end_bit, - stream)); + cudaErrchk(::cub::DeviceRadixSort::SortPairs( + d_temp_storage, temp_storage_bytes, d_keys, d_vals, len, begin_bit, + end_bit, stream)); #endif // Free temporary storage hip::device_mempool_type::getInstance().free(d_temp_storage); @@ -530,15 +476,15 @@ stable_pairs( { // copy keys - hipErrchk(hipMemcpyAsync( - keys_begin, d_keys_out, len * sizeof(K), hipMemcpyDefault, stream)); + hipErrchk(hipMemcpyAsync(keys_begin, d_keys_out, len * sizeof(K), + hipMemcpyDefault, stream)); } if (detail::get_current(d_vals) == d_vals_out) { // copy vals - hipErrchk(hipMemcpyAsync( - vals_begin, d_vals_out, len * sizeof(V), hipMemcpyDefault, stream)); + hipErrchk(hipMemcpyAsync(vals_begin, d_vals_out, len * sizeof(V), + hipMemcpyDefault, stream)); } hip::device_mempool_type::getInstance().free(d_keys_out); @@ -594,23 +540,13 @@ stable_pairs( void* d_temp_storage = nullptr; size_t temp_storage_bytes = 0; #if defined(__HIPCC__) - hipErrchk(::rocprim::radix_sort_pairs_desc(d_temp_storage, - temp_storage_bytes, - d_keys, - d_vals, - len, - begin_bit, - end_bit, - stream)); + hipErrchk(::rocprim::radix_sort_pairs_desc(d_temp_storage, temp_storage_bytes, + d_keys, d_vals, len, begin_bit, + end_bit, stream)); #elif defined(__CUDACC__) - cudaErrchk(::cub::DeviceRadixSort::SortPairsDescending(d_temp_storage, - temp_storage_bytes, - d_keys, - d_vals, - len, - begin_bit, - end_bit, - stream)); + cudaErrchk(::cub::DeviceRadixSort::SortPairsDescending( + d_temp_storage, temp_storage_bytes, d_keys, d_vals, len, begin_bit, + end_bit, stream)); #endif // Allocate temporary storage d_temp_storage = @@ -619,23 +555,13 @@ stable_pairs( // Run #if defined(__HIPCC__) - hipErrchk(::rocprim::radix_sort_pairs_desc(d_temp_storage, - temp_storage_bytes, - d_keys, - d_vals, - len, - begin_bit, - end_bit, - stream)); + hipErrchk(::rocprim::radix_sort_pairs_desc(d_temp_storage, temp_storage_bytes, + d_keys, d_vals, len, begin_bit, + end_bit, stream)); #elif defined(__CUDACC__) - cudaErrchk(::cub::DeviceRadixSort::SortPairsDescending(d_temp_storage, - temp_storage_bytes, - d_keys, - d_vals, - len, - begin_bit, - end_bit, - stream)); + cudaErrchk(::cub::DeviceRadixSort::SortPairsDescending( + d_temp_storage, temp_storage_bytes, d_keys, d_vals, len, begin_bit, + end_bit, stream)); #endif // Free temporary storage hip::device_mempool_type::getInstance().free(d_temp_storage); @@ -644,15 +570,15 @@ stable_pairs( { // copy keys - hipErrchk(hipMemcpyAsync( - keys_begin, d_keys_out, len * sizeof(K), hipMemcpyDefault, stream)); + hipErrchk(hipMemcpyAsync(keys_begin, d_keys_out, len * sizeof(K), + hipMemcpyDefault, stream)); } if (detail::get_current(d_vals) == d_vals_out) { // copy vals - hipErrchk(hipMemcpyAsync( - vals_begin, d_vals_out, len * sizeof(V), hipMemcpyDefault, stream)); + hipErrchk(hipMemcpyAsync(vals_begin, d_vals_out, len * sizeof(V), + hipMemcpyDefault, stream)); } hip::device_mempool_type::getInstance().free(d_keys_out); diff --git a/include/RAJA/policy/openmp/forall.hpp b/include/RAJA/policy/openmp/forall.hpp index c290ba0f62..e21c8ef93e 100644 --- a/include/RAJA/policy/openmp/forall.hpp +++ b/include/RAJA/policy/openmp/forall.hpp @@ -244,8 +244,7 @@ RAJA_INLINE void forall_impl(const Policy&, Iterable&& iter, Func&& loop_body) int prev_chunk; omp_get_schedule(&prev_sched, &prev_chunk); omp_set_schedule(Policy::schedule, Policy::chunk_size); - forall_impl(::RAJA::policy::omp::Runtime{}, - std::forward(iter), + forall_impl(::RAJA::policy::omp::Runtime{}, std::forward(iter), std::forward(loop_body)); omp_set_schedule(prev_sched, prev_chunk); } @@ -344,8 +343,8 @@ forall_impl(resources::Host host_res, Func&& loop_body, ForallParam) { - internal::forall_impl( - Schedule{}, std::forward(iter), std::forward(loop_body)); + internal::forall_impl(Schedule{}, std::forward(iter), + std::forward(loop_body)); return resources::EventProxy(host_res); } @@ -363,8 +362,8 @@ forall_impl(resources::Host host_res, Func&& loop_body, ForallParam) { - internal::forall_impl_nowait( - Schedule{}, std::forward(iter), std::forward(loop_body)); + internal::forall_impl_nowait(Schedule{}, std::forward(iter), + std::forward(loop_body)); return resources::EventProxy(host_res); } diff --git a/include/RAJA/policy/openmp/launch.hpp b/include/RAJA/policy/openmp/launch.hpp index 6e21a445c4..a6450b8555 100644 --- a/include/RAJA/policy/openmp/launch.hpp +++ b/include/RAJA/policy/openmp/launch.hpp @@ -254,8 +254,7 @@ struct LoopExecute { for (int i = 0; i < len0; i++) { - body(*(segment0.begin() + i), - *(segment1.begin() + j), + body(*(segment0.begin() + i), *(segment1.begin() + j), *(segment2.begin() + k)); } } @@ -328,12 +327,8 @@ struct LoopICountExecute { for (int i = 0; i < len0; i++) { - body(*(segment0.begin() + i), - *(segment1.begin() + j), - *(segment2.begin() + k), - i, - j, - k); + body(*(segment0.begin() + i), *(segment1.begin() + j), + *(segment2.begin() + k), i, j, k); } } } @@ -441,8 +436,8 @@ struct LoopICountExecute for (int i = 0; i < len0; i++) { - loop_body.get_priv()( - *(segment0.begin() + i), *(segment1.begin() + j), i, j); + loop_body.get_priv()(*(segment0.begin() + i), + *(segment1.begin() + j), i, j); } } }); @@ -476,10 +471,7 @@ struct LoopICountExecute { loop_body.get_priv()(*(segment0.begin() + i), *(segment1.begin() + j), - *(segment2.begin() + k), - i, - j, - k); + *(segment2.begin() + k), i, j, k); } } } diff --git a/include/RAJA/policy/openmp/multi_reduce.hpp b/include/RAJA/policy/openmp/multi_reduce.hpp index 0b35f4c0ab..01b9038d2b 100644 --- a/include/RAJA/policy/openmp/multi_reduce.hpp +++ b/include/RAJA/policy/openmp/multi_reduce.hpp @@ -227,12 +227,8 @@ struct MultiReduceDataOMP< m_identity(identity), m_data(nullptr) { - m_data = create_data(container, - identity, - m_num_bins, - m_max_threads, - m_padded_bins, - m_padded_threads); + m_data = create_data(container, identity, m_num_bins, m_max_threads, + m_padded_bins, m_padded_threads); } MultiReduceDataOMP(MultiReduceDataOMP const& other) @@ -254,8 +250,8 @@ struct MultiReduceDataOMP< { if (!m_parent) { - destroy_data( - m_data, m_num_bins, m_max_threads, m_padded_bins, m_padded_threads); + destroy_data(m_data, m_num_bins, m_max_threads, m_padded_bins, + m_padded_threads); } } } @@ -267,16 +263,12 @@ struct MultiReduceDataOMP< size_t new_num_bins = container.size(); if (new_num_bins != m_num_bins) { - destroy_data( - m_data, m_num_bins, m_max_threads, m_padded_bins, m_padded_threads); + destroy_data(m_data, m_num_bins, m_max_threads, m_padded_bins, + m_padded_threads); m_num_bins = new_num_bins; m_padded_bins = pad_bins(m_num_bins); - m_data = create_data(container, - identity, - m_num_bins, - m_max_threads, - m_padded_bins, - m_padded_threads); + m_data = create_data(container, identity, m_num_bins, m_max_threads, + m_padded_bins, m_padded_threads); } else { @@ -287,8 +279,8 @@ struct MultiReduceDataOMP< size_t bin = 0; for (auto const& value : container) { - m_data[index_data( - bin, thread_idx, m_padded_bins, m_padded_threads)] = value; + m_data[index_data(bin, thread_idx, m_padded_bins, + m_padded_threads)] = value; ++bin; } } @@ -296,8 +288,8 @@ struct MultiReduceDataOMP< { for (size_t bin = 0; bin < m_num_bins; ++bin) { - m_data[index_data( - bin, thread_idx, m_padded_bins, m_padded_threads)] = identity; + m_data[index_data(bin, thread_idx, m_padded_bins, + m_padded_threads)] = identity; } } } diff --git a/include/RAJA/policy/openmp/params/forall.hpp b/include/RAJA/policy/openmp/params/forall.hpp index c761f4c2c0..1739318941 100644 --- a/include/RAJA/policy/openmp/params/forall.hpp +++ b/include/RAJA/policy/openmp/params/forall.hpp @@ -342,8 +342,7 @@ RAJA_INLINE resources::EventProxy Func&& loop_body, ForallParam f_params) { - expt::internal::forall_impl(Schedule{}, - std::forward(iter), + expt::internal::forall_impl(Schedule{}, std::forward(iter), std::forward(loop_body), std::forward(f_params)); return resources::EventProxy(host_res); diff --git a/include/RAJA/policy/openmp/scan.hpp b/include/RAJA/policy/openmp/scan.hpp index e3c916527f..28e690c831 100644 --- a/include/RAJA/policy/openmp/scan.hpp +++ b/include/RAJA/policy/openmp/scan.hpp @@ -67,18 +67,14 @@ RAJA_INLINE concepts::enable_if_t, const DistanceT idx_end = firstIndex(n, p, pid + 1); if (idx_begin != idx_end) { - inclusive_inplace( - host_res, ::RAJA::seq_exec{}, begin + idx_begin, begin + idx_end, f); + inclusive_inplace(host_res, ::RAJA::seq_exec{}, begin + idx_begin, + begin + idx_end, f); sums[pid] = begin[idx_end - 1]; } #pragma omp barrier #pragma omp single - exclusive_inplace(host_res, - ::RAJA::seq_exec{}, - sums.data(), - sums.data() + p, - f, - BinFn::identity()); + exclusive_inplace(host_res, ::RAJA::seq_exec{}, sums.data(), + sums.data() + p, f, BinFn::identity()); for (auto i = idx_begin; i < idx_end; ++i) { begin[i] = f(begin[i], sums[pid]); @@ -119,17 +115,13 @@ RAJA_INLINE concepts::enable_if_t, #pragma omp barrier if (idx_begin != idx_end) { - exclusive_inplace( - host_res, seq_exec{}, begin + idx_begin, begin + idx_end, f, init); + exclusive_inplace(host_res, seq_exec{}, begin + idx_begin, + begin + idx_end, f, init); sums[pid] = begin[idx_end - 1]; } #pragma omp barrier #pragma omp single - exclusive_inplace(host_res, - seq_exec{}, - sums.data(), - sums.data() + p, - f, + exclusive_inplace(host_res, seq_exec{}, sums.data(), sums.data() + p, f, BinFn::identity()); for (auto i = idx_begin; i < idx_end; ++i) { @@ -180,8 +172,8 @@ RAJA_INLINE concepts::enable_if_t, { using std::distance; ::std::copy(begin, end, out); - return exclusive_inplace( - host_res, exec, out, out + distance(begin, end), f, v); + return exclusive_inplace(host_res, exec, out, out + distance(begin, end), f, + v); } } // namespace scan diff --git a/include/RAJA/policy/openmp/sort.hpp b/include/RAJA/policy/openmp/sort.hpp index 3bad04d1d6..c8a137eaee 100644 --- a/include/RAJA/policy/openmp/sort.hpp +++ b/include/RAJA/policy/openmp/sort.hpp @@ -85,8 +85,8 @@ inline void sort_task(Sorter sorter, // std::inplace_merge(begin + i_begin, begin + i_middle, begin + i_end, // comp); - RAJA::detail::inplace_merge( - begin + i_begin, begin + i_middle, begin + i_end, comp); + RAJA::detail::inplace_merge(begin + i_begin, begin + i_middle, + begin + i_end, comp); } } @@ -137,8 +137,8 @@ inline void sort_parallel_region(Sorter sorter, // this thread merges ranges [i_begin, i_middle) and [i_middle, i_end) // std::inplace_merge(begin + i_begin, begin + i_middle, begin + i_end, // comp); - RAJA::detail::inplace_merge( - begin + i_begin, begin + i_middle, begin + i_end, comp); + RAJA::detail::inplace_merge(begin + i_begin, begin + i_middle, + begin + i_end, comp); } } } @@ -255,8 +255,8 @@ unstable_pairs(resources::Host host_res, auto begin = RAJA::zip(keys_begin, vals_begin); auto end = RAJA::zip(keys_end, vals_begin + (keys_end - keys_begin)); using zip_ref = RAJA::detail::IterRef>; - detail::openmp::sort( - detail::UnstableSorter{}, begin, end, RAJA::compare_first(comp)); + detail::openmp::sort(detail::UnstableSorter{}, begin, end, + RAJA::compare_first(comp)); return resources::EventProxy(host_res); } @@ -281,8 +281,8 @@ stable_pairs(resources::Host host_res, auto begin = RAJA::zip(keys_begin, vals_begin); auto end = RAJA::zip(keys_end, vals_begin + (keys_end - keys_begin)); using zip_ref = RAJA::detail::IterRef>; - detail::openmp::sort( - detail::StableSorter{}, begin, end, RAJA::compare_first(comp)); + detail::openmp::sort(detail::StableSorter{}, begin, end, + RAJA::compare_first(comp)); return resources::EventProxy(host_res); } diff --git a/include/RAJA/policy/openmp_target/kernel/For.hpp b/include/RAJA/policy/openmp_target/kernel/For.hpp index 9dc3bfe39f..ddc09add09 100644 --- a/include/RAJA/policy/openmp_target/kernel/For.hpp +++ b/include/RAJA/policy/openmp_target/kernel/For.hpp @@ -68,10 +68,8 @@ struct StatementExecutor{}, - TypedRangeSegment(0, len), - for_wrapper, + forall_impl(r, omp_target_parallel_for_exec{}, + TypedRangeSegment(0, len), for_wrapper, RAJA::expt::get_empty_forall_param_pack()); } }; diff --git a/include/RAJA/policy/openmp_target/reduce.hpp b/include/RAJA/policy/openmp_target/reduce.hpp index 8f789517be..d0e16601bc 100644 --- a/include/RAJA/policy/openmp_target/reduce.hpp +++ b/include/RAJA/policy/openmp_target/reduce.hpp @@ -129,10 +129,7 @@ struct Reduce_Data // precondition: host and device are valid pointers if (omp_target_memcpy(reinterpret_cast(device), reinterpret_cast(host), - omp::MaxNumTeams * sizeof(T), - 0, - 0, - info.deviceID, + omp::MaxNumTeams * sizeof(T), 0, 0, info.deviceID, info.hostID) != 0) { printf("Unable to copy memory from host to device\n"); @@ -146,10 +143,7 @@ struct Reduce_Data // precondition: host and device are valid pointers if (omp_target_memcpy(reinterpret_cast(host), reinterpret_cast(device), - omp::MaxNumTeams * sizeof(T), - 0, - 0, - info.hostID, + omp::MaxNumTeams * sizeof(T), 0, 0, info.hostID, info.deviceID) != 0) { printf("Unable to copy memory from device to host\n"); diff --git a/include/RAJA/policy/sequential/launch.hpp b/include/RAJA/policy/sequential/launch.hpp index f9397aabee..28ad518547 100644 --- a/include/RAJA/policy/sequential/launch.hpp +++ b/include/RAJA/policy/sequential/launch.hpp @@ -170,8 +170,7 @@ struct LoopExecute { for (int i = 0; i < len0; i++) { - body(*(segment0.begin() + i), - *(segment1.begin() + j), + body(*(segment0.begin() + i), *(segment1.begin() + j), *(segment2.begin() + k)); } } @@ -239,12 +238,8 @@ struct LoopICountExecute { for (int i = 0; i < len0; i++) { - body(*(segment0.begin() + i), - *(segment1.begin() + j), - *(segment2.begin() + k), - i, - j, - k); + body(*(segment0.begin() + i), *(segment1.begin() + j), + *(segment2.begin() + k), i, j, k); } } } diff --git a/include/RAJA/policy/sycl/forall.hpp b/include/RAJA/policy/sycl/forall.hpp index ea276026ea..b712af52df 100644 --- a/include/RAJA/policy/sycl/forall.hpp +++ b/include/RAJA/policy/sycl/forall.hpp @@ -294,8 +294,7 @@ forall_impl(resources::Sycl& sycl_res, q->submit( [&](::sycl::handler& h) { - h.parallel_for(::sycl::range<1>(len), - reduction, + h.parallel_for(::sycl::range<1>(len), reduction, [=](::sycl::item<1> it, auto& red) { ForallParam fp; @@ -391,8 +390,7 @@ forall_impl(resources::Sycl& sycl_res, q->submit( [&](::sycl::handler& h) { - h.parallel_for(::sycl::range<1>(len), - reduction, + h.parallel_for(::sycl::range<1>(len), reduction, [=](::sycl::item<1> it, auto& red) { Index_type ii = it.get_id(0); @@ -451,8 +449,8 @@ RAJA_INLINE resources::EventProxy int num_seg = iset.getNumSegments(); for (int isi = 0; isi < num_seg; ++isi) { - iset.segmentCall( - r, isi, detail::CallForall(), sycl_exec(), loop_body); + iset.segmentCall(r, isi, detail::CallForall(), sycl_exec(), + loop_body); } // iterate over segments of index set if (!Async) diff --git a/include/RAJA/policy/sycl/kernel/SyclKernel.hpp b/include/RAJA/policy/sycl/kernel/SyclKernel.hpp index cf2a3bc642..3fa7299a36 100644 --- a/include/RAJA/policy/sycl/kernel/SyclKernel.hpp +++ b/include/RAJA/policy/sycl/kernel/SyclKernel.hpp @@ -144,8 +144,7 @@ struct SyclLaunchHelper, StmtList, Data, Types> [&](cl::sycl::handler& h) { h.parallel_for( - launch_dims.fit_nd_range(qu), - [=](cl::sycl::nd_item<3> item) + launch_dims.fit_nd_range(qu), [=](cl::sycl::nd_item<3> item) { SyclKernelLauncher(*m_data, item); }); }) .wait(); // Need to wait to free memory @@ -212,10 +211,7 @@ struct StatementExecutor< using executor_t = sycl_statement_list_executor_t; using launch_t = SyclLaunchHelper::value, - LaunchConfig, - stmt_list_t, - data_t, - Types>; + LaunchConfig, stmt_list_t, data_t, Types>; camp::resources::Sycl res = data.get_resource(); ::sycl::queue* q = res.get_queue(); diff --git a/include/RAJA/policy/sycl/kernel/internal.hpp b/include/RAJA/policy/sycl/kernel/internal.hpp index 2003659c47..a0ac5aeae6 100644 --- a/include/RAJA/policy/sycl/kernel/internal.hpp +++ b/include/RAJA/policy/sycl/kernel/internal.hpp @@ -184,10 +184,10 @@ struct LaunchDims ((launch_global.z / launch_local.z) + 1) * launch_local.z; } - cl::sycl::range<3> ret_th = { - launch_local.x, launch_local.y, launch_local.z}; - cl::sycl::range<3> ret_gl = { - launch_global.x, launch_global.y, launch_global.z}; + cl::sycl::range<3> ret_th = {launch_local.x, launch_local.y, + launch_local.z}; + cl::sycl::range<3> ret_gl = {launch_global.x, launch_global.y, + launch_global.z}; return cl::sycl::nd_range<3>(ret_gl, ret_th); } @@ -271,8 +271,8 @@ struct SyclStatementListExecutor, Types> static inline LaunchDims calculateDimensions(Data const& data) { // Compute this statements launch dimensions - return SyclStatementListExecutorHelper<0, num_stmts, enclosed_stmts_t>:: - calculateDimensions(data); + return SyclStatementListExecutorHelper< + 0, num_stmts, enclosed_stmts_t>::calculateDimensions(data); } }; diff --git a/include/RAJA/policy/sycl/launch.hpp b/include/RAJA/policy/sycl/launch.hpp index 15d8d7bde2..6f15e0bab2 100644 --- a/include/RAJA/policy/sycl/launch.hpp +++ b/include/RAJA/policy/sycl/launch.hpp @@ -172,8 +172,7 @@ struct LaunchExecute> launch_params.shared_mem_size, h); h.parallel_for( - cl::sycl::nd_range<3>(gridSize, blockSize), - reduction, + cl::sycl::nd_range<3>(gridSize, blockSize), reduction, [=](cl::sycl::nd_item<3> itm, auto& red) { LaunchContext ctx; @@ -361,8 +360,7 @@ struct LaunchExecute> launch_params.shared_mem_size, h); h.parallel_for( - cl::sycl::nd_range<3>(gridSize, blockSize), - reduction, + cl::sycl::nd_range<3>(gridSize, blockSize), reduction, [=](cl::sycl::nd_item<3> itm, auto& red) { LaunchContext ctx; @@ -491,8 +489,7 @@ struct LoopExecute, SEGMENT> ctx.itm->get_local_id(DIM2); if (tx < len0 && ty < len1 && tz < len2) - body(*(segment0.begin() + tx), - *(segment1.begin() + ty), + body(*(segment0.begin() + tx), *(segment1.begin() + ty), *(segment1.begin() + ty)); } } @@ -865,8 +862,7 @@ struct LoopExecute, SEGMENT> const int ty = ctx.itm->get_group(DIM1); const int tz = ctx.itm->get_group(DIM2); if (tx < len0 && ty < len1 && tz < len2) - body(*(segment0.begin() + tx), - *(segment1.begin() + ty), + body(*(segment0.begin() + tx), *(segment1.begin() + ty), *(segment2.begin() + tz)); } } @@ -916,12 +912,8 @@ struct LoopICountExecute, SEGMENT> const int ty = ctx.itm->get_group(DIM1); const int tz = ctx.itm->get_group(DIM2); if (tx < len0 && ty < len1 && tz < len2) - body(*(segment0.begin() + tx), - *(segment1.begin() + ty), - *(segment2.begin() + tz), - tx, - ty, - tz); + body(*(segment0.begin() + tx), *(segment1.begin() + ty), + *(segment2.begin() + tz), tx, ty, tz); } } }; @@ -995,8 +987,7 @@ struct LoopExecute, SEGMENT> bz += ctx.itm->get_group_range(DIM2)) { - body(*(segment0.begin() + bx), - *(segment1.begin() + by), + body(*(segment0.begin() + bx), *(segment1.begin() + by), *(segment2.begin() + bz)); } } @@ -1062,12 +1053,8 @@ struct LoopICountExecute, SEGMENT> bz += ctx.itm->get_group_range(DIM0)) { - body(*(segment0.begin() + bx), - *(segment1.begin() + by), - *(segment2.begin() + bz), - bx, - by, - bz); + body(*(segment0.begin() + bx), *(segment1.begin() + by), + *(segment2.begin() + bz), bx, by, bz); } } } diff --git a/include/RAJA/policy/sycl/reduce.hpp b/include/RAJA/policy/sycl/reduce.hpp index 42857a74dc..1edb198a8c 100644 --- a/include/RAJA/policy/sycl/reduce.hpp +++ b/include/RAJA/policy/sycl/reduce.hpp @@ -143,9 +143,9 @@ struct Reduce_Data } // precondition: host and device are valid pointers - auto e = q->memcpy(reinterpret_cast(device), - reinterpret_cast(host), - sycl::MaxNumTeams * sizeof(T)); + auto e = + q->memcpy(reinterpret_cast(device), + reinterpret_cast(host), sycl::MaxNumTeams * sizeof(T)); e.wait(); } @@ -246,8 +246,7 @@ struct TargetReduce #ifdef __SYCL_DEVICE_ONLY__ auto i = 0; //__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0]; auto atm = - ::sycl::atomic_ref( val.device[i]); @@ -265,8 +264,7 @@ struct TargetReduce #ifdef __SYCL_DEVICE_ONLY__ auto i = 0; //__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0]; auto atm = - ::sycl::atomic_ref( val.device[i]); @@ -432,8 +430,7 @@ class ReduceSum : public TargetReduce, T> #ifdef __SYCL_DEVICE_ONLY__ auto i = 0; //__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0]; auto atm = - ::sycl::atomic_ref( parent::val.device[i]); @@ -462,8 +459,7 @@ class ReduceBitOr #ifdef __SYCL_DEVICE_ONLY__ auto i = 0; //__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0]; auto atm = - ::sycl::atomic_ref( parent::val.device[i]); @@ -481,8 +477,7 @@ class ReduceBitOr #ifdef __SYCL_DEVICE_ONLY__ auto i = 0; //__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0]; auto atm = - ::sycl::atomic_ref( parent::val.device[i]); @@ -511,8 +506,7 @@ class ReduceBitAnd #ifdef __SYCL_DEVICE_ONLY__ auto i = 0; //__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0]; auto atm = - ::sycl::atomic_ref( parent::val.device[i]); @@ -530,8 +524,7 @@ class ReduceBitAnd #ifdef __SYCL_DEVICE_ONLY__ auto i = 0; //__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0]; auto atm = - ::sycl::atomic_ref( parent::val.device[i]); @@ -560,8 +553,7 @@ class ReduceMin : public TargetReduce, T> #ifdef __SYCL_DEVICE_ONLY__ auto i = 0; //__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0]; auto atm = - ::sycl::atomic_ref( parent::val.device[i]); @@ -579,8 +571,7 @@ class ReduceMin : public TargetReduce, T> #ifdef __SYCL_DEVICE_ONLY__ auto i = 0; //__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0]; auto atm = - ::sycl::atomic_ref( parent::val.device[i]); @@ -609,8 +600,7 @@ class ReduceMax : public TargetReduce, T> #ifdef __SYCL_DEVICE_ONLY__ auto i = 0; //__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0]; auto atm = - ::sycl::atomic_ref( parent::val.device[i]); @@ -628,8 +618,7 @@ class ReduceMax : public TargetReduce, T> #ifdef __SYCL_DEVICE_ONLY__ auto i = 0; //__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0]; auto atm = - ::sycl::atomic_ref( parent::val.device[i]); diff --git a/include/RAJA/policy/tensor/arch/avx/avx_double.hpp b/include/RAJA/policy/tensor/arch/avx/avx_double.hpp index 753a075bd2..d8c3cd3057 100644 --- a/include/RAJA/policy/tensor/arch/avx/avx_double.hpp +++ b/include/RAJA/policy/tensor/arch/avx/avx_double.hpp @@ -57,8 +57,8 @@ class Register __m256i createMask(camp::idx_t N) const { // Generate a mask - return _mm256_set_epi64x( - N >= 4 ? -1 : 0, N >= 3 ? -1 : 0, N >= 2 ? -1 : 0, N >= 1 ? -1 : 0); + return _mm256_set_epi64x(N >= 4 ? -1 : 0, N >= 3 ? -1 : 0, N >= 2 ? -1 : 0, + N >= 1 ? -1 : 0); } RAJA_INLINE @@ -297,10 +297,9 @@ class Register self_type divide_n(self_type const& b, camp::idx_t N) const { // AVX2 does not supply a masked divide, so do it manually - return self_type(_mm256_set_pd(N >= 4 ? get(3) / b.get(3) : 0, - N >= 3 ? get(2) / b.get(2) : 0, - N >= 2 ? get(1) / b.get(1) : 0, - N >= 1 ? get(0) / b.get(0) : 0)); + return self_type(_mm256_set_pd( + N >= 4 ? get(3) / b.get(3) : 0, N >= 3 ? get(2) / b.get(2) : 0, + N >= 2 ? get(1) / b.get(1) : 0, N >= 1 ? get(0) / b.get(0) : 0)); } /*! diff --git a/include/RAJA/policy/tensor/arch/avx/avx_float.hpp b/include/RAJA/policy/tensor/arch/avx/avx_float.hpp index 078d3a421c..41c2c3134c 100644 --- a/include/RAJA/policy/tensor/arch/avx/avx_float.hpp +++ b/include/RAJA/policy/tensor/arch/avx/avx_float.hpp @@ -56,14 +56,9 @@ class Register __m256i createMask(camp::idx_t N) const { // Generate a mask - return _mm256_set_epi32(N >= 8 ? -1 : 0, - N >= 7 ? -1 : 0, - N >= 6 ? -1 : 0, - N >= 5 ? -1 : 0, - N >= 4 ? -1 : 0, - N >= 3 ? -1 : 0, - N >= 2 ? -1 : 0, - N >= 1 ? -1 : 0); + return _mm256_set_epi32(N >= 8 ? -1 : 0, N >= 7 ? -1 : 0, N >= 6 ? -1 : 0, + N >= 5 ? -1 : 0, N >= 4 ? -1 : 0, N >= 3 ? -1 : 0, + N >= 2 ? -1 : 0, N >= 1 ? -1 : 0); } public: @@ -302,14 +297,11 @@ class Register self_type divide_n(self_type const& b, camp::idx_t N) const { // AVX2 does not supply a masked divide - return self_type(_mm256_set_ps(N >= 8 ? get(7) / b.get(7) : 0, - N >= 7 ? get(6) / b.get(6) : 0, - N >= 6 ? get(5) / b.get(5) : 0, - N >= 5 ? get(4) / b.get(4) : 0, - N >= 4 ? get(3) / b.get(3) : 0, - N >= 3 ? get(2) / b.get(2) : 0, - N >= 2 ? get(1) / b.get(1) : 0, - N >= 1 ? get(0) / b.get(0) : 0)); + return self_type(_mm256_set_ps( + N >= 8 ? get(7) / b.get(7) : 0, N >= 7 ? get(6) / b.get(6) : 0, + N >= 6 ? get(5) / b.get(5) : 0, N >= 5 ? get(4) / b.get(4) : 0, + N >= 4 ? get(3) / b.get(3) : 0, N >= 3 ? get(2) / b.get(2) : 0, + N >= 2 ? get(1) / b.get(1) : 0, N >= 1 ? get(0) / b.get(0) : 0)); } diff --git a/include/RAJA/policy/tensor/arch/avx/avx_int32.hpp b/include/RAJA/policy/tensor/arch/avx/avx_int32.hpp index fc67ad6323..1426a4658a 100644 --- a/include/RAJA/policy/tensor/arch/avx/avx_int32.hpp +++ b/include/RAJA/policy/tensor/arch/avx/avx_int32.hpp @@ -56,56 +56,35 @@ class Register __m256i createMask(camp::idx_t N) const { // Generate a mask - return _mm256_set_epi32(N >= 8 ? -1 : 0, - N >= 7 ? -1 : 0, - N >= 6 ? -1 : 0, - N >= 5 ? -1 : 0, - N >= 4 ? -1 : 0, - N >= 3 ? -1 : 0, - N >= 2 ? -1 : 0, - N >= 1 ? -1 : 0); + return _mm256_set_epi32(N >= 8 ? -1 : 0, N >= 7 ? -1 : 0, N >= 6 ? -1 : 0, + N >= 5 ? -1 : 0, N >= 4 ? -1 : 0, N >= 3 ? -1 : 0, + N >= 2 ? -1 : 0, N >= 1 ? -1 : 0); } RAJA_INLINE __m256i createStridedOffsets(camp::idx_t stride) const { // Generate a strided offset list - return _mm256_set_epi32(7 * stride, - 6 * stride, - 5 * stride, - 4 * stride, - 3 * stride, - 2 * stride, - stride, - 0); + return _mm256_set_epi32(7 * stride, 6 * stride, 5 * stride, 4 * stride, + 3 * stride, 2 * stride, stride, 0); } RAJA_INLINE __m256i createPermute1(camp::idx_t N) const { // Generate a permutation for first round of min/max routines - return _mm256_set_epi32(N >= 7 ? 6 : 0, - N >= 8 ? 7 : 0, - N >= 5 ? 4 : 0, - N >= 6 ? 5 : 0, - N >= 3 ? 2 : 0, - N >= 4 ? 3 : 0, - N >= 1 ? 0 : 0, - N >= 2 ? 1 : 0); + return _mm256_set_epi32(N >= 7 ? 6 : 0, N >= 8 ? 7 : 0, N >= 5 ? 4 : 0, + N >= 6 ? 5 : 0, N >= 3 ? 2 : 0, N >= 4 ? 3 : 0, + N >= 1 ? 0 : 0, N >= 2 ? 1 : 0); } RAJA_INLINE __m256i createPermute2(camp::idx_t N) const { // Generate a permutation for second round of min/max routines - return _mm256_set_epi32(N >= 6 ? 5 : 0, - N >= 5 ? 4 : 0, - N >= 8 ? 7 : 0, - N >= 7 ? 6 : 0, - N >= 2 ? 1 : 0, - N >= 1 ? 0 : 0, - N >= 4 ? 3 : 0, - N >= 2 ? 2 : 0); + return _mm256_set_epi32(N >= 6 ? 5 : 0, N >= 5 ? 4 : 0, N >= 8 ? 7 : 0, + N >= 7 ? 6 : 0, N >= 2 ? 1 : 0, N >= 1 ? 0 : 0, + N >= 4 ? 3 : 0, N >= 2 ? 2 : 0); } public: @@ -243,8 +222,7 @@ class Register RAJA_INLINE self_type const& store_packed_n(element_type* ptr, camp::idx_t N) const { - _mm256_maskstore_ps(reinterpret_cast(ptr), - createMask(N), + _mm256_maskstore_ps(reinterpret_cast(ptr), createMask(N), reinterpret_cast<__m256>(m_value)); return *this; } @@ -460,14 +438,10 @@ class Register self_type divide(self_type const& b) const { // AVX2 does not supply an integer divide, so do it manually - return self_type(_mm256_set_epi32(get(7) / b.get(7), - get(6) / b.get(6), - get(5) / b.get(5), - get(4) / b.get(4), - get(3) / b.get(3), - get(2) / b.get(2), - get(1) / b.get(1), - get(0) / b.get(0))); + return self_type(_mm256_set_epi32(get(7) / b.get(7), get(6) / b.get(6), + get(5) / b.get(5), get(4) / b.get(4), + get(3) / b.get(3), get(2) / b.get(2), + get(1) / b.get(1), get(0) / b.get(0))); } RAJA_HOST_DEVICE @@ -475,14 +449,11 @@ class Register self_type divide_n(self_type const& b, camp::idx_t N) const { // AVX2 does not supply an integer divide, so do it manually - return self_type(_mm256_set_epi32(N >= 8 ? get(7) / b.get(7) : 0, - N >= 7 ? get(6) / b.get(6) : 0, - N >= 6 ? get(5) / b.get(5) : 0, - N >= 5 ? get(4) / b.get(4) : 0, - N >= 4 ? get(3) / b.get(3) : 0, - N >= 3 ? get(2) / b.get(2) : 0, - N >= 2 ? get(1) / b.get(1) : 0, - N >= 1 ? get(0) / b.get(0) : 0)); + return self_type(_mm256_set_epi32( + N >= 8 ? get(7) / b.get(7) : 0, N >= 7 ? get(6) / b.get(6) : 0, + N >= 6 ? get(5) / b.get(5) : 0, N >= 5 ? get(4) / b.get(4) : 0, + N >= 4 ? get(3) / b.get(3) : 0, N >= 3 ? get(2) / b.get(2) : 0, + N >= 2 ? get(1) / b.get(1) : 0, N >= 1 ? get(0) / b.get(0) : 0)); } diff --git a/include/RAJA/policy/tensor/arch/avx/avx_int64.hpp b/include/RAJA/policy/tensor/arch/avx/avx_int64.hpp index d5b5cc41d0..313592f70a 100644 --- a/include/RAJA/policy/tensor/arch/avx/avx_int64.hpp +++ b/include/RAJA/policy/tensor/arch/avx/avx_int64.hpp @@ -56,8 +56,8 @@ class Register __m256i createMask(camp::idx_t N) const { // Generate a mask - return _mm256_set_epi64x( - N >= 4 ? -1 : 0, N >= 3 ? -1 : 0, N >= 2 ? -1 : 0, N >= 1 ? -1 : 0); + return _mm256_set_epi64x(N >= 4 ? -1 : 0, N >= 3 ? -1 : 0, N >= 2 ? -1 : 0, + N >= 1 ? -1 : 0); } RAJA_INLINE @@ -204,8 +204,7 @@ class Register RAJA_INLINE self_type const& store_packed_n(element_type* ptr, camp::idx_t N) const { - _mm256_maskstore_pd(reinterpret_cast(ptr), - createMask(N), + _mm256_maskstore_pd(reinterpret_cast(ptr), createMask(N), reinterpret_cast<__m256d>(m_value)); return *this; } @@ -355,10 +354,8 @@ class Register self_type multiply(self_type const& b) const { // AVX2 does not supply an int64_t multiply, so do it manually - return self_type(_mm256_set_epi64x(get(3) * b.get(3), - get(2) * b.get(2), - get(1) * b.get(1), - get(0) * b.get(0))); + return self_type(_mm256_set_epi64x(get(3) * b.get(3), get(2) * b.get(2), + get(1) * b.get(1), get(0) * b.get(0))); } RAJA_HOST_DEVICE @@ -366,10 +363,8 @@ class Register self_type divide(self_type const& b) const { // AVX2 does not supply an integer divide, so do it manually - return self_type(_mm256_set_epi64x(get(3) / b.get(3), - get(2) / b.get(2), - get(1) / b.get(1), - get(0) / b.get(0))); + return self_type(_mm256_set_epi64x(get(3) / b.get(3), get(2) / b.get(2), + get(1) / b.get(1), get(0) / b.get(0))); } RAJA_HOST_DEVICE @@ -377,10 +372,9 @@ class Register self_type divide_n(self_type const& b, camp::idx_t N) const { // AVX2 does not supply an integer divide, so do it manually - return self_type(_mm256_set_epi64x(N >= 4 ? get(3) / b.get(3) : 0, - N >= 3 ? get(2) / b.get(2) : 0, - N >= 2 ? get(1) / b.get(1) : 0, - N >= 1 ? get(0) / b.get(0) : 0)); + return self_type(_mm256_set_epi64x( + N >= 4 ? get(3) / b.get(3) : 0, N >= 3 ? get(2) / b.get(2) : 0, + N >= 2 ? get(1) / b.get(1) : 0, N >= 1 ? get(0) / b.get(0) : 0)); } diff --git a/include/RAJA/policy/tensor/arch/avx2/avx2_double.hpp b/include/RAJA/policy/tensor/arch/avx2/avx2_double.hpp index d2d08ccc06..385f938dd2 100644 --- a/include/RAJA/policy/tensor/arch/avx2/avx2_double.hpp +++ b/include/RAJA/policy/tensor/arch/avx2/avx2_double.hpp @@ -56,8 +56,8 @@ class Register __m256i createMask(camp::idx_t N) const { // Generate a mask - return _mm256_set_epi64x( - N >= 4 ? -1 : 0, N >= 3 ? -1 : 0, N >= 2 ? -1 : 0, N >= 1 ? -1 : 0); + return _mm256_set_epi64x(N >= 4 ? -1 : 0, N >= 3 ? -1 : 0, N >= 2 ? -1 : 0, + N >= 1 ? -1 : 0); } RAJA_INLINE @@ -162,8 +162,8 @@ class Register #ifdef RAJA_ENABLE_VECTOR_STATS RAJA::tensor_stats::num_vector_load_strided++; #endif - m_value = _mm256_i64gather_pd( - ptr, createStridedOffsets(stride), sizeof(element_type)); + m_value = _mm256_i64gather_pd(ptr, createStridedOffsets(stride), + sizeof(element_type)); return *this; } @@ -180,11 +180,9 @@ class Register #ifdef RAJA_ENABLE_VECTOR_STATS RAJA::tensor_stats::num_vector_load_strided_n++; #endif - m_value = _mm256_mask_i64gather_pd(_mm256_setzero_pd(), - ptr, - createStridedOffsets(stride), - _mm256_castsi256_pd(createMask(N)), - sizeof(element_type)); + m_value = _mm256_mask_i64gather_pd( + _mm256_setzero_pd(), ptr, createStridedOffsets(stride), + _mm256_castsi256_pd(createMask(N)), sizeof(element_type)); return *this; } @@ -224,11 +222,9 @@ class Register #ifdef RAJA_ENABLE_VECTOR_STATS RAJA::tensor_stats::num_vector_load_strided_n++; #endif - m_value = _mm256_mask_i64gather_pd(_mm256_setzero_pd(), - ptr, - offsets.get_register(), - _mm256_castsi256_pd(createMask(N)), - sizeof(element_type)); + m_value = _mm256_mask_i64gather_pd( + _mm256_setzero_pd(), ptr, offsets.get_register(), + _mm256_castsi256_pd(createMask(N)), sizeof(element_type)); return *this; } @@ -390,10 +386,9 @@ class Register self_type divide_n(self_type const& b, camp::idx_t N) const { // AVX2 does not supply a masked divide, so do it manually - return self_type(_mm256_set_pd(N >= 4 ? get(3) / b.get(3) : 0, - N >= 3 ? get(2) / b.get(2) : 0, - N >= 2 ? get(1) / b.get(1) : 0, - N >= 1 ? get(0) / b.get(0) : 0)); + return self_type(_mm256_set_pd( + N >= 4 ? get(3) / b.get(3) : 0, N >= 3 ? get(2) / b.get(2) : 0, + N >= 2 ? get(1) / b.get(1) : 0, N >= 1 ? get(0) / b.get(0) : 0)); } // only use FMA's if the compiler has them turned on diff --git a/include/RAJA/policy/tensor/arch/avx2/avx2_float.hpp b/include/RAJA/policy/tensor/arch/avx2/avx2_float.hpp index abb7916790..a9f8bcfe7c 100644 --- a/include/RAJA/policy/tensor/arch/avx2/avx2_float.hpp +++ b/include/RAJA/policy/tensor/arch/avx2/avx2_float.hpp @@ -56,56 +56,35 @@ class Register __m256i createMask(camp::idx_t N) const { // Generate a mask - return _mm256_set_epi32(N >= 8 ? -1 : 0, - N >= 7 ? -1 : 0, - N >= 6 ? -1 : 0, - N >= 5 ? -1 : 0, - N >= 4 ? -1 : 0, - N >= 3 ? -1 : 0, - N >= 2 ? -1 : 0, - N >= 1 ? -1 : 0); + return _mm256_set_epi32(N >= 8 ? -1 : 0, N >= 7 ? -1 : 0, N >= 6 ? -1 : 0, + N >= 5 ? -1 : 0, N >= 4 ? -1 : 0, N >= 3 ? -1 : 0, + N >= 2 ? -1 : 0, N >= 1 ? -1 : 0); } RAJA_INLINE __m256i createStridedOffsets(camp::idx_t stride) const { // Generate a strided offset list - return _mm256_set_epi32(7 * stride, - 6 * stride, - 5 * stride, - 4 * stride, - 3 * stride, - 2 * stride, - stride, - 0); + return _mm256_set_epi32(7 * stride, 6 * stride, 5 * stride, 4 * stride, + 3 * stride, 2 * stride, stride, 0); } RAJA_INLINE __m256i createPermute1(camp::idx_t N) const { // Generate a permutation for first round of min/max routines - return _mm256_set_epi32(N >= 7 ? 6 : 0, - N >= 8 ? 7 : 0, - N >= 5 ? 4 : 0, - N >= 6 ? 5 : 0, - N >= 3 ? 2 : 0, - N >= 4 ? 3 : 0, - N >= 1 ? 0 : 0, - N >= 2 ? 1 : 0); + return _mm256_set_epi32(N >= 7 ? 6 : 0, N >= 8 ? 7 : 0, N >= 5 ? 4 : 0, + N >= 6 ? 5 : 0, N >= 3 ? 2 : 0, N >= 4 ? 3 : 0, + N >= 1 ? 0 : 0, N >= 2 ? 1 : 0); } RAJA_INLINE __m256i createPermute2(camp::idx_t N) const { // Generate a permutation for second round of min/max routines - return _mm256_set_epi32(N >= 6 ? 5 : 0, - N >= 5 ? 4 : 0, - N >= 8 ? 7 : 0, - N >= 7 ? 6 : 0, - N >= 2 ? 1 : 0, - N >= 1 ? 0 : 0, - N >= 4 ? 3 : 0, - N >= 2 ? 2 : 0); + return _mm256_set_epi32(N >= 6 ? 5 : 0, N >= 5 ? 4 : 0, N >= 8 ? 7 : 0, + N >= 7 ? 6 : 0, N >= 2 ? 1 : 0, N >= 1 ? 0 : 0, + N >= 4 ? 3 : 0, N >= 2 ? 2 : 0); } public: @@ -198,8 +177,8 @@ class Register RAJA_INLINE self_type& load_strided(element_type const* ptr, camp::idx_t stride) { - m_value = _mm256_i32gather_ps( - ptr, createStridedOffsets(stride), sizeof(element_type)); + m_value = _mm256_i32gather_ps(ptr, createStridedOffsets(stride), + sizeof(element_type)); return *this; } @@ -213,11 +192,9 @@ class Register self_type& load_strided_n(element_type const* ptr, camp::idx_t stride, camp::idx_t N) { - m_value = _mm256_mask_i32gather_ps(_mm256_setzero_ps(), - ptr, - createStridedOffsets(stride), - _mm256_castsi256_ps(createMask(N)), - sizeof(element_type)); + m_value = _mm256_mask_i32gather_ps( + _mm256_setzero_ps(), ptr, createStridedOffsets(stride), + _mm256_castsi256_ps(createMask(N)), sizeof(element_type)); return *this; } @@ -346,14 +323,11 @@ class Register self_type divide_n(self_type const& b, camp::idx_t N) const { // AVX2 does not supply a masked divide - return self_type(_mm256_set_ps(N >= 8 ? get(7) / b.get(7) : 0, - N >= 7 ? get(6) / b.get(6) : 0, - N >= 6 ? get(5) / b.get(5) : 0, - N >= 5 ? get(4) / b.get(4) : 0, - N >= 4 ? get(3) / b.get(3) : 0, - N >= 3 ? get(2) / b.get(2) : 0, - N >= 2 ? get(1) / b.get(1) : 0, - N >= 1 ? get(0) / b.get(0) : 0)); + return self_type(_mm256_set_ps( + N >= 8 ? get(7) / b.get(7) : 0, N >= 7 ? get(6) / b.get(6) : 0, + N >= 6 ? get(5) / b.get(5) : 0, N >= 5 ? get(4) / b.get(4) : 0, + N >= 4 ? get(3) / b.get(3) : 0, N >= 3 ? get(2) / b.get(2) : 0, + N >= 2 ? get(1) / b.get(1) : 0, N >= 1 ? get(0) / b.get(0) : 0)); } // only use FMA's if the compiler has them turned on diff --git a/include/RAJA/policy/tensor/arch/avx2/avx2_int32.hpp b/include/RAJA/policy/tensor/arch/avx2/avx2_int32.hpp index ee148bd6bb..af488521ef 100644 --- a/include/RAJA/policy/tensor/arch/avx2/avx2_int32.hpp +++ b/include/RAJA/policy/tensor/arch/avx2/avx2_int32.hpp @@ -57,56 +57,35 @@ class Register __m256i createMask(camp::idx_t N) const { // Generate a mask - return _mm256_set_epi32(N >= 8 ? -1 : 0, - N >= 7 ? -1 : 0, - N >= 6 ? -1 : 0, - N >= 5 ? -1 : 0, - N >= 4 ? -1 : 0, - N >= 3 ? -1 : 0, - N >= 2 ? -1 : 0, - N >= 1 ? -1 : 0); + return _mm256_set_epi32(N >= 8 ? -1 : 0, N >= 7 ? -1 : 0, N >= 6 ? -1 : 0, + N >= 5 ? -1 : 0, N >= 4 ? -1 : 0, N >= 3 ? -1 : 0, + N >= 2 ? -1 : 0, N >= 1 ? -1 : 0); } RAJA_INLINE __m256i createStridedOffsets(camp::idx_t stride) const { // Generate a strided offset list - return _mm256_set_epi32(7 * stride, - 6 * stride, - 5 * stride, - 4 * stride, - 3 * stride, - 2 * stride, - stride, - 0); + return _mm256_set_epi32(7 * stride, 6 * stride, 5 * stride, 4 * stride, + 3 * stride, 2 * stride, stride, 0); } RAJA_INLINE __m256i createPermute1(camp::idx_t N) const { // Generate a permutation for first round of min/max routines - return _mm256_set_epi32(N >= 7 ? 6 : 0, - N >= 8 ? 7 : 0, - N >= 5 ? 4 : 0, - N >= 6 ? 5 : 0, - N >= 3 ? 2 : 0, - N >= 4 ? 3 : 0, - N >= 1 ? 0 : 0, - N >= 2 ? 1 : 0); + return _mm256_set_epi32(N >= 7 ? 6 : 0, N >= 8 ? 7 : 0, N >= 5 ? 4 : 0, + N >= 6 ? 5 : 0, N >= 3 ? 2 : 0, N >= 4 ? 3 : 0, + N >= 1 ? 0 : 0, N >= 2 ? 1 : 0); } RAJA_INLINE __m256i createPermute2(camp::idx_t N) const { // Generate a permutation for second round of min/max routines - return _mm256_set_epi32(N >= 6 ? 5 : 0, - N >= 5 ? 4 : 0, - N >= 8 ? 7 : 0, - N >= 7 ? 6 : 0, - N >= 2 ? 1 : 0, - N >= 1 ? 0 : 0, - N >= 4 ? 3 : 0, - N >= 2 ? 2 : 0); + return _mm256_set_epi32(N >= 6 ? 5 : 0, N >= 5 ? 4 : 0, N >= 8 ? 7 : 0, + N >= 7 ? 6 : 0, N >= 2 ? 1 : 0, N >= 1 ? 0 : 0, + N >= 4 ? 3 : 0, N >= 2 ? 2 : 0); } public: @@ -202,8 +181,8 @@ class Register RAJA_INLINE self_type& load_strided(element_type const* ptr, camp::idx_t stride) { - m_value = _mm256_i32gather_epi32( - ptr, createStridedOffsets(stride), sizeof(element_type)); + m_value = _mm256_i32gather_epi32(ptr, createStridedOffsets(stride), + sizeof(element_type)); return *this; } @@ -217,11 +196,9 @@ class Register self_type& load_strided_n(element_type const* ptr, camp::idx_t stride, camp::idx_t N) { - m_value = _mm256_mask_i32gather_epi32(_mm256_setzero_si256(), - ptr, + m_value = _mm256_mask_i32gather_epi32(_mm256_setzero_si256(), ptr, createStridedOffsets(stride), - createMask(N), - sizeof(element_type)); + createMask(N), sizeof(element_type)); return *this; } @@ -416,14 +393,10 @@ class Register self_type divide(self_type const& b) const { // AVX2 does not supply an integer divide, so do it manually - return self_type(_mm256_set_epi32(get(7) / b.get(7), - get(6) / b.get(6), - get(5) / b.get(5), - get(4) / b.get(4), - get(3) / b.get(3), - get(2) / b.get(2), - get(1) / b.get(1), - get(0) / b.get(0))); + return self_type(_mm256_set_epi32(get(7) / b.get(7), get(6) / b.get(6), + get(5) / b.get(5), get(4) / b.get(4), + get(3) / b.get(3), get(2) / b.get(2), + get(1) / b.get(1), get(0) / b.get(0))); } RAJA_HOST_DEVICE @@ -431,14 +404,11 @@ class Register self_type divide_n(self_type const& b, camp::idx_t N) const { // AVX2 does not supply an integer divide, so do it manually - return self_type(_mm256_set_epi32(N >= 8 ? get(7) / b.get(7) : 0, - N >= 7 ? get(6) / b.get(6) : 0, - N >= 6 ? get(5) / b.get(5) : 0, - N >= 5 ? get(4) / b.get(4) : 0, - N >= 4 ? get(3) / b.get(3) : 0, - N >= 3 ? get(2) / b.get(2) : 0, - N >= 2 ? get(1) / b.get(1) : 0, - N >= 1 ? get(0) / b.get(0) : 0)); + return self_type(_mm256_set_epi32( + N >= 8 ? get(7) / b.get(7) : 0, N >= 7 ? get(6) / b.get(6) : 0, + N >= 6 ? get(5) / b.get(5) : 0, N >= 5 ? get(4) / b.get(4) : 0, + N >= 4 ? get(3) / b.get(3) : 0, N >= 3 ? get(2) / b.get(2) : 0, + N >= 2 ? get(1) / b.get(1) : 0, N >= 1 ? get(0) / b.get(0) : 0)); } diff --git a/include/RAJA/policy/tensor/arch/avx2/avx2_int64.hpp b/include/RAJA/policy/tensor/arch/avx2/avx2_int64.hpp index 41cdf1c1e0..c0cc3b7012 100644 --- a/include/RAJA/policy/tensor/arch/avx2/avx2_int64.hpp +++ b/include/RAJA/policy/tensor/arch/avx2/avx2_int64.hpp @@ -55,8 +55,8 @@ class Register __m256i createMask(camp::idx_t N) const { // Generate a mask - return _mm256_set_epi64x( - N >= 4 ? -1 : 0, N >= 3 ? -1 : 0, N >= 2 ? -1 : 0, N >= 1 ? -1 : 0); + return _mm256_set_epi64x(N >= 4 ? -1 : 0, N >= 3 ? -1 : 0, N >= 2 ? -1 : 0, + N >= 1 ? -1 : 0); } RAJA_INLINE @@ -182,12 +182,9 @@ class Register self_type& load_strided_n(element_type const* ptr, camp::idx_t stride, camp::idx_t N) { - m_value = - _mm256_mask_i64gather_epi64(_mm256_set1_epi64x(0), - reinterpret_cast(ptr), - createStridedOffsets(stride), - createMask(N), - sizeof(element_type)); + m_value = _mm256_mask_i64gather_epi64( + _mm256_set1_epi64x(0), reinterpret_cast(ptr), + createStridedOffsets(stride), createMask(N), sizeof(element_type)); return *this; } @@ -206,9 +203,9 @@ class Register #ifdef RAJA_ENABLE_VECTOR_STATS RAJA::tensor_stats::num_vector_load_strided_n++; #endif - m_value = _mm256_i64gather_epi64(reinterpret_cast(ptr), - offsets.get_register(), - sizeof(element_type)); + m_value = + _mm256_i64gather_epi64(reinterpret_cast(ptr), + offsets.get_register(), sizeof(element_type)); return *this; } @@ -228,12 +225,9 @@ class Register #ifdef RAJA_ENABLE_VECTOR_STATS RAJA::tensor_stats::num_vector_load_strided_n++; #endif - m_value = - _mm256_mask_i64gather_epi64(_mm256_setzero_si256(), - reinterpret_cast(ptr), - offsets.get_register(), - createMask(N), - sizeof(element_type)); + m_value = _mm256_mask_i64gather_epi64( + _mm256_setzero_si256(), reinterpret_cast(ptr), + offsets.get_register(), createMask(N), sizeof(element_type)); return *this; } @@ -256,8 +250,8 @@ class Register RAJA_INLINE self_type const& store_packed_n(element_type* ptr, camp::idx_t N) const { - _mm256_maskstore_epi64( - reinterpret_cast(ptr), createMask(N), m_value); + _mm256_maskstore_epi64(reinterpret_cast(ptr), createMask(N), + m_value); return *this; } @@ -380,10 +374,8 @@ class Register self_type multiply(self_type const& b) const { // AVX2 does not supply an int64_t multiply, so do it manually - return self_type(_mm256_set_epi64x(get(3) * b.get(3), - get(2) * b.get(2), - get(1) * b.get(1), - get(0) * b.get(0))); + return self_type(_mm256_set_epi64x(get(3) * b.get(3), get(2) * b.get(2), + get(1) * b.get(1), get(0) * b.get(0))); } RAJA_HOST_DEVICE @@ -391,10 +383,8 @@ class Register self_type divide(self_type const& b) const { // AVX2 does not supply an integer divide, so do it manually - return self_type(_mm256_set_epi64x(get(3) / b.get(3), - get(2) / b.get(2), - get(1) / b.get(1), - get(0) / b.get(0))); + return self_type(_mm256_set_epi64x(get(3) / b.get(3), get(2) / b.get(2), + get(1) / b.get(1), get(0) / b.get(0))); } RAJA_HOST_DEVICE @@ -402,10 +392,9 @@ class Register self_type divide_n(self_type const& b, camp::idx_t N) const { // AVX2 does not supply an integer divide, so do it manually - return self_type(_mm256_set_epi64x(N >= 4 ? get(3) / b.get(3) : 0, - N >= 3 ? get(2) / b.get(2) : 0, - N >= 2 ? get(1) / b.get(1) : 0, - N >= 1 ? get(0) / b.get(0) : 0)); + return self_type(_mm256_set_epi64x( + N >= 4 ? get(3) / b.get(3) : 0, N >= 3 ? get(2) / b.get(2) : 0, + N >= 2 ? get(1) / b.get(1) : 0, N >= 1 ? get(0) / b.get(0) : 0)); } diff --git a/include/RAJA/policy/tensor/arch/avx512/avx512_double.hpp b/include/RAJA/policy/tensor/arch/avx512/avx512_double.hpp index 7f45c06141..b35ef7f595 100644 --- a/include/RAJA/policy/tensor/arch/avx512/avx512_double.hpp +++ b/include/RAJA/policy/tensor/arch/avx512/avx512_double.hpp @@ -166,8 +166,8 @@ class Register self_type& load_strided(element_type const* ptr, camp::idx_t stride) { // AVX512F - m_value = _mm512_i64gather_pd( - createStridedOffsets(stride), ptr, sizeof(element_type)); + m_value = _mm512_i64gather_pd(createStridedOffsets(stride), ptr, + sizeof(element_type)); return *this; } @@ -182,10 +182,8 @@ class Register load_strided_n(element_type const* ptr, camp::idx_t stride, camp::idx_t N) { // AVX512F - m_value = _mm512_mask_i64gather_pd(_mm512_setzero_pd(), - createMask(N), - createStridedOffsets(stride), - ptr, + m_value = _mm512_mask_i64gather_pd(_mm512_setzero_pd(), createMask(N), + createStridedOffsets(stride), ptr, sizeof(element_type)); return *this; } @@ -223,8 +221,8 @@ class Register self_type const& store_strided(element_type* ptr, camp::idx_t stride) const { // AVX512F - _mm512_i64scatter_pd( - ptr, createStridedOffsets(stride), m_value, sizeof(element_type)); + _mm512_i64scatter_pd(ptr, createStridedOffsets(stride), m_value, + sizeof(element_type)); return *this; } @@ -238,11 +236,8 @@ class Register store_strided_n(element_type* ptr, camp::idx_t stride, camp::idx_t N) const { // AVX512F - _mm512_mask_i64scatter_pd(ptr, - createMask(N), - createStridedOffsets(stride), - m_value, - sizeof(element_type)); + _mm512_mask_i64scatter_pd(ptr, createMask(N), createStridedOffsets(stride), + m_value, sizeof(element_type)); return *this; } diff --git a/include/RAJA/policy/tensor/arch/avx512/avx512_float.hpp b/include/RAJA/policy/tensor/arch/avx512/avx512_float.hpp index 2082293046..ccc6991ccc 100644 --- a/include/RAJA/policy/tensor/arch/avx512/avx512_float.hpp +++ b/include/RAJA/policy/tensor/arch/avx512/avx512_float.hpp @@ -181,8 +181,8 @@ class Register self_type& load_strided(element_type const* ptr, camp::idx_t stride) { // AVX512F - m_value = _mm512_i32gather_ps( - createStridedOffsets(stride), ptr, sizeof(element_type)); + m_value = _mm512_i32gather_ps(createStridedOffsets(stride), ptr, + sizeof(element_type)); return *this; } @@ -197,10 +197,8 @@ class Register load_strided_n(element_type const* ptr, camp::idx_t stride, camp::idx_t N) { // AVX512F - m_value = _mm512_mask_i32gather_ps(_mm512_setzero_ps(), - createMask(N), - createStridedOffsets(stride), - ptr, + m_value = _mm512_mask_i32gather_ps(_mm512_setzero_ps(), createMask(N), + createStridedOffsets(stride), ptr, sizeof(element_type)); return *this; } @@ -238,8 +236,8 @@ class Register self_type const& store_strided(element_type* ptr, camp::idx_t stride) const { // AVX512F - _mm512_i32scatter_ps( - ptr, createStridedOffsets(stride), m_value, sizeof(element_type)); + _mm512_i32scatter_ps(ptr, createStridedOffsets(stride), m_value, + sizeof(element_type)); return *this; } @@ -253,11 +251,8 @@ class Register store_strided_n(element_type* ptr, camp::idx_t stride, camp::idx_t N) const { // AVX512F - _mm512_mask_i32scatter_ps(ptr, - createMask(N), - createStridedOffsets(stride), - m_value, - sizeof(element_type)); + _mm512_mask_i32scatter_ps(ptr, createMask(N), createStridedOffsets(stride), + m_value, sizeof(element_type)); return *this; } diff --git a/include/RAJA/policy/tensor/arch/avx512/avx512_int32.hpp b/include/RAJA/policy/tensor/arch/avx512/avx512_int32.hpp index 4645ed6cf4..324a50db3a 100644 --- a/include/RAJA/policy/tensor/arch/avx512/avx512_int32.hpp +++ b/include/RAJA/policy/tensor/arch/avx512/avx512_int32.hpp @@ -187,8 +187,8 @@ class Register self_type& load_strided(element_type const* ptr, camp::idx_t stride) { // AVX512F - m_value = _mm512_i32gather_epi32( - createStridedOffsets(stride), ptr, sizeof(element_type)); + m_value = _mm512_i32gather_epi32(createStridedOffsets(stride), ptr, + sizeof(element_type)); return *this; } @@ -203,10 +203,8 @@ class Register load_strided_n(element_type const* ptr, camp::idx_t stride, camp::idx_t N) { // AVX512F - m_value = _mm512_mask_i32gather_epi32(_mm512_setzero_epi32(), - createMask(N), - createStridedOffsets(stride), - ptr, + m_value = _mm512_mask_i32gather_epi32(_mm512_setzero_epi32(), createMask(N), + createStridedOffsets(stride), ptr, sizeof(element_type)); return *this; } @@ -248,8 +246,8 @@ class Register self_type const& store_strided(element_type* ptr, camp::idx_t stride) const { // AVX512F - _mm512_i32scatter_epi32( - ptr, createStridedOffsets(stride), m_value, sizeof(element_type)); + _mm512_i32scatter_epi32(ptr, createStridedOffsets(stride), m_value, + sizeof(element_type)); return *this; } @@ -263,10 +261,8 @@ class Register store_strided_n(element_type* ptr, camp::idx_t stride, camp::idx_t N) const { // AVX512F - _mm512_mask_i32scatter_epi32(ptr, - createMask(N), - createStridedOffsets(stride), - m_value, + _mm512_mask_i32scatter_epi32(ptr, createMask(N), + createStridedOffsets(stride), m_value, sizeof(element_type)); return *this; } @@ -378,22 +374,13 @@ class Register self_type divide(self_type const& b) const { // AVX512 does not supply an integer divide, so do it manually - return self_type(_mm512_set_epi32(get(15) / b.get(15), - get(14) / b.get(14), - get(13) / b.get(13), - get(12) / b.get(12), - get(11) / b.get(11), - get(10) / b.get(10), - get(9) / b.get(9), - get(8) / b.get(8), - get(7) / b.get(7), - get(6) / b.get(6), - get(5) / b.get(5), - get(4) / b.get(4), - get(3) / b.get(3), - get(2) / b.get(2), - get(1) / b.get(1), - get(0) / b.get(0))); + return self_type(_mm512_set_epi32( + get(15) / b.get(15), get(14) / b.get(14), get(13) / b.get(13), + get(12) / b.get(12), get(11) / b.get(11), get(10) / b.get(10), + get(9) / b.get(9), get(8) / b.get(8), get(7) / b.get(7), + get(6) / b.get(6), get(5) / b.get(5), get(4) / b.get(4), + get(3) / b.get(3), get(2) / b.get(2), get(1) / b.get(1), + get(0) / b.get(0))); } RAJA_HOST_DEVICE @@ -401,22 +388,15 @@ class Register self_type divide_n(self_type const& b, camp::idx_t N) const { // AVX512 does not supply an integer divide, so do it manually - return self_type(_mm512_set_epi32(N >= 16 ? get(15) / b.get(15) : 0, - N >= 15 ? get(14) / b.get(14) : 0, - N >= 14 ? get(13) / b.get(13) : 0, - N >= 13 ? get(12) / b.get(12) : 0, - N >= 12 ? get(11) / b.get(11) : 0, - N >= 11 ? get(10) / b.get(10) : 0, - N >= 10 ? get(9) / b.get(9) : 0, - N >= 9 ? get(8) / b.get(8) : 0, - N >= 8 ? get(7) / b.get(7) : 0, - N >= 7 ? get(6) / b.get(6) : 0, - N >= 6 ? get(5) / b.get(5) : 0, - N >= 5 ? get(4) / b.get(4) : 0, - N >= 4 ? get(3) / b.get(3) : 0, - N >= 3 ? get(2) / b.get(2) : 0, - N >= 2 ? get(1) / b.get(1) : 0, - N >= 1 ? get(0) / b.get(0) : 0)); + return self_type(_mm512_set_epi32( + N >= 16 ? get(15) / b.get(15) : 0, N >= 15 ? get(14) / b.get(14) : 0, + N >= 14 ? get(13) / b.get(13) : 0, N >= 13 ? get(12) / b.get(12) : 0, + N >= 12 ? get(11) / b.get(11) : 0, N >= 11 ? get(10) / b.get(10) : 0, + N >= 10 ? get(9) / b.get(9) : 0, N >= 9 ? get(8) / b.get(8) : 0, + N >= 8 ? get(7) / b.get(7) : 0, N >= 7 ? get(6) / b.get(6) : 0, + N >= 6 ? get(5) / b.get(5) : 0, N >= 5 ? get(4) / b.get(4) : 0, + N >= 4 ? get(3) / b.get(3) : 0, N >= 3 ? get(2) / b.get(2) : 0, + N >= 2 ? get(1) / b.get(1) : 0, N >= 1 ? get(0) / b.get(0) : 0)); } diff --git a/include/RAJA/policy/tensor/arch/avx512/avx512_int64.hpp b/include/RAJA/policy/tensor/arch/avx512/avx512_int64.hpp index 655b06c8fd..9266d0b979 100644 --- a/include/RAJA/policy/tensor/arch/avx512/avx512_int64.hpp +++ b/include/RAJA/policy/tensor/arch/avx512/avx512_int64.hpp @@ -175,8 +175,8 @@ class Register self_type& load_strided(element_type const* ptr, camp::idx_t stride) { // AVX512F - m_value = _mm512_i64gather_epi64( - createStridedOffsets(stride), ptr, sizeof(element_type)); + m_value = _mm512_i64gather_epi64(createStridedOffsets(stride), ptr, + sizeof(element_type)); return *this; } @@ -191,10 +191,8 @@ class Register load_strided_n(element_type const* ptr, camp::idx_t stride, camp::idx_t N) { // AVX512F - m_value = _mm512_mask_i64gather_epi64(_mm512_setzero_epi32(), - createMask(N), - createStridedOffsets(stride), - ptr, + m_value = _mm512_mask_i64gather_epi64(_mm512_setzero_epi32(), createMask(N), + createStridedOffsets(stride), ptr, sizeof(element_type)); return *this; } @@ -211,13 +209,13 @@ class Register #if (defined(__GNUC__) && ((__GNUC__ >= 7) && (__GNUC__ <= 10))) || \ (!defined(SYCL_LANGUAGE_VERSION) && \ defined(__INTEL_LLVM_COMPILER)) // Check for oneapi's icpx. - _mm512_mask_storeu_epi64(ptr, - ~0, - m_value); // May cause slowdown due to looping over - // 8 bytes, one at a time. + _mm512_mask_storeu_epi64(ptr, ~0, + m_value); // May cause slowdown due to looping + // over 8 bytes, one at a time. #else - _mm512_storeu_epi64(ptr, m_value); // GNU 7-10 are missing this instruction, - // as is icpx as of version 2022.2. + _mm512_storeu_epi64(ptr, + m_value); // GNU 7-10 are missing this instruction, + // as is icpx as of version 2022.2. #endif return *this; } @@ -242,8 +240,8 @@ class Register self_type const& store_strided(element_type* ptr, camp::idx_t stride) const { // AVX512F - _mm512_i64scatter_epi64( - ptr, createStridedOffsets(stride), m_value, sizeof(element_type)); + _mm512_i64scatter_epi64(ptr, createStridedOffsets(stride), m_value, + sizeof(element_type)); return *this; } @@ -257,10 +255,8 @@ class Register store_strided_n(element_type* ptr, camp::idx_t stride, camp::idx_t N) const { // AVX512F - _mm512_mask_i64scatter_epi64(ptr, - createMask(N), - createStridedOffsets(stride), - m_value, + _mm512_mask_i64scatter_epi64(ptr, createMask(N), + createStridedOffsets(stride), m_value, sizeof(element_type)); return *this; } @@ -329,14 +325,10 @@ class Register self_type divide(self_type const& b) const { // AVX512 does not supply an integer divide, so do it manually - return self_type(_mm512_set_epi64(get(7) / b.get(7), - get(6) / b.get(6), - get(5) / b.get(5), - get(4) / b.get(4), - get(3) / b.get(3), - get(2) / b.get(2), - get(1) / b.get(1), - get(0) / b.get(0))); + return self_type(_mm512_set_epi64(get(7) / b.get(7), get(6) / b.get(6), + get(5) / b.get(5), get(4) / b.get(4), + get(3) / b.get(3), get(2) / b.get(2), + get(1) / b.get(1), get(0) / b.get(0))); } RAJA_HOST_DEVICE @@ -344,14 +336,11 @@ class Register self_type divide_n(self_type const& b, camp::idx_t N) const { // AVX512 does not supply an integer divide, so do it manually - return self_type(_mm512_set_epi64(N >= 8 ? get(7) / b.get(7) : 0, - N >= 7 ? get(6) / b.get(6) : 0, - N >= 6 ? get(5) / b.get(5) : 0, - N >= 5 ? get(4) / b.get(4) : 0, - N >= 4 ? get(3) / b.get(3) : 0, - N >= 3 ? get(2) / b.get(2) : 0, - N >= 2 ? get(1) / b.get(1) : 0, - N >= 1 ? get(0) / b.get(0) : 0)); + return self_type(_mm512_set_epi64( + N >= 8 ? get(7) / b.get(7) : 0, N >= 7 ? get(6) / b.get(6) : 0, + N >= 6 ? get(5) / b.get(5) : 0, N >= 5 ? get(4) / b.get(4) : 0, + N >= 4 ? get(3) / b.get(3) : 0, N >= 3 ? get(2) / b.get(2) : 0, + N >= 2 ? get(1) / b.get(1) : 0, N >= 1 ? get(0) / b.get(0) : 0)); } /*! diff --git a/include/RAJA/util/Layout.hpp b/include/RAJA/util/Layout.hpp index 6f64d403fd..3520983ee5 100644 --- a/include/RAJA/util/Layout.hpp +++ b/include/RAJA/util/Layout.hpp @@ -144,8 +144,7 @@ struct LayoutBase_impl, IdxLin, StrideOneDim> RAJA_INLINE RAJA_HOST_DEVICE void BoundsCheckError(Idx idx) const { printf("Error at index %d, value %ld is not within bounds [0, %ld] \n", - static_cast(N), - static_cast(idx), + static_cast(N), static_cast(idx), static_cast(sizes[N] - 1)); RAJA_ABORT_OR_THROW("Out of bounds error \n"); } diff --git a/include/RAJA/util/OffsetLayout.hpp b/include/RAJA/util/OffsetLayout.hpp index 0ead8684ad..19ef3f12e7 100644 --- a/include/RAJA/util/OffsetLayout.hpp +++ b/include/RAJA/util/OffsetLayout.hpp @@ -75,8 +75,7 @@ struct OffsetLayout_impl, IdxLin> RAJA_INLINE RAJA_HOST_DEVICE void BoundsCheckError(Idx idx) const { printf("Error at index %d, value %ld is not within bounds [%ld, %ld] \n", - static_cast(N), - static_cast(idx), + static_cast(N), static_cast(idx), static_cast(offsets[N]), static_cast(offsets[N] + base_.sizes[N] - 1)); RAJA_ABORT_OR_THROW("Out of bounds error \n"); diff --git a/include/RAJA/util/StaticLayout.hpp b/include/RAJA/util/StaticLayout.hpp index cf86f26ec8..f1e528e05b 100644 --- a/include/RAJA/util/StaticLayout.hpp +++ b/include/RAJA/util/StaticLayout.hpp @@ -76,9 +76,7 @@ struct StaticLayoutBase_impl( internal::expt::getTensorDim() == DIM ? internal::expt::getTensorBegin( - args, - layout.template get_dim_begin< - GetTensorArgIdx::value>()) + args, layout.template get_dim_begin< + GetTensorArgIdx::value>()) : 0 ...); } @@ -165,9 +164,8 @@ RAJA_INLINE RAJA_HOST_DEVICE static constexpr camp::idx_t return RAJA::max( internal::expt::getTensorDim() == DIM ? internal::expt::getTensorSize( - args, - layout.template get_dim_size< - GetTensorArgIdx::value>()) + args, layout.template get_dim_size< + GetTensorArgIdx::value>()) : 0 ...); } #endif @@ -461,10 +459,7 @@ RAJA_INLINE RAJA_HOST_DEVICE constexpr view_return_type_t::value>, - camp::list, - ElementType, - PointerType, - LinIdx, + camp::list, ElementType, PointerType, LinIdx, LayoutType>::make_return(layout, data, args...); } @@ -782,8 +777,7 @@ class TypedViewBase( - Base::m_layout, - Base::m_data, + Base::m_layout, Base::m_data, match_typed_view_arg(args)...); } @@ -803,8 +797,7 @@ class TypedViewBase( - Base::m_layout, - Base::m_data, + Base::m_layout, Base::m_data, match_typed_view_arg(args)...); } diff --git a/include/RAJA/util/View.hpp b/include/RAJA/util/View.hpp index 074cc8b65e..943f843c04 100644 --- a/include/RAJA/util/View.hpp +++ b/include/RAJA/util/View.hpp @@ -127,8 +127,7 @@ removenth(Lay lyout, Tup&& tup) -> decltype(selecttuple( >{})) { return selecttuple( - lyout, - std::forward(tup), + lyout, std::forward(tup), cat_seq_t, // sequence up to Nth offset_seq_t::value - @@ -198,8 +197,7 @@ struct MultiView typename add_offset::type shift_layout(layout); shift_layout.shift(shift); - return RAJA::MultiView::type, + return RAJA::MultiView::type, P2Pidx>(data, shift_layout); } diff --git a/include/RAJA/util/basic_mempool.hpp b/include/RAJA/util/basic_mempool.hpp index 9139f981b4..9ee1c0270f 100644 --- a/include/RAJA/util/basic_mempool.hpp +++ b/include/RAJA/util/basic_mempool.hpp @@ -106,8 +106,8 @@ class MemoryArena ptr_out = adj_ptr; - remove_free_chunk( - iter, adj_ptr, static_cast(adj_ptr) + nbytes); + remove_free_chunk(iter, adj_ptr, + static_cast(adj_ptr) + nbytes); add_used_chunk(adj_ptr, static_cast(adj_ptr) + nbytes); diff --git a/include/RAJA/util/for_each.hpp b/include/RAJA/util/for_each.hpp index 17e3d63ce1..f83281e50e 100644 --- a/include/RAJA/util/for_each.hpp +++ b/include/RAJA/util/for_each.hpp @@ -120,8 +120,7 @@ template RAJA_HOST_DEVICE RAJA_INLINE UnaryFunc for_each_tuple(Tuple&& t, UnaryFunc func) { return detail::for_each_tuple( - std::forward(t), - std::move(func), + std::forward(t), std::move(func), camp::make_idx_seq_t>::value>{}); } diff --git a/include/RAJA/util/reduce.hpp b/include/RAJA/util/reduce.hpp index 83f7d14f78..84b37d8b4a 100644 --- a/include/RAJA/util/reduce.hpp +++ b/include/RAJA/util/reduce.hpp @@ -337,8 +337,8 @@ RAJA_HOST_DEVICE static_assert(type_traits::is_binary_function::value, "BinaryOp must model BinaryFunction"); - return detail::left_fold_reduce( - begin(c), end(c), std::move(init), std::move(op)); + return detail::left_fold_reduce(begin(c), end(c), std::move(init), + std::move(op)); } /*! @@ -360,8 +360,8 @@ RAJA_HOST_DEVICE static_assert(type_traits::is_binary_function::value, "BinaryOp must model BinaryFunction"); - return detail::binary_tree_reduce( - begin(c), end(c), std::move(init), std::move(op)); + return detail::binary_tree_reduce(begin(c), end(c), std::move(init), + std::move(op)); } /*! @@ -384,8 +384,8 @@ RAJA_HOST_DEVICE static_assert(type_traits::is_binary_function::value, "BinaryOp must model BinaryFunction"); - return detail::high_accuracy_reduce( - begin(c), end(c), std::move(init), std::move(op)); + return detail::high_accuracy_reduce(begin(c), end(c), std::move(init), + std::move(op)); } } // namespace RAJA diff --git a/include/RAJA/util/sort.hpp b/include/RAJA/util/sort.hpp index d059d707e7..d4261d1e70 100644 --- a/include/RAJA/util/sort.hpp +++ b/include/RAJA/util/sort.hpp @@ -143,45 +143,14 @@ get_shell_stride(int i) using array_type = long long unsigned[num_shell_strides()]; return (array_type{ // strides from M. Ciura 2001 - 1llu, - 4llu, - 10llu, - 23llu, - 57llu, - 132llu, - 301llu, - 701llu, - 1750llu, + 1llu, 4llu, 10llu, 23llu, 57llu, 132llu, 301llu, 701llu, 1750llu, // extended up to 2^47 with strides[n] = floor(2.25*strides[n-1]) - 3937llu, - 8858llu, - 19930llu, - 44842llu, - 100894llu, - 227011llu, - 510774llu, - 1149241llu, - 2585792llu, - 5818032llu, - 13090572llu, - 29453787llu, - 66271020llu, - 149109795llu, - 335497038llu, - 754868335llu, - 1698453753llu, - 3821520944llu, - 8598422124llu, - 19346449779llu, - 43529512002llu, - 97941402004llu, - 220368154509llu, - 495828347645llu, - 1115613782201llu, - 2510131009952llu, - 5647794772392llu, - 12707538237882llu, - 28591961035234llu, + 3937llu, 8858llu, 19930llu, 44842llu, 100894llu, 227011llu, 510774llu, + 1149241llu, 2585792llu, 5818032llu, 13090572llu, 29453787llu, 66271020llu, + 149109795llu, 335497038llu, 754868335llu, 1698453753llu, 3821520944llu, + 8598422124llu, 19346449779llu, 43529512002llu, 97941402004llu, + 220368154509llu, 495828347645llu, 1115613782201llu, 2510131009952llu, + 5647794772392llu, 12707538237882llu, 28591961035234llu, 64331912329276llu})[i]; } @@ -683,21 +652,15 @@ RAJA_INLINE void merge_sort(Iter begin, Iter end, Compare comp) if (copyvalid) // switch arrays per level of merging to avoid copying // back to copyarr { - detail::merge_like_std(copyarr + start, - copyarr + start + midpoint, - copyarr + start + midpoint, - copyarr + finish, - begin + start, - comp); + detail::merge_like_std(copyarr + start, copyarr + start + midpoint, + copyarr + start + midpoint, copyarr + finish, + begin + start, comp); } else { - detail::merge_like_std(begin + start, - begin + start + midpoint, - begin + start + midpoint, - begin + finish, - copyarr + start, - comp); + detail::merge_like_std(begin + start, begin + start + midpoint, + begin + start + midpoint, begin + finish, + copyarr + start, comp); } } diff --git a/include/RAJA/util/zip_tuple.hpp b/include/RAJA/util/zip_tuple.hpp index fa8993f70d..1618d9ca2e 100644 --- a/include/RAJA/util/zip_tuple.hpp +++ b/include/RAJA/util/zip_tuple.hpp @@ -205,8 +205,7 @@ zip_for_each_impl(Tuple0&& t0, Tuple1&& t1, F&& f, camp::idx_seq) template RAJA_HOST_DEVICE inline void zip_for_each(Tuple&& t, F&& f) { - zip_for_each_impl(std::forward(t), - std::forward(f), + zip_for_each_impl(std::forward(t), std::forward(f), typename camp::decay::IdxSeq{}); } @@ -219,10 +218,8 @@ RAJA_HOST_DEVICE inline void zip_for_each(Tuple0&& t0, Tuple1&& t1, F&& f) static_assert(std::is_same::IdxSeq, typename camp::decay::IdxSeq>::value, "Tuple0 and Tuple1 must have the same size"); - zip_for_each_impl(std::forward(t0), - std::forward(t1), - std::forward(f), - typename camp::decay::IdxSeq{}); + zip_for_each_impl(std::forward(t0), std::forward(t1), + std::forward(f), typename camp::decay::IdxSeq{}); } } // end namespace detail diff --git a/src/KokkosPluginLoader.cpp b/src/KokkosPluginLoader.cpp index bdfcdf61b5..082253924b 100644 --- a/src/KokkosPluginLoader.cpp +++ b/src/KokkosPluginLoader.cpp @@ -99,14 +99,14 @@ void KokkosPluginLoader::initPlugin(const std::string& path) // Getting and storing supported kokkos functions. getFunction(plugin, init_functions, "kokkosp_init_library"); - getFunction( - plugin, pre_functions, "kokkosp_begin_parallel_for"); + getFunction(plugin, pre_functions, + "kokkosp_begin_parallel_for"); - getFunction( - plugin, post_functions, "kokkosp_end_parallel_for"); + getFunction(plugin, post_functions, + "kokkosp_end_parallel_for"); - getFunction( - plugin, finalize_functions, "kokkosp_finalize_library"); + getFunction(plugin, finalize_functions, + "kokkosp_finalize_library"); #else RAJA_UNUSED_ARG(path); #endif diff --git a/test/functional/dynamic_forall/resource-segment/tests/test-dynamic-forall-resource-RangeSegment.hpp b/test/functional/dynamic_forall/resource-segment/tests/test-dynamic-forall-resource-RangeSegment.hpp index c988caa033..8f09c50231 100644 --- a/test/functional/dynamic_forall/resource-segment/tests/test-dynamic-forall-resource-RangeSegment.hpp +++ b/test/functional/dynamic_forall/resource-segment/tests/test-dynamic-forall-resource-RangeSegment.hpp @@ -27,22 +27,20 @@ void DynamicForallResourceRangeSegmentTestImpl(INDEX_TYPE first, INDEX_TYPE* check_array; INDEX_TYPE* test_array; - allocateForallTestData( - N, erased_working_res, &working_array, &check_array, &test_array); + allocateForallTestData(N, erased_working_res, &working_array, + &check_array, &test_array); const INDEX_TYPE rbegin = *r1.begin(); std::iota(test_array, test_array + RAJA::stripIndexType(N), rbegin); RAJA::expt::dynamic_forall( - working_res, - pol, - r1, + working_res, pol, r1, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) { working_array[RAJA::stripIndexType(idx - rbegin)] = idx; }); - working_res.memcpy( - check_array, working_array, sizeof(INDEX_TYPE) * RAJA::stripIndexType(N)); + working_res.memcpy(check_array, working_array, + sizeof(INDEX_TYPE) * RAJA::stripIndexType(N)); for (INDEX_TYPE i = INDEX_TYPE(0); i < N; i++) { @@ -50,8 +48,8 @@ void DynamicForallResourceRangeSegmentTestImpl(INDEX_TYPE first, check_array[RAJA::stripIndexType(i)]); } - deallocateForallTestData( - erased_working_res, working_array, check_array, test_array); + deallocateForallTestData(erased_working_res, working_array, + check_array, test_array); } @@ -91,8 +89,7 @@ TYPED_TEST_P(DynamicForallResourceRangeSegmentTest, RangeSegmentForallResource) // Loop through policy list for (int pol = 0; pol < host_range; ++pol) { - DynamicForallResourceRangeSegmentTestImpl( INDEX_TYPE(0), INDEX_TYPE(27), pol); } @@ -106,8 +103,7 @@ TYPED_TEST_P(DynamicForallResourceRangeSegmentTest, RangeSegmentForallResource) #endif for (int pol = device_start; pol < N; ++pol) { - DynamicForallResourceRangeSegmentTestImpl( INDEX_TYPE(0), INDEX_TYPE(27), pol); } diff --git a/test/functional/dynamic_forall/segment/tests/test-dynamic-forall-RangeSegment.hpp b/test/functional/dynamic_forall/segment/tests/test-dynamic-forall-RangeSegment.hpp index 531dd29528..3d47e7cd53 100644 --- a/test/functional/dynamic_forall/segment/tests/test-dynamic-forall-RangeSegment.hpp +++ b/test/functional/dynamic_forall/segment/tests/test-dynamic-forall-RangeSegment.hpp @@ -32,8 +32,8 @@ void DynamicForallRangeSegmentTestImpl(INDEX_TYPE first, data_len = 1; } - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, + &check_array, &test_array); if (RAJA::stripIndexType(N) > 0) { @@ -43,8 +43,7 @@ void DynamicForallRangeSegmentTestImpl(INDEX_TYPE first, std::iota(test_array, test_array + RAJA::stripIndexType(N), rbegin); RAJA::expt::dynamic_forall( - pol, - r1, + pol, r1, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) { working_array[RAJA::stripIndexType(idx - rbegin)] = idx; }); } @@ -53,11 +52,10 @@ void DynamicForallRangeSegmentTestImpl(INDEX_TYPE first, memset(static_cast(test_array), 0, sizeof(INDEX_TYPE) * data_len); - working_res.memcpy( - working_array, test_array, sizeof(INDEX_TYPE) * data_len); + working_res.memcpy(working_array, test_array, + sizeof(INDEX_TYPE) * data_len); - RAJA::expt::dynamic_forall(pol, - r1, + RAJA::expt::dynamic_forall(pol, r1, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) { (void)idx; @@ -73,8 +71,8 @@ void DynamicForallRangeSegmentTestImpl(INDEX_TYPE first, check_array[RAJA::stripIndexType(i)]); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } diff --git a/test/functional/forall/CombiningAdapter/tests/test-forall-CombiningAdapter-1D.hpp b/test/functional/forall/CombiningAdapter/tests/test-forall-CombiningAdapter-1D.hpp index 85b3d09cb1..28f7d3f128 100644 --- a/test/functional/forall/CombiningAdapter/tests/test-forall-CombiningAdapter-1D.hpp +++ b/test/functional/forall/CombiningAdapter/tests/test-forall-CombiningAdapter-1D.hpp @@ -28,8 +28,8 @@ void ForallCombiningAdapter1DTestImpl(INDEX_TYPE first, INDEX_TYPE last) size_t data_len = RAJA::stripIndexType(N) + 1; - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, + &check_array, &test_array); { @@ -69,8 +69,8 @@ void ForallCombiningAdapter1DTestImpl(INDEX_TYPE first, INDEX_TYPE last) check_array[RAJA::stripIndexType(i)]); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } diff --git a/test/functional/forall/CombiningAdapter/tests/test-forall-CombiningAdapter-2D.hpp b/test/functional/forall/CombiningAdapter/tests/test-forall-CombiningAdapter-2D.hpp index 1cfcbfe690..327e74c9f0 100644 --- a/test/functional/forall/CombiningAdapter/tests/test-forall-CombiningAdapter-2D.hpp +++ b/test/functional/forall/CombiningAdapter/tests/test-forall-CombiningAdapter-2D.hpp @@ -34,8 +34,8 @@ void ForallCombiningAdapter2DTestImpl(INDEX_TYPE first0, size_t data_len = RAJA::stripIndexType(N) + 1; - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, + &check_array, &test_array); { @@ -66,8 +66,7 @@ void ForallCombiningAdapter2DTestImpl(INDEX_TYPE first0, working_array[RAJA::stripIndexType(N)]++; } }, - r0, - r1); + r0, r1); RAJA::forall(adapter.getRange(), adapter); } @@ -80,8 +79,8 @@ void ForallCombiningAdapter2DTestImpl(INDEX_TYPE first0, check_array[RAJA::stripIndexType(i)]); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } diff --git a/test/functional/forall/CombiningAdapter/tests/test-forall-CombiningAdapter-3D.hpp b/test/functional/forall/CombiningAdapter/tests/test-forall-CombiningAdapter-3D.hpp index b3e48fa3d4..f7a2d3405b 100644 --- a/test/functional/forall/CombiningAdapter/tests/test-forall-CombiningAdapter-3D.hpp +++ b/test/functional/forall/CombiningAdapter/tests/test-forall-CombiningAdapter-3D.hpp @@ -39,8 +39,8 @@ void ForallCombiningAdapter3DTestImpl(INDEX_TYPE first0, size_t data_len = RAJA::stripIndexType(N) + 1; - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, + &check_array, &test_array); { @@ -76,9 +76,7 @@ void ForallCombiningAdapter3DTestImpl(INDEX_TYPE first0, working_array[RAJA::stripIndexType(N)]++; } }, - r0, - r1, - r2); + r0, r1, r2); RAJA::forall(adapter.getRange(), adapter); } @@ -91,8 +89,8 @@ void ForallCombiningAdapter3DTestImpl(INDEX_TYPE first0, check_array[RAJA::stripIndexType(i)]); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } @@ -118,27 +116,15 @@ void runNegativeTests() { // test zero-length range segment ForallCombiningAdapter3DTestImpl( - INDEX_TYPE(-5), - INDEX_TYPE(-5), - INDEX_TYPE(-3), - INDEX_TYPE(-3), - INDEX_TYPE(-1), - INDEX_TYPE(-1)); + INDEX_TYPE(-5), INDEX_TYPE(-5), INDEX_TYPE(-3), INDEX_TYPE(-3), + INDEX_TYPE(-1), INDEX_TYPE(-1)); ForallCombiningAdapter3DTestImpl( - INDEX_TYPE(-5), - INDEX_TYPE(0), - INDEX_TYPE(-3), - INDEX_TYPE(0), - INDEX_TYPE(-4), - INDEX_TYPE(0)); + INDEX_TYPE(-5), INDEX_TYPE(0), INDEX_TYPE(-3), INDEX_TYPE(0), + INDEX_TYPE(-4), INDEX_TYPE(0)); ForallCombiningAdapter3DTestImpl( - INDEX_TYPE(-5), - INDEX_TYPE(5), - INDEX_TYPE(-3), - INDEX_TYPE(2), - INDEX_TYPE(-7), - INDEX_TYPE(-2)); + INDEX_TYPE(-5), INDEX_TYPE(5), INDEX_TYPE(-3), INDEX_TYPE(2), + INDEX_TYPE(-7), INDEX_TYPE(-2)); } @@ -150,55 +136,27 @@ TYPED_TEST_P(ForallCombiningAdapter3DTest, Forall3D) // test zero-length range segment ForallCombiningAdapter3DTestImpl( - INDEX_TYPE(3), - INDEX_TYPE(3), - INDEX_TYPE(5), - INDEX_TYPE(5), - INDEX_TYPE(7), + INDEX_TYPE(3), INDEX_TYPE(3), INDEX_TYPE(5), INDEX_TYPE(5), INDEX_TYPE(7), INDEX_TYPE(7)); ForallCombiningAdapter3DTestImpl( - INDEX_TYPE(3), - INDEX_TYPE(3), - INDEX_TYPE(5), - INDEX_TYPE(6), - INDEX_TYPE(7), + INDEX_TYPE(3), INDEX_TYPE(3), INDEX_TYPE(5), INDEX_TYPE(6), INDEX_TYPE(7), INDEX_TYPE(8)); ForallCombiningAdapter3DTestImpl( - INDEX_TYPE(3), - INDEX_TYPE(4), - INDEX_TYPE(5), - INDEX_TYPE(5), - INDEX_TYPE(7), + INDEX_TYPE(3), INDEX_TYPE(4), INDEX_TYPE(5), INDEX_TYPE(5), INDEX_TYPE(7), INDEX_TYPE(8)); ForallCombiningAdapter3DTestImpl( - INDEX_TYPE(3), - INDEX_TYPE(4), - INDEX_TYPE(5), - INDEX_TYPE(6), - INDEX_TYPE(7), + INDEX_TYPE(3), INDEX_TYPE(4), INDEX_TYPE(5), INDEX_TYPE(6), INDEX_TYPE(7), INDEX_TYPE(7)); ForallCombiningAdapter3DTestImpl( - INDEX_TYPE(0), - INDEX_TYPE(7), - INDEX_TYPE(0), - INDEX_TYPE(6), - INDEX_TYPE(0), + INDEX_TYPE(0), INDEX_TYPE(7), INDEX_TYPE(0), INDEX_TYPE(6), INDEX_TYPE(0), INDEX_TYPE(3)); ForallCombiningAdapter3DTestImpl( - INDEX_TYPE(1), - INDEX_TYPE(13), - INDEX_TYPE(4), - INDEX_TYPE(17), - INDEX_TYPE(6), - INDEX_TYPE(11)); + INDEX_TYPE(1), INDEX_TYPE(13), INDEX_TYPE(4), INDEX_TYPE(17), + INDEX_TYPE(6), INDEX_TYPE(11)); ForallCombiningAdapter3DTestImpl( - INDEX_TYPE(13), - INDEX_TYPE(46), - INDEX_TYPE(17), - INDEX_TYPE(51), - INDEX_TYPE(4), - INDEX_TYPE(31)); + INDEX_TYPE(13), INDEX_TYPE(46), INDEX_TYPE(17), INDEX_TYPE(51), + INDEX_TYPE(4), INDEX_TYPE(31)); runNegativeTests(); } diff --git a/test/functional/forall/atomic-basic/tests/test-forall-atomic-basic.hpp b/test/functional/forall/atomic-basic/tests/test-forall-atomic-basic.hpp index 096a1267a9..c2d8293ce6 100644 --- a/test/functional/forall/atomic-basic/tests/test-forall-atomic-basic.hpp +++ b/test/functional/forall/atomic-basic/tests/test-forall-atomic-basic.hpp @@ -75,8 +75,8 @@ void ForallAtomicBasicTestImpl(IdxType seglimit) T* test_array; T* check_array; - allocateForallTestData( - len, work_res, &work_array, &check_array, &test_array); + allocateForallTestData(len, work_res, &work_array, &check_array, + &test_array); // use atomic add to reduce the array test_array[0] = static_cast(0); @@ -105,8 +105,8 @@ void ForallAtomicBasicTestImpl(IdxType seglimit) RAJA::atomicInc(work_array + 4); RAJA::atomicDec(work_array + 5); RAJA::atomicExchange(work_array + 6, static_cast(i)); - RAJA::atomicCAS( - work_array + 7, static_cast(i), static_cast(i + 1)); + RAJA::atomicCAS(work_array + 7, static_cast(i), + static_cast(i + 1)); RAJA::atomicLoad(work_array + 8); RAJA::atomicStore(work_array + 9, static_cast(1)); RAJA::atomicInc(work_array + 10, static_cast(16)); @@ -147,24 +147,13 @@ TYPED_TEST_P(ForallAtomicBasicTest, AtomicBasicForall) using IdxType = typename camp::at>::type; using DType = typename camp::at>::type; - ForallAtomicBasicTestImpl, - DType>(10000); - ForallAtomicBasicTestImpl, - DType>(10000); - ForallAtomicBasicTestImpl, - DType>(10000); + ForallAtomicBasicTestImpl, DType>(10000); + ForallAtomicBasicTestImpl, DType>( + 10000); + ForallAtomicBasicTestImpl, DType>(10000); } REGISTER_TYPED_TEST_SUITE_P(ForallAtomicBasicTest, AtomicBasicForall); diff --git a/test/functional/forall/atomic-ref/tests/test-forall-AtomicRefCAS.hpp b/test/functional/forall/atomic-ref/tests/test-forall-AtomicRefCAS.hpp index 82b847e86e..163b518977 100644 --- a/test/functional/forall/atomic-ref/tests/test-forall-AtomicRefCAS.hpp +++ b/test/functional/forall/atomic-ref/tests/test-forall-AtomicRefCAS.hpp @@ -114,8 +114,8 @@ void testAtomicRefCASOp(RAJA::TypedRangeSegment seg, IdxType N) { OtherOp otherop(count, hcount, work_res, seg); - RAJA::forall( - seg, [=] RAJA_HOST_DEVICE(IdxType i) { list[i] = otherop.max + (T)1; }); + RAJA::forall(seg, [=] RAJA_HOST_DEVICE(IdxType i) + { list[i] = otherop.max + (T)1; }); RAJA::forall(seg, [=] RAJA_HOST_DEVICE(IdxType i) { @@ -171,18 +171,12 @@ void ForallAtomicRefCASTestImpl(IdxType N) testAtomicRefCASOp( seg, count, list, hcount, hlist, work_res, N); - testAtomicRefCASOp( - seg, count, list, hcount, hlist, work_res, N); - testAtomicRefCASOp( - seg, count, list, hcount, hlist, work_res, N); + testAtomicRefCASOp(seg, count, list, hcount, + hlist, work_res, N); + testAtomicRefCASOp(seg, count, list, hcount, + hlist, work_res, N); work_res.deallocate(count); work_res.deallocate(list); diff --git a/test/functional/forall/atomic-ref/tests/test-forall-AtomicRefLoadStore.hpp b/test/functional/forall/atomic-ref/tests/test-forall-AtomicRefLoadStore.hpp index 7e63e1bfb1..db2729ecf4 100644 --- a/test/functional/forall/atomic-ref/tests/test-forall-AtomicRefLoadStore.hpp +++ b/test/functional/forall/atomic-ref/tests/test-forall-AtomicRefLoadStore.hpp @@ -116,8 +116,8 @@ void testAtomicRefLoadStoreOp(RAJA::TypedRangeSegment seg, IdxType N) { OtherOp otherop(count, hcount, work_res, seg); - RAJA::forall( - seg, [=] RAJA_HOST_DEVICE(IdxType i) { list[i] = otherop.max + (T)1; }); + RAJA::forall(seg, [=] RAJA_HOST_DEVICE(IdxType i) + { list[i] = otherop.max + (T)1; }); RAJA::forall(seg, [=] RAJA_HOST_DEVICE(IdxType i) { @@ -173,12 +173,9 @@ void ForallAtomicRefLoadStoreTestImpl(IdxType N) testAtomicRefLoadStoreOp( seg, count, list, hcount, hlist, work_res, N); - testAtomicRefLoadStoreOp( - seg, count, list, hcount, hlist, work_res, N); + testAtomicRefLoadStoreOp(seg, count, list, hcount, hlist, + work_res, N); testAtomicRefLoadStoreOp( seg, count, list, hcount, hlist, work_res, N); testAtomicRefLoadStoreOp( diff --git a/test/functional/forall/atomic-ref/tests/test-forall-AtomicRefLogical.hpp b/test/functional/forall/atomic-ref/tests/test-forall-AtomicRefLogical.hpp index 88ab21f6ec..276993e300 100644 --- a/test/functional/forall/atomic-ref/tests/test-forall-AtomicRefLogical.hpp +++ b/test/functional/forall/atomic-ref/tests/test-forall-AtomicRefLogical.hpp @@ -194,8 +194,8 @@ testAtomicRefLogicalOp(RAJA::TypedRangeSegment seg, IdxType N) { OtherOp otherop(count, hcount, work_res, seg); - RAJA::forall( - seg, [=] RAJA_HOST_DEVICE(IdxType i) { list[i] = otherop.max + (T)1; }); + RAJA::forall(seg, [=] RAJA_HOST_DEVICE(IdxType i) + { list[i] = otherop.max + (T)1; }); RAJA::forall(seg, [=] RAJA_HOST_DEVICE(IdxType i) { diff --git a/test/functional/forall/atomic-ref/tests/test-forall-AtomicRefMinMax.hpp b/test/functional/forall/atomic-ref/tests/test-forall-AtomicRefMinMax.hpp index df202c4b98..e3fd035ab5 100644 --- a/test/functional/forall/atomic-ref/tests/test-forall-AtomicRefMinMax.hpp +++ b/test/functional/forall/atomic-ref/tests/test-forall-AtomicRefMinMax.hpp @@ -116,8 +116,8 @@ void testAtomicRefMinMaxOp(RAJA::TypedRangeSegment seg, IdxType N) { OtherOp otherop(count, hcount, work_res, seg); - RAJA::forall( - seg, [=] RAJA_HOST_DEVICE(IdxType i) { list[i] = otherop.max + (T)1; }); + RAJA::forall(seg, [=] RAJA_HOST_DEVICE(IdxType i) + { list[i] = otherop.max + (T)1; }); RAJA::forall(seg, [=] RAJA_HOST_DEVICE(IdxType i) { diff --git a/test/functional/forall/atomic-view/tests/test-forall-AtomicMultiView.hpp b/test/functional/forall/atomic-view/tests/test-forall-AtomicMultiView.hpp index 2069c90ed1..d737929fa9 100644 --- a/test/functional/forall/atomic-view/tests/test-forall-AtomicMultiView.hpp +++ b/test/functional/forall/atomic-view/tests/test-forall-AtomicMultiView.hpp @@ -50,17 +50,15 @@ void ForallAtomicMultiViewTestImpl(IdxType N) #endif // assumes each source[] will be 2x size of each dest[], src_side x dst_side - RAJA::forall(seg_srcside, - [=] RAJA_HOST_DEVICE(IdxType ii) + RAJA::forall(seg_srcside, [=] RAJA_HOST_DEVICE(IdxType ii) { source[ii] = actualsource + (ii * dst_side); }); // assumes each dest[] will be a square matrix, dst_side x dst_side - RAJA::forall(seg_dstside, - [=] RAJA_HOST_DEVICE(IdxType ii) + RAJA::forall(seg_dstside, [=] RAJA_HOST_DEVICE(IdxType ii) { dest[ii] = actualdest + (ii * dst_side); }); - RAJA::forall( - seg, [=] RAJA_HOST_DEVICE(IdxType i) { actualsource[i] = (T)1; }); + RAJA::forall(seg, [=] RAJA_HOST_DEVICE(IdxType i) + { actualsource[i] = (T)1; }); // use atomic add to reduce the array // 1D defaut MultiView diff --git a/test/functional/forall/atomic-view/tests/test-forall-AtomicOutOfBoundsMultiView.hpp b/test/functional/forall/atomic-view/tests/test-forall-AtomicOutOfBoundsMultiView.hpp index 81e4da707d..e863d8d47b 100644 --- a/test/functional/forall/atomic-view/tests/test-forall-AtomicOutOfBoundsMultiView.hpp +++ b/test/functional/forall/atomic-view/tests/test-forall-AtomicOutOfBoundsMultiView.hpp @@ -96,10 +96,7 @@ TYPED_TEST_P(ForallAtomicOutOfBoundsMultiViewTest, using IdxType = typename camp::at>::type; using DType = typename camp::at>::type; - ForallAtomicOutOfBoundsMultiViewTestImpl(20000); } diff --git a/test/functional/forall/atomic-view/tests/test-forall-AtomicView.hpp b/test/functional/forall/atomic-view/tests/test-forall-AtomicView.hpp index 057d5e8ee6..f8760c170b 100644 --- a/test/functional/forall/atomic-view/tests/test-forall-AtomicView.hpp +++ b/test/functional/forall/atomic-view/tests/test-forall-AtomicView.hpp @@ -59,12 +59,11 @@ void ForallAtomicViewTestImpl(IdxType N) // Zero out dest using atomic view - RAJA::forall( - seg_half, [=] RAJA_HOST_DEVICE(IdxType i) { sum_atomic_view(i) = (T)0; }); + RAJA::forall(seg_half, [=] RAJA_HOST_DEVICE(IdxType i) + { sum_atomic_view(i) = (T)0; }); // Assign values to dest using atomic view - RAJA::forall(seg, - [=] RAJA_HOST_DEVICE(IdxType i) + RAJA::forall(seg, [=] RAJA_HOST_DEVICE(IdxType i) { sum_atomic_view(i / 2) += vec_view(i); }); work_res.memcpy(check_array, dest, sizeof(T) * N / 2); diff --git a/test/functional/forall/indexset-view/tests/test-forall-IcountIndexSetView.hpp b/test/functional/forall/indexset-view/tests/test-forall-IcountIndexSetView.hpp index 4e18352df7..4441475bd0 100644 --- a/test/functional/forall/indexset-view/tests/test-forall-IcountIndexSetView.hpp +++ b/test/functional/forall/indexset-view/tests/test-forall-IcountIndexSetView.hpp @@ -45,8 +45,8 @@ void ForallIcountIndexSetViewTestImpl() INDEX_TYPE* check_array; INDEX_TYPE* test_array; - allocateForallTestData( - N, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(N, working_res, &working_array, + &check_array, &test_array); memset(test_array, 0, sizeof(INDEX_TYPE) * N); @@ -63,8 +63,7 @@ void ForallIcountIndexSetViewTestImpl() working_array, layout); RAJA::forall_Icount( - iset, - [=] RAJA_HOST_DEVICE(INDEX_TYPE icount, INDEX_TYPE idx) + iset, [=] RAJA_HOST_DEVICE(INDEX_TYPE icount, INDEX_TYPE idx) { work_view(icount) = idx; }); working_res.memcpy(check_array, working_array, sizeof(INDEX_TYPE) * N); @@ -75,8 +74,8 @@ void ForallIcountIndexSetViewTestImpl() ASSERT_EQ(test_array[i], check_array[i]); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } diff --git a/test/functional/forall/indexset-view/tests/test-forall-IndexSetView.hpp b/test/functional/forall/indexset-view/tests/test-forall-IndexSetView.hpp index e56423aa36..2769275a88 100644 --- a/test/functional/forall/indexset-view/tests/test-forall-IndexSetView.hpp +++ b/test/functional/forall/indexset-view/tests/test-forall-IndexSetView.hpp @@ -44,8 +44,8 @@ void ForallIndexSetViewTestImpl() INDEX_TYPE* check_array; INDEX_TYPE* test_array; - allocateForallTestData( - N, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(N, working_res, &working_array, + &check_array, &test_array); memset(test_array, 0, sizeof(INDEX_TYPE) * N); @@ -61,8 +61,8 @@ void ForallIndexSetViewTestImpl() RAJA::Layout<1> layout(N); view_type work_view(working_array, layout); - RAJA::forall( - iset, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) { working_array[idx] = idx; }); + RAJA::forall(iset, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) + { working_array[idx] = idx; }); working_res.memcpy(check_array, working_array, sizeof(INDEX_TYPE) * N); @@ -72,8 +72,8 @@ void ForallIndexSetViewTestImpl() ASSERT_EQ(test_array[i], check_array[i]); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } diff --git a/test/functional/forall/indexset/tests/test-forall-IcountIndexSet.hpp b/test/functional/forall/indexset/tests/test-forall-IcountIndexSet.hpp index da699db71d..ee4fba7b5f 100644 --- a/test/functional/forall/indexset/tests/test-forall-IcountIndexSet.hpp +++ b/test/functional/forall/indexset/tests/test-forall-IcountIndexSet.hpp @@ -43,8 +43,8 @@ void ForallIcountIndexSetTestImpl() INDEX_TYPE* check_array; INDEX_TYPE* test_array; - allocateForallTestData( - N, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(N, working_res, &working_array, + &check_array, &test_array); memset(test_array, 0, sizeof(INDEX_TYPE) * N); @@ -56,8 +56,7 @@ void ForallIcountIndexSetTestImpl() test_array[ticount++] = is_indices[i]; } - RAJA::forall_Icount(EXEC_POLICY(), - iset, + RAJA::forall_Icount(EXEC_POLICY(), iset, [=] RAJA_HOST_DEVICE(INDEX_TYPE icount, INDEX_TYPE idx) { working_array[icount] = idx; }); @@ -68,8 +67,8 @@ void ForallIcountIndexSetTestImpl() ASSERT_EQ(test_array[i], check_array[i]); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } diff --git a/test/functional/forall/indexset/tests/test-forall-IndexSet.hpp b/test/functional/forall/indexset/tests/test-forall-IndexSet.hpp index 00edd70b07..657d464f1c 100644 --- a/test/functional/forall/indexset/tests/test-forall-IndexSet.hpp +++ b/test/functional/forall/indexset/tests/test-forall-IndexSet.hpp @@ -42,8 +42,8 @@ void ForallIndexSetTestImpl() INDEX_TYPE* check_array; INDEX_TYPE* test_array; - allocateForallTestData( - N, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(N, working_res, &working_array, + &check_array, &test_array); memset(test_array, 0, sizeof(INDEX_TYPE) * N); @@ -54,8 +54,7 @@ void ForallIndexSetTestImpl() test_array[is_indices[i]] = is_indices[i]; } - RAJA::forall(EXEC_POLICY(), - iset, + RAJA::forall(EXEC_POLICY(), iset, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) { working_array[idx] = idx; }); @@ -67,8 +66,8 @@ void ForallIndexSetTestImpl() ASSERT_EQ(test_array[i], check_array[i]); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } diff --git a/test/functional/forall/multi-reduce-basic/tests/test-forall-basic-MultiReduce.hpp b/test/functional/forall/multi-reduce-basic/tests/test-forall-basic-MultiReduce.hpp index e3932ebfc3..f93b2affef 100644 --- a/test/functional/forall/multi-reduce-basic/tests/test-forall-basic-MultiReduce.hpp +++ b/test/functional/forall/multi-reduce-basic/tests/test-forall-basic-MultiReduce.hpp @@ -74,8 +74,8 @@ ForallMultiReduceBasicTestImpl(const SEG_TYPE& seg, IDX_TYPE data_len = 0; - allocateForallTestData( - idx_range + 1, working_res, &working_range, &check_range, &test_range); + allocateForallTestData(idx_range + 1, working_res, &working_range, + &check_range, &test_range); for (IDX_TYPE i = 0; i < idx_range + 1; ++i) { @@ -95,11 +95,11 @@ ForallMultiReduceBasicTestImpl(const SEG_TYPE& seg, } } - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, &check_array, + &test_array); - allocateForallTestData( - data_len, working_res, &working_bins, &check_bins, &test_bins); + allocateForallTestData(data_len, working_res, &working_bins, &check_bins, + &test_bins); if (data_len > IDX_TYPE(0)) { @@ -119,8 +119,8 @@ ForallMultiReduceBasicTestImpl(const SEG_TYPE& seg, } } - working_res.memcpy( - working_range, test_range, sizeof(IDX_TYPE) * (idx_range + 1)); + working_res.memcpy(working_range, test_range, + sizeof(IDX_TYPE) * (idx_range + 1)); working_res.memcpy(working_array, test_array, sizeof(DATA_TYPE) * data_len); working_res.memcpy(working_bins, test_bins, sizeof(IDX_TYPE) * data_len); @@ -177,16 +177,16 @@ ForallMultiReduceBasicTestImpl(const SEG_TYPE& seg, ABSTRACTION::combine(ref_vals[test_bins[i]], test_array[i]); } - RAJA::forall( - seg, - [=] RAJA_HOST_DEVICE(IDX_TYPE ii) - { - for (IDX_TYPE idx = working_range[ii]; idx < working_range[ii + 1]; - ++idx) - { - ABSTRACTION::reduce(red[working_bins[idx]], working_array[idx]); - } - }); + RAJA::forall(seg, + [=] RAJA_HOST_DEVICE(IDX_TYPE ii) + { + for (IDX_TYPE idx = working_range[ii]; + idx < working_range[ii + 1]; ++idx) + { + ABSTRACTION::reduce(red[working_bins[idx]], + working_array[idx]); + } + }); } for (size_t bin = 0; bin < num_bins; ++bin) @@ -213,8 +213,8 @@ ForallMultiReduceBasicTestImpl(const SEG_TYPE& seg, { test_array[i] = DATA_TYPE(array_flt_distribution(rngen)); } - working_res.memcpy( - working_array, test_array, sizeof(DATA_TYPE) * data_len); + working_res.memcpy(working_array, test_array, + sizeof(DATA_TYPE) * data_len); } @@ -226,16 +226,16 @@ ForallMultiReduceBasicTestImpl(const SEG_TYPE& seg, { red.reset(); - RAJA::forall( - seg, - [=] RAJA_HOST_DEVICE(IDX_TYPE ii) - { - for (IDX_TYPE idx = working_range[ii]; idx < working_range[ii + 1]; - ++idx) - { - ABSTRACTION::reduce(red[working_bins[idx]], working_array[idx]); - } - }); + RAJA::forall(seg, + [=] RAJA_HOST_DEVICE(IDX_TYPE ii) + { + for (IDX_TYPE idx = working_range[ii]; + idx < working_range[ii + 1]; ++idx) + { + ABSTRACTION::reduce(red[working_bins[idx]], + working_array[idx]); + } + }); if (!got_ref_vals) { @@ -299,30 +299,24 @@ TYPED_TEST_P(ForallMultiReduceBasicTest, MultiReduceBasicForall) // Range segment tests RAJA::TypedRangeSegment r1(0, 28); RAJA::getIndices(seg_idx, r1); - ForallMultiReduceBasicTestImpl( - r1, container, seg_idx, working_res, rngen); + ForallMultiReduceBasicTestImpl(r1, container, seg_idx, + working_res, rngen); seg_idx.clear(); RAJA::TypedRangeSegment r3(3, 2060); RAJA::getIndices(seg_idx, r3); - ForallMultiReduceBasicTestImpl( - r3, container, seg_idx, working_res, rngen); + ForallMultiReduceBasicTestImpl(r3, container, seg_idx, + working_res, rngen); // Range-stride segment test seg_idx.clear(); RAJA::TypedRangeStrideSegment r5(3, 1029, 3); RAJA::getIndices(seg_idx, r5); - ForallMultiReduceBasicTestImpl( - r5, container, seg_idx, working_res, rngen); + ForallMultiReduceBasicTestImpl(r5, container, seg_idx, + working_res, rngen); // List segment test seg_idx.clear(); @@ -336,13 +330,11 @@ TYPED_TEST_P(ForallMultiReduceBasicTest, MultiReduceBasicForall) seg_idx.push_back(i); } } - RAJA::TypedListSegment l1( - &seg_idx[0], seg_idx.size(), working_res); - ForallMultiReduceBasicTestImpl( - l1, container, seg_idx, working_res, rngen); + RAJA::TypedListSegment l1(&seg_idx[0], seg_idx.size(), + working_res); + ForallMultiReduceBasicTestImpl(l1, container, seg_idx, + working_res, rngen); } } diff --git a/test/functional/forall/reduce-basic/tests/test-forall-basic-ReduceBitAnd.hpp b/test/functional/forall/reduce-basic/tests/test-forall-basic-ReduceBitAnd.hpp index ed2d4c9dc9..b76d88741f 100644 --- a/test/functional/forall/reduce-basic/tests/test-forall-basic-ReduceBitAnd.hpp +++ b/test/functional/forall/reduce-basic/tests/test-forall-basic-ReduceBitAnd.hpp @@ -29,8 +29,8 @@ void ForallReduceBitAndBasicTestImpl(const SEG_TYPE& seg, DATA_TYPE* check_array; DATA_TYPE* test_array; - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, + &check_array, &test_array); // // First a simple non-trivial test that is mildly interesting @@ -43,8 +43,7 @@ void ForallReduceBitAndBasicTestImpl(const SEG_TYPE& seg, RAJA::ReduceBitAnd simpand(21); - RAJA::forall(seg, - [=] RAJA_HOST_DEVICE(IDX_TYPE idx) + RAJA::forall(seg, [=] RAJA_HOST_DEVICE(IDX_TYPE idx) { simpand &= working_array[idx]; }); ASSERT_EQ(static_cast(simpand.get()), 5); @@ -86,16 +85,15 @@ void ForallReduceBitAndBasicTestImpl(const SEG_TYPE& seg, const int nloops = 3; for (int j = 0; j < nloops; ++j) { - RAJA::forall(seg, - [=] RAJA_HOST_DEVICE(IDX_TYPE idx) + RAJA::forall(seg, [=] RAJA_HOST_DEVICE(IDX_TYPE idx) { redand &= working_array[idx]; }); } ASSERT_EQ(static_cast(redand.get()), ref_and); - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } @@ -119,48 +117,43 @@ TYPED_TEST_P(ForallReduceBitAndBasicTest, ReduceBitAndBasicForall) // Range segment tests RAJA::TypedRangeSegment r1(0, 28); RAJA::getIndices(seg_idx, r1); - ForallReduceBitAndBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r1, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r1, seg_idx, + working_res); seg_idx.clear(); RAJA::TypedRangeSegment r2(3, 642); RAJA::getIndices(seg_idx, r2); - ForallReduceBitAndBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r2, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r2, seg_idx, + working_res); seg_idx.clear(); RAJA::TypedRangeSegment r3(0, 2057); RAJA::getIndices(seg_idx, r3); - ForallReduceBitAndBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r3, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r3, seg_idx, + working_res); // Range-stride segment tests seg_idx.clear(); RAJA::TypedRangeStrideSegment r4(0, 188, 2); RAJA::getIndices(seg_idx, r4); - ForallReduceBitAndBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r4, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r4, seg_idx, + working_res); seg_idx.clear(); RAJA::TypedRangeStrideSegment r5(3, 1029, 3); RAJA::getIndices(seg_idx, r5); - ForallReduceBitAndBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r5, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r5, seg_idx, + working_res); // List segment tests seg_idx.clear(); @@ -175,10 +168,8 @@ TYPED_TEST_P(ForallReduceBitAndBasicTest, ReduceBitAndBasicForall) } } RAJA::TypedListSegment l1(&seg_idx[0], seg_idx.size(), working_res); - ForallReduceBitAndBasicTestImpl, - EXEC_POLICY, + ForallReduceBitAndBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(l1, seg_idx, working_res); } diff --git a/test/functional/forall/reduce-basic/tests/test-forall-basic-ReduceBitOr.hpp b/test/functional/forall/reduce-basic/tests/test-forall-basic-ReduceBitOr.hpp index 6d34119516..c6786ba23d 100644 --- a/test/functional/forall/reduce-basic/tests/test-forall-basic-ReduceBitOr.hpp +++ b/test/functional/forall/reduce-basic/tests/test-forall-basic-ReduceBitOr.hpp @@ -29,8 +29,8 @@ void ForallReduceBitOrBasicTestImpl(const SEG_TYPE& seg, DATA_TYPE* check_array; DATA_TYPE* test_array; - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, + &check_array, &test_array); // // First a simple non-trivial test that is mildly interesting @@ -43,8 +43,7 @@ void ForallReduceBitOrBasicTestImpl(const SEG_TYPE& seg, RAJA::ReduceBitOr simpor(5); - RAJA::forall(seg, - [=] RAJA_HOST_DEVICE(IDX_TYPE idx) + RAJA::forall(seg, [=] RAJA_HOST_DEVICE(IDX_TYPE idx) { simpor |= working_array[idx]; }); ASSERT_EQ(static_cast(simpor.get()), 13); @@ -87,16 +86,15 @@ void ForallReduceBitOrBasicTestImpl(const SEG_TYPE& seg, const int nloops = 3; for (int j = 0; j < nloops; ++j) { - RAJA::forall(seg, - [=] RAJA_HOST_DEVICE(IDX_TYPE idx) + RAJA::forall(seg, [=] RAJA_HOST_DEVICE(IDX_TYPE idx) { redor |= working_array[idx]; }); } ASSERT_EQ(static_cast(redor.get()), ref_or); - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } TYPED_TEST_SUITE_P(ForallReduceBitOrBasicTest); @@ -120,48 +118,40 @@ TYPED_TEST_P(ForallReduceBitOrBasicTest, ReduceBitOrBasicForall) // Range segment tests RAJA::TypedRangeSegment r1(0, 28); RAJA::getIndices(seg_idx, r1); - ForallReduceBitOrBasicTestImpl, - EXEC_POLICY, + ForallReduceBitOrBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(r1, seg_idx, working_res); seg_idx.clear(); RAJA::TypedRangeSegment r2(3, 642); RAJA::getIndices(seg_idx, r2); - ForallReduceBitOrBasicTestImpl, - EXEC_POLICY, + ForallReduceBitOrBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(r2, seg_idx, working_res); seg_idx.clear(); RAJA::TypedRangeSegment r3(0, 2057); RAJA::getIndices(seg_idx, r3); - ForallReduceBitOrBasicTestImpl, - EXEC_POLICY, + ForallReduceBitOrBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(r3, seg_idx, working_res); // Range-stride segment tests seg_idx.clear(); RAJA::TypedRangeStrideSegment r4(0, 188, 2); RAJA::getIndices(seg_idx, r4); - ForallReduceBitOrBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r4, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r4, seg_idx, + working_res); seg_idx.clear(); RAJA::TypedRangeStrideSegment r5(3, 1029, 3); RAJA::getIndices(seg_idx, r5); - ForallReduceBitOrBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r5, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r5, seg_idx, + working_res); // List segment tests seg_idx.clear(); @@ -176,10 +166,8 @@ TYPED_TEST_P(ForallReduceBitOrBasicTest, ReduceBitOrBasicForall) } } RAJA::TypedListSegment l1(&seg_idx[0], seg_idx.size(), working_res); - ForallReduceBitOrBasicTestImpl, - EXEC_POLICY, + ForallReduceBitOrBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(l1, seg_idx, working_res); } diff --git a/test/functional/forall/reduce-basic/tests/test-forall-basic-ReduceMax.hpp b/test/functional/forall/reduce-basic/tests/test-forall-basic-ReduceMax.hpp index 9e88a8a1dd..8da71aca26 100644 --- a/test/functional/forall/reduce-basic/tests/test-forall-basic-ReduceMax.hpp +++ b/test/functional/forall/reduce-basic/tests/test-forall-basic-ReduceMax.hpp @@ -29,8 +29,8 @@ void ForallReduceMaxBasicTestImpl(const SEG_TYPE& seg, DATA_TYPE* check_array; DATA_TYPE* test_array; - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, + &check_array, &test_array); const int modval = 100; const DATA_TYPE max_init = -1; @@ -66,20 +66,18 @@ void ForallReduceMaxBasicTestImpl(const SEG_TYPE& seg, ASSERT_EQ(static_cast(max.get()), max_init); DATA_TYPE factor = 2; - RAJA::forall(seg, - [=] RAJA_HOST_DEVICE(IDX_TYPE idx) + RAJA::forall(seg, [=] RAJA_HOST_DEVICE(IDX_TYPE idx) { max.max(working_array[idx] * factor); }); ASSERT_EQ(static_cast(max.get()), ref_max * factor); factor = 3; - RAJA::forall(seg, - [=] RAJA_HOST_DEVICE(IDX_TYPE idx) + RAJA::forall(seg, [=] RAJA_HOST_DEVICE(IDX_TYPE idx) { max.max(working_array[idx] * factor); }); ASSERT_EQ(static_cast(max.get()), ref_max * factor); - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } TYPED_TEST_SUITE_P(ForallReduceMaxBasicTest); @@ -102,48 +100,40 @@ TYPED_TEST_P(ForallReduceMaxBasicTest, ReduceMaxBasicForall) // Range segment tests RAJA::TypedRangeSegment r1(0, 28); RAJA::getIndices(seg_idx, r1); - ForallReduceMaxBasicTestImpl, - EXEC_POLICY, + ForallReduceMaxBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(r1, seg_idx, working_res); seg_idx.clear(); RAJA::TypedRangeSegment r2(3, 642); RAJA::getIndices(seg_idx, r2); - ForallReduceMaxBasicTestImpl, - EXEC_POLICY, + ForallReduceMaxBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(r2, seg_idx, working_res); seg_idx.clear(); RAJA::TypedRangeSegment r3(0, 2057); RAJA::getIndices(seg_idx, r3); - ForallReduceMaxBasicTestImpl, - EXEC_POLICY, + ForallReduceMaxBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(r3, seg_idx, working_res); // Range-stride segment tests seg_idx.clear(); RAJA::TypedRangeStrideSegment r4(0, 188, 2); RAJA::getIndices(seg_idx, r4); - ForallReduceMaxBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r4, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r4, seg_idx, + working_res); seg_idx.clear(); RAJA::TypedRangeStrideSegment r5(3, 1029, 3); RAJA::getIndices(seg_idx, r5); - ForallReduceMaxBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r5, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r5, seg_idx, + working_res); // List segment tests seg_idx.clear(); @@ -158,10 +148,8 @@ TYPED_TEST_P(ForallReduceMaxBasicTest, ReduceMaxBasicForall) } } RAJA::TypedListSegment l1(&seg_idx[0], seg_idx.size(), working_res); - ForallReduceMaxBasicTestImpl, - EXEC_POLICY, + ForallReduceMaxBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(l1, seg_idx, working_res); } diff --git a/test/functional/forall/reduce-basic/tests/test-forall-basic-ReduceMaxLoc.hpp b/test/functional/forall/reduce-basic/tests/test-forall-basic-ReduceMaxLoc.hpp index c492320b99..b9434014cb 100644 --- a/test/functional/forall/reduce-basic/tests/test-forall-basic-ReduceMaxLoc.hpp +++ b/test/functional/forall/reduce-basic/tests/test-forall-basic-ReduceMaxLoc.hpp @@ -29,8 +29,8 @@ void ForallReduceMaxLocBasicTestImpl(const SEG_TYPE& seg, DATA_TYPE* check_array; DATA_TYPE* test_array; - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, + &check_array, &test_array); const int modval = 100; const DATA_TYPE max_init = -modval; @@ -81,22 +81,20 @@ void ForallReduceMaxLocBasicTestImpl(const SEG_TYPE& seg, ASSERT_EQ(static_cast(max.getLoc()), maxloc_init); DATA_TYPE factor = 2; - RAJA::forall(seg, - [=] RAJA_HOST_DEVICE(IDX_TYPE idx) + RAJA::forall(seg, [=] RAJA_HOST_DEVICE(IDX_TYPE idx) { max.maxloc(working_array[idx] * factor, idx); }); ASSERT_EQ(static_cast(max.get()), ref_max * factor); ASSERT_EQ(static_cast(max.getLoc()), ref_maxloc); factor = 3; - RAJA::forall(seg, - [=] RAJA_HOST_DEVICE(IDX_TYPE idx) + RAJA::forall(seg, [=] RAJA_HOST_DEVICE(IDX_TYPE idx) { max.maxloc(working_array[idx] * factor, idx); }); ASSERT_EQ(static_cast(max.get()), ref_max * factor); ASSERT_EQ(static_cast(max.getLoc()), ref_maxloc); - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } TYPED_TEST_SUITE_P(ForallReduceMaxLocBasicTest); @@ -119,48 +117,43 @@ TYPED_TEST_P(ForallReduceMaxLocBasicTest, ReduceMaxLocBasicForall) // Range segment tests RAJA::TypedRangeSegment r1(0, 28); RAJA::getIndices(seg_idx, r1); - ForallReduceMaxLocBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r1, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r1, seg_idx, + working_res); seg_idx.clear(); RAJA::TypedRangeSegment r2(3, 642); RAJA::getIndices(seg_idx, r2); - ForallReduceMaxLocBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r2, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r2, seg_idx, + working_res); seg_idx.clear(); RAJA::TypedRangeSegment r3(0, 2057); RAJA::getIndices(seg_idx, r3); - ForallReduceMaxLocBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r3, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r3, seg_idx, + working_res); // Range-stride segment tests seg_idx.clear(); RAJA::TypedRangeStrideSegment r4(0, 188, 2); RAJA::getIndices(seg_idx, r4); - ForallReduceMaxLocBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r4, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r4, seg_idx, + working_res); seg_idx.clear(); RAJA::TypedRangeStrideSegment r5(3, 1029, 3); RAJA::getIndices(seg_idx, r5); - ForallReduceMaxLocBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r5, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r5, seg_idx, + working_res); // List segment tests seg_idx.clear(); @@ -175,10 +168,8 @@ TYPED_TEST_P(ForallReduceMaxLocBasicTest, ReduceMaxLocBasicForall) } } RAJA::TypedListSegment l1(&seg_idx[0], seg_idx.size(), working_res); - ForallReduceMaxLocBasicTestImpl, - EXEC_POLICY, + ForallReduceMaxLocBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(l1, seg_idx, working_res); } diff --git a/test/functional/forall/reduce-basic/tests/test-forall-basic-ReduceMin.hpp b/test/functional/forall/reduce-basic/tests/test-forall-basic-ReduceMin.hpp index 23de6c393f..0af276ac7d 100644 --- a/test/functional/forall/reduce-basic/tests/test-forall-basic-ReduceMin.hpp +++ b/test/functional/forall/reduce-basic/tests/test-forall-basic-ReduceMin.hpp @@ -29,8 +29,8 @@ void ForallReduceMinBasicTestImpl(const SEG_TYPE& seg, DATA_TYPE* check_array; DATA_TYPE* test_array; - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, + &check_array, &test_array); const int modval = 100; const DATA_TYPE min_init = modval + 1; @@ -67,22 +67,20 @@ void ForallReduceMinBasicTestImpl(const SEG_TYPE& seg, ASSERT_EQ(static_cast(min.get()), min_init); DATA_TYPE factor = 3; - RAJA::forall(seg, - [=] RAJA_HOST_DEVICE(IDX_TYPE idx) + RAJA::forall(seg, [=] RAJA_HOST_DEVICE(IDX_TYPE idx) { min.min(working_array[idx] * factor); }); ASSERT_EQ(static_cast(min.get()), ref_min * factor); factor = 2; - RAJA::forall(seg, - [=] RAJA_HOST_DEVICE(IDX_TYPE idx) + RAJA::forall(seg, [=] RAJA_HOST_DEVICE(IDX_TYPE idx) { min.min(working_array[idx] * factor); }); ASSERT_EQ(static_cast(min.get()), ref_min * factor); - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } @@ -106,48 +104,40 @@ TYPED_TEST_P(ForallReduceMinBasicTest, ReduceMinBasicForall) // Range segment tests RAJA::TypedRangeSegment r1(0, 28); RAJA::getIndices(seg_idx, r1); - ForallReduceMinBasicTestImpl, - EXEC_POLICY, + ForallReduceMinBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(r1, seg_idx, working_res); seg_idx.clear(); RAJA::TypedRangeSegment r2(3, 642); RAJA::getIndices(seg_idx, r2); - ForallReduceMinBasicTestImpl, - EXEC_POLICY, + ForallReduceMinBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(r2, seg_idx, working_res); seg_idx.clear(); RAJA::TypedRangeSegment r3(0, 2057); RAJA::getIndices(seg_idx, r3); - ForallReduceMinBasicTestImpl, - EXEC_POLICY, + ForallReduceMinBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(r3, seg_idx, working_res); // Range-stride segment tests seg_idx.clear(); RAJA::TypedRangeStrideSegment r4(0, 188, 2); RAJA::getIndices(seg_idx, r4); - ForallReduceMinBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r4, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r4, seg_idx, + working_res); seg_idx.clear(); RAJA::TypedRangeStrideSegment r5(3, 1029, 3); RAJA::getIndices(seg_idx, r5); - ForallReduceMinBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r5, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r5, seg_idx, + working_res); // List segment tests seg_idx.clear(); @@ -162,10 +152,8 @@ TYPED_TEST_P(ForallReduceMinBasicTest, ReduceMinBasicForall) } } RAJA::TypedListSegment l1(&seg_idx[0], seg_idx.size(), working_res); - ForallReduceMinBasicTestImpl, - EXEC_POLICY, + ForallReduceMinBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(l1, seg_idx, working_res); } diff --git a/test/functional/forall/reduce-basic/tests/test-forall-basic-ReduceMinLoc.hpp b/test/functional/forall/reduce-basic/tests/test-forall-basic-ReduceMinLoc.hpp index 0bf882ceea..f20b086bc7 100644 --- a/test/functional/forall/reduce-basic/tests/test-forall-basic-ReduceMinLoc.hpp +++ b/test/functional/forall/reduce-basic/tests/test-forall-basic-ReduceMinLoc.hpp @@ -29,8 +29,8 @@ void ForallReduceMinLocBasicTestImpl(const SEG_TYPE& seg, DATA_TYPE* check_array; DATA_TYPE* test_array; - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, + &check_array, &test_array); const int modval = 100; const DATA_TYPE min_init = modval + 1; @@ -81,22 +81,20 @@ void ForallReduceMinLocBasicTestImpl(const SEG_TYPE& seg, ASSERT_EQ(static_cast(min.getLoc()), minloc_init); DATA_TYPE factor = 2; - RAJA::forall(seg, - [=] RAJA_HOST_DEVICE(IDX_TYPE idx) + RAJA::forall(seg, [=] RAJA_HOST_DEVICE(IDX_TYPE idx) { min.minloc(working_array[idx] * factor, idx); }); ASSERT_EQ(static_cast(min.get()), ref_min * factor); ASSERT_EQ(static_cast(min.getLoc()), ref_minloc); factor = 3; - RAJA::forall(seg, - [=] RAJA_HOST_DEVICE(IDX_TYPE idx) + RAJA::forall(seg, [=] RAJA_HOST_DEVICE(IDX_TYPE idx) { min.minloc(working_array[idx] * factor, idx); }); ASSERT_EQ(static_cast(min.get()), ref_min * factor); ASSERT_EQ(static_cast(min.getLoc()), ref_minloc); - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } TYPED_TEST_SUITE_P(ForallReduceMinLocBasicTest); @@ -119,48 +117,43 @@ TYPED_TEST_P(ForallReduceMinLocBasicTest, ReduceMinLocBasicForall) // Range segment tests RAJA::TypedRangeSegment r1(0, 28); RAJA::getIndices(seg_idx, r1); - ForallReduceMinLocBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r1, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r1, seg_idx, + working_res); seg_idx.clear(); RAJA::TypedRangeSegment r2(3, 642); RAJA::getIndices(seg_idx, r2); - ForallReduceMinLocBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r2, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r2, seg_idx, + working_res); seg_idx.clear(); RAJA::TypedRangeSegment r3(0, 2057); RAJA::getIndices(seg_idx, r3); - ForallReduceMinLocBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r3, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r3, seg_idx, + working_res); // Range-stride segment tests seg_idx.clear(); RAJA::TypedRangeStrideSegment r4(0, 188, 2); RAJA::getIndices(seg_idx, r4); - ForallReduceMinLocBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r4, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r4, seg_idx, + working_res); seg_idx.clear(); RAJA::TypedRangeStrideSegment r5(3, 1029, 3); RAJA::getIndices(seg_idx, r5); - ForallReduceMinLocBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r5, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r5, seg_idx, + working_res); // List segment tests seg_idx.clear(); @@ -175,10 +168,8 @@ TYPED_TEST_P(ForallReduceMinLocBasicTest, ReduceMinLocBasicForall) } } RAJA::TypedListSegment l1(&seg_idx[0], seg_idx.size(), working_res); - ForallReduceMinLocBasicTestImpl, - EXEC_POLICY, + ForallReduceMinLocBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(l1, seg_idx, working_res); } diff --git a/test/functional/forall/reduce-basic/tests/test-forall-basic-ReduceSum.hpp b/test/functional/forall/reduce-basic/tests/test-forall-basic-ReduceSum.hpp index 145355e64f..6b8c0b2506 100644 --- a/test/functional/forall/reduce-basic/tests/test-forall-basic-ReduceSum.hpp +++ b/test/functional/forall/reduce-basic/tests/test-forall-basic-ReduceSum.hpp @@ -29,8 +29,8 @@ void ForallReduceSumBasicTestImpl(const SEG_TYPE& seg, DATA_TYPE* check_array; DATA_TYPE* test_array; - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, + &check_array, &test_array); const int modval = 100; @@ -67,15 +67,15 @@ void ForallReduceSumBasicTestImpl(const SEG_TYPE& seg, for (int j = 0; j < nloops; ++j) { - RAJA::forall( - seg, [=] RAJA_HOST_DEVICE(IDX_TYPE idx) { sum += working_array[idx]; }); + RAJA::forall(seg, [=] RAJA_HOST_DEVICE(IDX_TYPE idx) + { sum += working_array[idx]; }); } ASSERT_EQ(static_cast(sum.get()), nloops * ref_sum); - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } @@ -99,48 +99,40 @@ TYPED_TEST_P(ForallReduceSumBasicTest, ReduceSumBasicForall) // Range segment tests RAJA::TypedRangeSegment r1(0, 28); RAJA::getIndices(seg_idx, r1); - ForallReduceSumBasicTestImpl, - EXEC_POLICY, + ForallReduceSumBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(r1, seg_idx, working_res); seg_idx.clear(); RAJA::TypedRangeSegment r2(3, 642); RAJA::getIndices(seg_idx, r2); - ForallReduceSumBasicTestImpl, - EXEC_POLICY, + ForallReduceSumBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(r2, seg_idx, working_res); seg_idx.clear(); RAJA::TypedRangeSegment r3(0, 2057); RAJA::getIndices(seg_idx, r3); - ForallReduceSumBasicTestImpl, - EXEC_POLICY, + ForallReduceSumBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(r3, seg_idx, working_res); // Range-stride segment tests seg_idx.clear(); RAJA::TypedRangeStrideSegment r4(0, 188, 2); RAJA::getIndices(seg_idx, r4); - ForallReduceSumBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r4, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r4, seg_idx, + working_res); seg_idx.clear(); RAJA::TypedRangeStrideSegment r5(3, 1029, 3); RAJA::getIndices(seg_idx, r5); - ForallReduceSumBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r5, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r5, seg_idx, + working_res); // List segment tests seg_idx.clear(); @@ -155,10 +147,8 @@ TYPED_TEST_P(ForallReduceSumBasicTest, ReduceSumBasicForall) } } RAJA::TypedListSegment l1(&seg_idx[0], seg_idx.size(), working_res); - ForallReduceSumBasicTestImpl, - EXEC_POLICY, + ForallReduceSumBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(l1, seg_idx, working_res); } diff --git a/test/functional/forall/reduce-basic/tests/test-forall-basic-expt-ReduceBitAnd.hpp b/test/functional/forall/reduce-basic/tests/test-forall-basic-expt-ReduceBitAnd.hpp index 5525eaf302..d4a91c4732 100644 --- a/test/functional/forall/reduce-basic/tests/test-forall-basic-expt-ReduceBitAnd.hpp +++ b/test/functional/forall/reduce-basic/tests/test-forall-basic-expt-ReduceBitAnd.hpp @@ -29,8 +29,8 @@ void ForallReduceBitAndBasicTestImpl(const SEG_TYPE& seg, DATA_TYPE* check_array; DATA_TYPE* test_array; - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, + &check_array, &test_array); // // First a simple non-trivial test that is mildly interesting @@ -43,8 +43,7 @@ void ForallReduceBitAndBasicTestImpl(const SEG_TYPE& seg, RAJA::ReduceBitAnd simpand(21); - RAJA::forall(seg, - [=] RAJA_HOST_DEVICE(IDX_TYPE idx) + RAJA::forall(seg, [=] RAJA_HOST_DEVICE(IDX_TYPE idx) { simpand &= working_array[idx]; }); ASSERT_EQ(static_cast(simpand.get()), 5); @@ -72,8 +71,7 @@ void ForallReduceBitAndBasicTestImpl(const SEG_TYPE& seg, DATA_TYPE redand2(2); RAJA::forall( - seg, - RAJA::expt::Reduce(&redand), + seg, RAJA::expt::Reduce(&redand), RAJA::expt::Reduce(&redand2), RAJA::expt::KernelName("RAJA Reduce BitAnd"), [=] RAJA_HOST_DEVICE(IDX_TYPE idx, DATA_TYPE & r1, DATA_TYPE & r2) @@ -91,8 +89,7 @@ void ForallReduceBitAndBasicTestImpl(const SEG_TYPE& seg, for (int j = 0; j < nloops; ++j) { RAJA::forall( - seg, - RAJA::expt::Reduce(&redand), + seg, RAJA::expt::Reduce(&redand), [=] RAJA_HOST_DEVICE(IDX_TYPE idx, DATA_TYPE & r1) { r1 &= working_array[idx]; }); } @@ -100,8 +97,8 @@ void ForallReduceBitAndBasicTestImpl(const SEG_TYPE& seg, ASSERT_EQ(static_cast(redand), ref_and); - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } @@ -125,48 +122,43 @@ TYPED_TEST_P(ForallReduceBitAndBasicTest, ReduceBitAndBasicForall) // Range segment tests RAJA::TypedRangeSegment r1(0, 28); RAJA::getIndices(seg_idx, r1); - ForallReduceBitAndBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r1, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r1, seg_idx, + working_res); seg_idx.clear(); RAJA::TypedRangeSegment r2(3, 642); RAJA::getIndices(seg_idx, r2); - ForallReduceBitAndBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r2, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r2, seg_idx, + working_res); seg_idx.clear(); RAJA::TypedRangeSegment r3(0, 2057); RAJA::getIndices(seg_idx, r3); - ForallReduceBitAndBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r3, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r3, seg_idx, + working_res); // Range-stride segment tests seg_idx.clear(); RAJA::TypedRangeStrideSegment r4(0, 188, 2); RAJA::getIndices(seg_idx, r4); - ForallReduceBitAndBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r4, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r4, seg_idx, + working_res); seg_idx.clear(); RAJA::TypedRangeStrideSegment r5(3, 1029, 3); RAJA::getIndices(seg_idx, r5); - ForallReduceBitAndBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r5, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r5, seg_idx, + working_res); // List segment tests seg_idx.clear(); @@ -181,10 +173,8 @@ TYPED_TEST_P(ForallReduceBitAndBasicTest, ReduceBitAndBasicForall) } } RAJA::TypedListSegment l1(&seg_idx[0], seg_idx.size(), working_res); - ForallReduceBitAndBasicTestImpl, - EXEC_POLICY, + ForallReduceBitAndBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(l1, seg_idx, working_res); } diff --git a/test/functional/forall/reduce-basic/tests/test-forall-basic-expt-ReduceBitOr.hpp b/test/functional/forall/reduce-basic/tests/test-forall-basic-expt-ReduceBitOr.hpp index fdc9ca64ee..118a2488b7 100644 --- a/test/functional/forall/reduce-basic/tests/test-forall-basic-expt-ReduceBitOr.hpp +++ b/test/functional/forall/reduce-basic/tests/test-forall-basic-expt-ReduceBitOr.hpp @@ -29,8 +29,8 @@ void ForallReduceBitOrBasicTestImpl(const SEG_TYPE& seg, DATA_TYPE* check_array; DATA_TYPE* test_array; - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, + &check_array, &test_array); // // First a simple non-trivial test that is mildly interesting @@ -43,8 +43,7 @@ void ForallReduceBitOrBasicTestImpl(const SEG_TYPE& seg, RAJA::ReduceBitOr simpor(5); - RAJA::forall(seg, - [=] RAJA_HOST_DEVICE(IDX_TYPE idx) + RAJA::forall(seg, [=] RAJA_HOST_DEVICE(IDX_TYPE idx) { simpor |= working_array[idx]; }); ASSERT_EQ(static_cast(simpor.get()), 13); @@ -72,8 +71,7 @@ void ForallReduceBitOrBasicTestImpl(const SEG_TYPE& seg, DATA_TYPE redor2(2); RAJA::forall( - seg, - RAJA::expt::Reduce(&redor), + seg, RAJA::expt::Reduce(&redor), RAJA::expt::Reduce(&redor2), RAJA::expt::KernelName("RAJA Reduce BitOr"), [=] RAJA_HOST_DEVICE(IDX_TYPE idx, DATA_TYPE & r1, DATA_TYPE & r2) @@ -91,8 +89,7 @@ void ForallReduceBitOrBasicTestImpl(const SEG_TYPE& seg, for (int j = 0; j < nloops; ++j) { RAJA::forall( - seg, - RAJA::expt::Reduce(&redor), + seg, RAJA::expt::Reduce(&redor), [=] RAJA_HOST_DEVICE(IDX_TYPE idx, DATA_TYPE & r1) { r1 |= working_array[idx]; }); } @@ -100,8 +97,8 @@ void ForallReduceBitOrBasicTestImpl(const SEG_TYPE& seg, ASSERT_EQ(static_cast(redor), ref_or); - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } TYPED_TEST_SUITE_P(ForallReduceBitOrBasicTest); @@ -125,48 +122,40 @@ TYPED_TEST_P(ForallReduceBitOrBasicTest, ReduceBitOrBasicForall) // Range segment tests RAJA::TypedRangeSegment r1(0, 28); RAJA::getIndices(seg_idx, r1); - ForallReduceBitOrBasicTestImpl, - EXEC_POLICY, + ForallReduceBitOrBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(r1, seg_idx, working_res); seg_idx.clear(); RAJA::TypedRangeSegment r2(3, 642); RAJA::getIndices(seg_idx, r2); - ForallReduceBitOrBasicTestImpl, - EXEC_POLICY, + ForallReduceBitOrBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(r2, seg_idx, working_res); seg_idx.clear(); RAJA::TypedRangeSegment r3(0, 2057); RAJA::getIndices(seg_idx, r3); - ForallReduceBitOrBasicTestImpl, - EXEC_POLICY, + ForallReduceBitOrBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(r3, seg_idx, working_res); // Range-stride segment tests seg_idx.clear(); RAJA::TypedRangeStrideSegment r4(0, 188, 2); RAJA::getIndices(seg_idx, r4); - ForallReduceBitOrBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r4, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r4, seg_idx, + working_res); seg_idx.clear(); RAJA::TypedRangeStrideSegment r5(3, 1029, 3); RAJA::getIndices(seg_idx, r5); - ForallReduceBitOrBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r5, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r5, seg_idx, + working_res); // List segment tests seg_idx.clear(); @@ -181,10 +170,8 @@ TYPED_TEST_P(ForallReduceBitOrBasicTest, ReduceBitOrBasicForall) } } RAJA::TypedListSegment l1(&seg_idx[0], seg_idx.size(), working_res); - ForallReduceBitOrBasicTestImpl, - EXEC_POLICY, + ForallReduceBitOrBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(l1, seg_idx, working_res); } diff --git a/test/functional/forall/reduce-basic/tests/test-forall-basic-expt-ReduceMax.hpp b/test/functional/forall/reduce-basic/tests/test-forall-basic-expt-ReduceMax.hpp index 7f38c25324..0afa44d017 100644 --- a/test/functional/forall/reduce-basic/tests/test-forall-basic-expt-ReduceMax.hpp +++ b/test/functional/forall/reduce-basic/tests/test-forall-basic-expt-ReduceMax.hpp @@ -29,8 +29,8 @@ void ForallReduceMaxBasicTestImpl(const SEG_TYPE& seg, DATA_TYPE* check_array; DATA_TYPE* test_array; - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, + &check_array, &test_array); const int modval = 100; const DATA_TYPE max_init = -1; @@ -53,8 +53,7 @@ void ForallReduceMaxBasicTestImpl(const SEG_TYPE& seg, DATA_TYPE max(max_init); RAJA::forall( - seg, - RAJA::expt::Reduce(&maxinit), + seg, RAJA::expt::Reduce(&maxinit), RAJA::expt::Reduce(&max), RAJA::expt::KernelName("RAJA Reduce Max"), [=] RAJA_HOST_DEVICE(IDX_TYPE idx, DATA_TYPE & mi, DATA_TYPE & m) @@ -84,8 +83,8 @@ void ForallReduceMaxBasicTestImpl(const SEG_TYPE& seg, ASSERT_EQ(static_cast(max), ref_max * factor); - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } TYPED_TEST_SUITE_P(ForallReduceMaxBasicTest); @@ -108,48 +107,40 @@ TYPED_TEST_P(ForallReduceMaxBasicTest, ReduceMaxBasicForall) // Range segment tests RAJA::TypedRangeSegment r1(0, 28); RAJA::getIndices(seg_idx, r1); - ForallReduceMaxBasicTestImpl, - EXEC_POLICY, + ForallReduceMaxBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(r1, seg_idx, working_res); seg_idx.clear(); RAJA::TypedRangeSegment r2(3, 642); RAJA::getIndices(seg_idx, r2); - ForallReduceMaxBasicTestImpl, - EXEC_POLICY, + ForallReduceMaxBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(r2, seg_idx, working_res); seg_idx.clear(); RAJA::TypedRangeSegment r3(0, 2057); RAJA::getIndices(seg_idx, r3); - ForallReduceMaxBasicTestImpl, - EXEC_POLICY, + ForallReduceMaxBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(r3, seg_idx, working_res); // Range-stride segment tests seg_idx.clear(); RAJA::TypedRangeStrideSegment r4(0, 188, 2); RAJA::getIndices(seg_idx, r4); - ForallReduceMaxBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r4, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r4, seg_idx, + working_res); seg_idx.clear(); RAJA::TypedRangeStrideSegment r5(3, 1029, 3); RAJA::getIndices(seg_idx, r5); - ForallReduceMaxBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r5, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r5, seg_idx, + working_res); // List segment tests seg_idx.clear(); @@ -164,10 +155,8 @@ TYPED_TEST_P(ForallReduceMaxBasicTest, ReduceMaxBasicForall) } } RAJA::TypedListSegment l1(&seg_idx[0], seg_idx.size(), working_res); - ForallReduceMaxBasicTestImpl, - EXEC_POLICY, + ForallReduceMaxBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(l1, seg_idx, working_res); } diff --git a/test/functional/forall/reduce-basic/tests/test-forall-basic-expt-ReduceMaxLoc.hpp b/test/functional/forall/reduce-basic/tests/test-forall-basic-expt-ReduceMaxLoc.hpp index 61705c86d3..7e225c0cf2 100644 --- a/test/functional/forall/reduce-basic/tests/test-forall-basic-expt-ReduceMaxLoc.hpp +++ b/test/functional/forall/reduce-basic/tests/test-forall-basic-expt-ReduceMaxLoc.hpp @@ -29,8 +29,8 @@ void ForallReduceMaxLocBasicTestImpl(const SEG_TYPE& seg, DATA_TYPE* check_array; DATA_TYPE* test_array; - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, + &check_array, &test_array); const int modval = 100; const DATA_TYPE max_init = -modval; @@ -64,8 +64,7 @@ void ForallReduceMaxLocBasicTestImpl(const SEG_TYPE& seg, VL_TYPE max(max_init, maxloc_init); RAJA::forall( - seg, - RAJA::expt::Reduce(&maxinit), + seg, RAJA::expt::Reduce(&maxinit), RAJA::expt::Reduce(&max), RAJA::expt::KernelName("RAJA Reduce MaxLoc"), [=] RAJA_HOST_DEVICE(IDX_TYPE idx, VL_TYPE & mi, VL_TYPE & m) @@ -100,8 +99,8 @@ void ForallReduceMaxLocBasicTestImpl(const SEG_TYPE& seg, ASSERT_EQ(static_cast(max.getLoc()), ref_maxloc); - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } TYPED_TEST_SUITE_P(ForallReduceMaxLocBasicTest); @@ -124,48 +123,43 @@ TYPED_TEST_P(ForallReduceMaxLocBasicTest, ReduceMaxLocBasicForall) // Range segment tests RAJA::TypedRangeSegment r1(0, 28); RAJA::getIndices(seg_idx, r1); - ForallReduceMaxLocBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r1, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r1, seg_idx, + working_res); seg_idx.clear(); RAJA::TypedRangeSegment r2(3, 642); RAJA::getIndices(seg_idx, r2); - ForallReduceMaxLocBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r2, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r2, seg_idx, + working_res); seg_idx.clear(); RAJA::TypedRangeSegment r3(0, 2057); RAJA::getIndices(seg_idx, r3); - ForallReduceMaxLocBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r3, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r3, seg_idx, + working_res); // Range-stride segment tests seg_idx.clear(); RAJA::TypedRangeStrideSegment r4(0, 188, 2); RAJA::getIndices(seg_idx, r4); - ForallReduceMaxLocBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r4, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r4, seg_idx, + working_res); seg_idx.clear(); RAJA::TypedRangeStrideSegment r5(3, 1029, 3); RAJA::getIndices(seg_idx, r5); - ForallReduceMaxLocBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r5, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r5, seg_idx, + working_res); // List segment tests seg_idx.clear(); @@ -180,10 +174,8 @@ TYPED_TEST_P(ForallReduceMaxLocBasicTest, ReduceMaxLocBasicForall) } } RAJA::TypedListSegment l1(&seg_idx[0], seg_idx.size(), working_res); - ForallReduceMaxLocBasicTestImpl, - EXEC_POLICY, + ForallReduceMaxLocBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(l1, seg_idx, working_res); } diff --git a/test/functional/forall/reduce-basic/tests/test-forall-basic-expt-ReduceMin.hpp b/test/functional/forall/reduce-basic/tests/test-forall-basic-expt-ReduceMin.hpp index a2395bdf3e..beef5ee707 100644 --- a/test/functional/forall/reduce-basic/tests/test-forall-basic-expt-ReduceMin.hpp +++ b/test/functional/forall/reduce-basic/tests/test-forall-basic-expt-ReduceMin.hpp @@ -29,8 +29,8 @@ void ForallReduceMinBasicTestImpl(const SEG_TYPE& seg, DATA_TYPE* check_array; DATA_TYPE* test_array; - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, + &check_array, &test_array); const int modval = 100; const DATA_TYPE min_init = modval + 1; @@ -53,8 +53,7 @@ void ForallReduceMinBasicTestImpl(const SEG_TYPE& seg, DATA_TYPE min(min_init); RAJA::forall( - seg, - RAJA::expt::Reduce(&mininit), + seg, RAJA::expt::Reduce(&mininit), RAJA::expt::Reduce(&min), RAJA::expt::KernelName("RAJA Reduce Min"), [=] RAJA_HOST_DEVICE(IDX_TYPE idx, DATA_TYPE & mi, DATA_TYPE & m) @@ -83,8 +82,8 @@ void ForallReduceMinBasicTestImpl(const SEG_TYPE& seg, { m = RAJA_MIN(working_array[idx] * factor, m); }); ASSERT_EQ(static_cast(min), ref_min * factor); - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } @@ -108,48 +107,40 @@ TYPED_TEST_P(ForallReduceMinBasicTest, ReduceMinBasicForall) // Range segment tests RAJA::TypedRangeSegment r1(0, 28); RAJA::getIndices(seg_idx, r1); - ForallReduceMinBasicTestImpl, - EXEC_POLICY, + ForallReduceMinBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(r1, seg_idx, working_res); seg_idx.clear(); RAJA::TypedRangeSegment r2(3, 642); RAJA::getIndices(seg_idx, r2); - ForallReduceMinBasicTestImpl, - EXEC_POLICY, + ForallReduceMinBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(r2, seg_idx, working_res); seg_idx.clear(); RAJA::TypedRangeSegment r3(0, 2057); RAJA::getIndices(seg_idx, r3); - ForallReduceMinBasicTestImpl, - EXEC_POLICY, + ForallReduceMinBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(r3, seg_idx, working_res); // Range-stride segment tests seg_idx.clear(); RAJA::TypedRangeStrideSegment r4(0, 188, 2); RAJA::getIndices(seg_idx, r4); - ForallReduceMinBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r4, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r4, seg_idx, + working_res); seg_idx.clear(); RAJA::TypedRangeStrideSegment r5(3, 1029, 3); RAJA::getIndices(seg_idx, r5); - ForallReduceMinBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r5, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r5, seg_idx, + working_res); // List segment tests seg_idx.clear(); @@ -164,10 +155,8 @@ TYPED_TEST_P(ForallReduceMinBasicTest, ReduceMinBasicForall) } } RAJA::TypedListSegment l1(&seg_idx[0], seg_idx.size(), working_res); - ForallReduceMinBasicTestImpl, - EXEC_POLICY, + ForallReduceMinBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(l1, seg_idx, working_res); } diff --git a/test/functional/forall/reduce-basic/tests/test-forall-basic-expt-ReduceMinLoc.hpp b/test/functional/forall/reduce-basic/tests/test-forall-basic-expt-ReduceMinLoc.hpp index da34fc618d..33f05290c8 100644 --- a/test/functional/forall/reduce-basic/tests/test-forall-basic-expt-ReduceMinLoc.hpp +++ b/test/functional/forall/reduce-basic/tests/test-forall-basic-expt-ReduceMinLoc.hpp @@ -29,8 +29,8 @@ void ForallReduceMinLocBasicTestImpl(const SEG_TYPE& seg, DATA_TYPE* check_array; DATA_TYPE* test_array; - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, + &check_array, &test_array); const int modval = 100; const DATA_TYPE min_init = modval + 1; @@ -64,8 +64,7 @@ void ForallReduceMinLocBasicTestImpl(const SEG_TYPE& seg, VL_TYPE min(min_init, minloc_init); RAJA::forall( - seg, - RAJA::expt::Reduce(&mininit), + seg, RAJA::expt::Reduce(&mininit), RAJA::expt::Reduce(&min), RAJA::expt::KernelName("RAJA Reduce MinLoc"), [=] RAJA_HOST_DEVICE(IDX_TYPE idx, VL_TYPE & mi, VL_TYPE & m) @@ -100,8 +99,8 @@ void ForallReduceMinLocBasicTestImpl(const SEG_TYPE& seg, ASSERT_EQ(static_cast(min.getLoc()), ref_minloc); - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } TYPED_TEST_SUITE_P(ForallReduceMinLocBasicTest); @@ -124,48 +123,43 @@ TYPED_TEST_P(ForallReduceMinLocBasicTest, ReduceMinLocBasicForall) // Range segment tests RAJA::TypedRangeSegment r1(0, 28); RAJA::getIndices(seg_idx, r1); - ForallReduceMinLocBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r1, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r1, seg_idx, + working_res); seg_idx.clear(); RAJA::TypedRangeSegment r2(3, 642); RAJA::getIndices(seg_idx, r2); - ForallReduceMinLocBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r2, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r2, seg_idx, + working_res); seg_idx.clear(); RAJA::TypedRangeSegment r3(0, 2057); RAJA::getIndices(seg_idx, r3); - ForallReduceMinLocBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r3, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r3, seg_idx, + working_res); // Range-stride segment tests seg_idx.clear(); RAJA::TypedRangeStrideSegment r4(0, 188, 2); RAJA::getIndices(seg_idx, r4); - ForallReduceMinLocBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r4, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r4, seg_idx, + working_res); seg_idx.clear(); RAJA::TypedRangeStrideSegment r5(3, 1029, 3); RAJA::getIndices(seg_idx, r5); - ForallReduceMinLocBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r5, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r5, seg_idx, + working_res); // List segment tests seg_idx.clear(); @@ -180,10 +174,8 @@ TYPED_TEST_P(ForallReduceMinLocBasicTest, ReduceMinLocBasicForall) } } RAJA::TypedListSegment l1(&seg_idx[0], seg_idx.size(), working_res); - ForallReduceMinLocBasicTestImpl, - EXEC_POLICY, + ForallReduceMinLocBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(l1, seg_idx, working_res); } diff --git a/test/functional/forall/reduce-basic/tests/test-forall-basic-expt-ReduceSum.hpp b/test/functional/forall/reduce-basic/tests/test-forall-basic-expt-ReduceSum.hpp index f324845cd3..e794486608 100644 --- a/test/functional/forall/reduce-basic/tests/test-forall-basic-expt-ReduceSum.hpp +++ b/test/functional/forall/reduce-basic/tests/test-forall-basic-expt-ReduceSum.hpp @@ -29,8 +29,8 @@ void ForallReduceSumBasicTestImpl(const SEG_TYPE& seg, DATA_TYPE* check_array; DATA_TYPE* test_array; - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, + &check_array, &test_array); const int modval = 100; @@ -51,8 +51,7 @@ void ForallReduceSumBasicTestImpl(const SEG_TYPE& seg, DATA_TYPE sum2 = 2; RAJA::forall( - seg, - RAJA::expt::Reduce(&sum), + seg, RAJA::expt::Reduce(&sum), RAJA::expt::Reduce(&sum2), RAJA::expt::KernelName("RAJA Reduce Sum"), [=] RAJA_HOST_DEVICE(IDX_TYPE idx, DATA_TYPE & s1, DATA_TYPE & s2) @@ -79,8 +78,8 @@ void ForallReduceSumBasicTestImpl(const SEG_TYPE& seg, ASSERT_EQ(static_cast(sum), nloops * ref_sum); - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } @@ -104,48 +103,40 @@ TYPED_TEST_P(ForallReduceSumBasicTest, ReduceSumBasicForall) // Range segment tests RAJA::TypedRangeSegment r1(0, 28); RAJA::getIndices(seg_idx, r1); - ForallReduceSumBasicTestImpl, - EXEC_POLICY, + ForallReduceSumBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(r1, seg_idx, working_res); seg_idx.clear(); RAJA::TypedRangeSegment r2(3, 642); RAJA::getIndices(seg_idx, r2); - ForallReduceSumBasicTestImpl, - EXEC_POLICY, + ForallReduceSumBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(r2, seg_idx, working_res); seg_idx.clear(); RAJA::TypedRangeSegment r3(0, 2057); RAJA::getIndices(seg_idx, r3); - ForallReduceSumBasicTestImpl, - EXEC_POLICY, + ForallReduceSumBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(r3, seg_idx, working_res); // Range-stride segment tests seg_idx.clear(); RAJA::TypedRangeStrideSegment r4(0, 188, 2); RAJA::getIndices(seg_idx, r4); - ForallReduceSumBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r4, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r4, seg_idx, + working_res); seg_idx.clear(); RAJA::TypedRangeStrideSegment r5(3, 1029, 3); RAJA::getIndices(seg_idx, r5); - ForallReduceSumBasicTestImpl, - EXEC_POLICY, - REDUCE_POLICY>(r5, seg_idx, working_res); + EXEC_POLICY, REDUCE_POLICY>(r5, seg_idx, + working_res); // List segment tests seg_idx.clear(); @@ -160,10 +151,8 @@ TYPED_TEST_P(ForallReduceSumBasicTest, ReduceSumBasicForall) } } RAJA::TypedListSegment l1(&seg_idx[0], seg_idx.size(), working_res); - ForallReduceSumBasicTestImpl, - EXEC_POLICY, + ForallReduceSumBasicTestImpl, EXEC_POLICY, REDUCE_POLICY>(l1, seg_idx, working_res); } diff --git a/test/functional/forall/reduce-multiple-indexset/tests/test-forall-indexset-multiple-ReduceMax.hpp b/test/functional/forall/reduce-multiple-indexset/tests/test-forall-indexset-multiple-ReduceMax.hpp index 40f4b87829..af10db39e5 100644 --- a/test/functional/forall/reduce-multiple-indexset/tests/test-forall-indexset-multiple-ReduceMax.hpp +++ b/test/functional/forall/reduce-multiple-indexset/tests/test-forall-indexset-multiple-ReduceMax.hpp @@ -47,8 +47,8 @@ void ForallIndexSetReduceMaxMultipleTestImpl() double* check_array; double* test_array; - allocateForallTestData( - alen, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(alen, working_res, &working_array, + &check_array, &test_array); const double default_val = -DBL_MAX; @@ -97,8 +97,8 @@ void ForallIndexSetReduceMaxMultipleTestImpl() ASSERT_FLOAT_EQ(static_cast(dmax1.get()), 2 * current_max); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } TYPED_TEST_SUITE_P(ForallIndexSetReduceMaxMultipleTest); @@ -114,9 +114,7 @@ TYPED_TEST_P(ForallIndexSetReduceMaxMultipleTest, using EXEC_POLICY = typename camp::at>::type; using REDUCE_POLICY = typename camp::at>::type; - ForallIndexSetReduceMaxMultipleTestImpl(); } diff --git a/test/functional/forall/reduce-multiple-indexset/tests/test-forall-indexset-multiple-ReduceMaxLoc.hpp b/test/functional/forall/reduce-multiple-indexset/tests/test-forall-indexset-multiple-ReduceMaxLoc.hpp index 7fd5d20dbe..161f04a3cb 100644 --- a/test/functional/forall/reduce-multiple-indexset/tests/test-forall-indexset-multiple-ReduceMaxLoc.hpp +++ b/test/functional/forall/reduce-multiple-indexset/tests/test-forall-indexset-multiple-ReduceMaxLoc.hpp @@ -47,8 +47,8 @@ void ForallIndexSetReduceMaxLocMultipleTestImpl() double* check_array; double* test_array; - allocateForallTestData( - alen, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(alen, working_res, &working_array, + &check_array, &test_array); double current_max = -DBL_MAX; IDX_TYPE current_loc = -1; @@ -94,8 +94,8 @@ void ForallIndexSetReduceMaxLocMultipleTestImpl() ASSERT_EQ(static_cast(dmax1.getLoc()), current_loc); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } TYPED_TEST_SUITE_P(ForallIndexSetReduceMaxLocMultipleTest); @@ -111,9 +111,7 @@ TYPED_TEST_P(ForallIndexSetReduceMaxLocMultipleTest, using EXEC_POLICY = typename camp::at>::type; using REDUCE_POLICY = typename camp::at>::type; - ForallIndexSetReduceMaxLocMultipleTestImpl(); } diff --git a/test/functional/forall/reduce-multiple-indexset/tests/test-forall-indexset-multiple-ReduceMin.hpp b/test/functional/forall/reduce-multiple-indexset/tests/test-forall-indexset-multiple-ReduceMin.hpp index b99a446fdd..3a04bc4764 100644 --- a/test/functional/forall/reduce-multiple-indexset/tests/test-forall-indexset-multiple-ReduceMin.hpp +++ b/test/functional/forall/reduce-multiple-indexset/tests/test-forall-indexset-multiple-ReduceMin.hpp @@ -47,8 +47,8 @@ void ForallIndexSetReduceMinMultipleTestImpl() double* check_array; double* test_array; - allocateForallTestData( - alen, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(alen, working_res, &working_array, + &check_array, &test_array); const double default_val = DBL_MAX; @@ -97,8 +97,8 @@ void ForallIndexSetReduceMinMultipleTestImpl() ASSERT_FLOAT_EQ(static_cast(dmin1.get()), 2 * current_min); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } TYPED_TEST_SUITE_P(ForallIndexSetReduceMinMultipleTest); @@ -114,9 +114,7 @@ TYPED_TEST_P(ForallIndexSetReduceMinMultipleTest, using EXEC_POLICY = typename camp::at>::type; using REDUCE_POLICY = typename camp::at>::type; - ForallIndexSetReduceMinMultipleTestImpl(); } diff --git a/test/functional/forall/reduce-multiple-indexset/tests/test-forall-indexset-multiple-ReduceMinLoc.hpp b/test/functional/forall/reduce-multiple-indexset/tests/test-forall-indexset-multiple-ReduceMinLoc.hpp index c6d0b5645d..89acb80207 100644 --- a/test/functional/forall/reduce-multiple-indexset/tests/test-forall-indexset-multiple-ReduceMinLoc.hpp +++ b/test/functional/forall/reduce-multiple-indexset/tests/test-forall-indexset-multiple-ReduceMinLoc.hpp @@ -47,8 +47,8 @@ void ForallIndexSetReduceMinLocMultipleTestImpl() double* check_array; double* test_array; - allocateForallTestData( - alen, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(alen, working_res, &working_array, + &check_array, &test_array); double current_min = DBL_MAX; IDX_TYPE current_loc = -1; @@ -94,8 +94,8 @@ void ForallIndexSetReduceMinLocMultipleTestImpl() ASSERT_EQ(static_cast(dmin1.getLoc()), current_loc); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } TYPED_TEST_SUITE_P(ForallIndexSetReduceMinLocMultipleTest); @@ -111,9 +111,7 @@ TYPED_TEST_P(ForallIndexSetReduceMinLocMultipleTest, using EXEC_POLICY = typename camp::at>::type; using REDUCE_POLICY = typename camp::at>::type; - ForallIndexSetReduceMinLocMultipleTestImpl(); } diff --git a/test/functional/forall/reduce-multiple-indexset/tests/test-forall-indexset-multiple-ReduceSum.hpp b/test/functional/forall/reduce-multiple-indexset/tests/test-forall-indexset-multiple-ReduceSum.hpp index 16e80f31d7..7811a605bc 100644 --- a/test/functional/forall/reduce-multiple-indexset/tests/test-forall-indexset-multiple-ReduceSum.hpp +++ b/test/functional/forall/reduce-multiple-indexset/tests/test-forall-indexset-multiple-ReduceSum.hpp @@ -45,15 +45,15 @@ void ForallIndexSetReduceSumMultipleTestImpl() double* dcheck_array; double* dtest_array; - allocateForallTestData( - alen, working_res, &dworking_array, &dcheck_array, &dtest_array); + allocateForallTestData(alen, working_res, &dworking_array, + &dcheck_array, &dtest_array); int* iworking_array; int* icheck_array; int* itest_array; - allocateForallTestData( - alen, working_res, &iworking_array, &icheck_array, &itest_array); + allocateForallTestData(alen, working_res, &iworking_array, &icheck_array, + &itest_array); const double dinit_val = 0.1; const int iinit_val = 1; @@ -101,11 +101,11 @@ void ForallIndexSetReduceSumMultipleTestImpl() tcount * (4 * ichk_val) + (irinit * 4)); } - deallocateForallTestData( - working_res, dworking_array, dcheck_array, dtest_array); + deallocateForallTestData(working_res, dworking_array, dcheck_array, + dtest_array); - deallocateForallTestData( - working_res, iworking_array, icheck_array, itest_array); + deallocateForallTestData(working_res, iworking_array, icheck_array, + itest_array); } TYPED_TEST_SUITE_P(ForallIndexSetReduceSumMultipleTest); @@ -121,9 +121,7 @@ TYPED_TEST_P(ForallIndexSetReduceSumMultipleTest, using EXEC_POLICY = typename camp::at>::type; using REDUCE_POLICY = typename camp::at>::type; - ForallIndexSetReduceSumMultipleTestImpl(); } diff --git a/test/functional/forall/reduce-multiple-segment/tests/test-forall-segment-multiple-ReduceMax.hpp b/test/functional/forall/reduce-multiple-segment/tests/test-forall-segment-multiple-ReduceMax.hpp index 5ceb700c2a..281ad51d43 100644 --- a/test/functional/forall/reduce-multiple-segment/tests/test-forall-segment-multiple-ReduceMax.hpp +++ b/test/functional/forall/reduce-multiple-segment/tests/test-forall-segment-multiple-ReduceMax.hpp @@ -28,8 +28,8 @@ void ForallReduceMaxMultipleTestImpl(IDX_TYPE first, IDX_TYPE last) DATA_TYPE* check_array; DATA_TYPE* test_array; - allocateForallTestData( - last, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(last, working_res, &working_array, + &check_array, &test_array); const DATA_TYPE default_val = static_cast(-SHRT_MAX); const DATA_TYPE big_val = 500; @@ -38,7 +38,7 @@ void ForallReduceMaxMultipleTestImpl(IDX_TYPE first, IDX_TYPE last) static std::mt19937 mt(rd()); static std::uniform_real_distribution dist(-100, 100); static std::uniform_int_distribution dist2(static_cast(first), - static_cast(last) - 1); + static_cast(last) - 1); // Workaround for broken omp-target reduction interface. // This should be `max0;` not `max0(0);` @@ -75,8 +75,7 @@ void ForallReduceMaxMultipleTestImpl(IDX_TYPE first, IDX_TYPE last) IDX_TYPE max_index = static_cast(dist2(mt)); test_array[max_index] = roll; - working_res.memcpy(&working_array[max_index], - &test_array[max_index], + working_res.memcpy(&working_array[max_index], &test_array[max_index], sizeof(DATA_TYPE)); if (current_max < roll) @@ -107,8 +106,8 @@ void ForallReduceMaxMultipleTestImpl(IDX_TYPE first, IDX_TYPE last) ASSERT_EQ(default_val, static_cast(max1.get())); ASSERT_EQ(big_val, static_cast(max2.get())); - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } TYPED_TEST_SUITE_P(ForallReduceMaxMultipleTest); @@ -124,10 +123,7 @@ TYPED_TEST_P(ForallReduceMaxMultipleTest, ReduceMaxMultipleForall) using EXEC_POLICY = typename camp::at>::type; using REDUCE_POLICY = typename camp::at>::type; - ForallReduceMaxMultipleTestImpl(0, 2115); } diff --git a/test/functional/forall/reduce-multiple-segment/tests/test-forall-segment-multiple-ReduceMaxLoc.hpp b/test/functional/forall/reduce-multiple-segment/tests/test-forall-segment-multiple-ReduceMaxLoc.hpp index 5fa290a86e..2351c790ca 100644 --- a/test/functional/forall/reduce-multiple-segment/tests/test-forall-segment-multiple-ReduceMaxLoc.hpp +++ b/test/functional/forall/reduce-multiple-segment/tests/test-forall-segment-multiple-ReduceMaxLoc.hpp @@ -28,8 +28,8 @@ void ForallReduceMaxLocMultipleTestImpl(IDX_TYPE first, IDX_TYPE last) DATA_TYPE* check_array; DATA_TYPE* test_array; - allocateForallTestData( - last, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(last, working_res, &working_array, + &check_array, &test_array); const DATA_TYPE default_val = static_cast(-SHRT_MAX); const IDX_TYPE default_loc = -1; @@ -39,7 +39,7 @@ void ForallReduceMaxLocMultipleTestImpl(IDX_TYPE first, IDX_TYPE last) static std::mt19937 mt(rd()); static std::uniform_real_distribution dist(-100, 100); static std::uniform_int_distribution dist2(static_cast(first), - static_cast(last) - 1); + static_cast(last) - 1); RAJA::ReduceMaxLoc max0(default_val, default_loc); @@ -84,8 +84,7 @@ void ForallReduceMaxLocMultipleTestImpl(IDX_TYPE first, IDX_TYPE last) if (current_max != roll) { // avoid two indices getting the same value test_array[max_index] = roll; - working_res.memcpy(&working_array[max_index], - &test_array[max_index], + working_res.memcpy(&working_array[max_index], &test_array[max_index], sizeof(DATA_TYPE)); if (current_max < roll) @@ -128,8 +127,8 @@ void ForallReduceMaxLocMultipleTestImpl(IDX_TYPE first, IDX_TYPE last) ASSERT_EQ(big_val, static_cast(max2.get())); ASSERT_EQ(default_loc, static_cast(max2.getLoc())); - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } TYPED_TEST_SUITE_P(ForallReduceMaxLocMultipleTest); @@ -145,11 +144,8 @@ TYPED_TEST_P(ForallReduceMaxLocMultipleTest, ReduceMaxLocMultipleForall) using EXEC_POLICY = typename camp::at>::type; using REDUCE_POLICY = typename camp::at>::type; - ForallReduceMaxLocMultipleTestImpl(0, 2115); + ForallReduceMaxLocMultipleTestImpl(0, 2115); } REGISTER_TYPED_TEST_SUITE_P(ForallReduceMaxLocMultipleTest, diff --git a/test/functional/forall/reduce-multiple-segment/tests/test-forall-segment-multiple-ReduceMin.hpp b/test/functional/forall/reduce-multiple-segment/tests/test-forall-segment-multiple-ReduceMin.hpp index c9c6fbd84e..60ab8d817c 100644 --- a/test/functional/forall/reduce-multiple-segment/tests/test-forall-segment-multiple-ReduceMin.hpp +++ b/test/functional/forall/reduce-multiple-segment/tests/test-forall-segment-multiple-ReduceMin.hpp @@ -28,8 +28,8 @@ void ForallReduceMinMultipleTestImpl(IDX_TYPE first, IDX_TYPE last) DATA_TYPE* check_array; DATA_TYPE* test_array; - allocateForallTestData( - last, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(last, working_res, &working_array, + &check_array, &test_array); const DATA_TYPE default_val = static_cast(SHRT_MAX); const DATA_TYPE big_val = -500; @@ -38,7 +38,7 @@ void ForallReduceMinMultipleTestImpl(IDX_TYPE first, IDX_TYPE last) static std::mt19937 mt(rd()); static std::uniform_real_distribution dist(-100, 100); static std::uniform_int_distribution dist2(static_cast(first), - static_cast(last) - 1); + static_cast(last) - 1); // Workaround for broken omp-target reduction interface. // This should be `min0;` not `min0(0);` @@ -75,8 +75,7 @@ void ForallReduceMinMultipleTestImpl(IDX_TYPE first, IDX_TYPE last) IDX_TYPE min_index = static_cast(dist2(mt)); test_array[min_index] = roll; - working_res.memcpy(&working_array[min_index], - &test_array[min_index], + working_res.memcpy(&working_array[min_index], &test_array[min_index], sizeof(DATA_TYPE)); if (current_min > roll) @@ -107,8 +106,8 @@ void ForallReduceMinMultipleTestImpl(IDX_TYPE first, IDX_TYPE last) ASSERT_EQ(default_val, static_cast(min1.get())); ASSERT_EQ(big_val, static_cast(min2.get())); - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } TYPED_TEST_SUITE_P(ForallReduceMinMultipleTest); @@ -124,10 +123,7 @@ TYPED_TEST_P(ForallReduceMinMultipleTest, ReduceMinMultipleForall) using EXEC_POLICY = typename camp::at>::type; using REDUCE_POLICY = typename camp::at>::type; - ForallReduceMinMultipleTestImpl(0, 2115); } diff --git a/test/functional/forall/reduce-multiple-segment/tests/test-forall-segment-multiple-ReduceMinLoc.hpp b/test/functional/forall/reduce-multiple-segment/tests/test-forall-segment-multiple-ReduceMinLoc.hpp index 15df809f2b..5b39ce5547 100644 --- a/test/functional/forall/reduce-multiple-segment/tests/test-forall-segment-multiple-ReduceMinLoc.hpp +++ b/test/functional/forall/reduce-multiple-segment/tests/test-forall-segment-multiple-ReduceMinLoc.hpp @@ -28,8 +28,8 @@ void ForallReduceMinLocMultipleTestImpl(IDX_TYPE first, IDX_TYPE last) DATA_TYPE* check_array; DATA_TYPE* test_array; - allocateForallTestData( - last, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(last, working_res, &working_array, + &check_array, &test_array); const DATA_TYPE default_val = static_cast(SHRT_MAX); const IDX_TYPE default_loc = -1; @@ -39,7 +39,7 @@ void ForallReduceMinLocMultipleTestImpl(IDX_TYPE first, IDX_TYPE last) static std::mt19937 mt(rd()); static std::uniform_real_distribution dist(-100, 100); static std::uniform_int_distribution dist2(static_cast(first), - static_cast(last) - 1); + static_cast(last) - 1); printf("min0 init { %f, %f }\n", (double)default_val, (double)default_loc); RAJA::ReduceMinLoc min0(default_val, @@ -88,8 +88,7 @@ void ForallReduceMinLocMultipleTestImpl(IDX_TYPE first, IDX_TYPE last) if (current_min != roll) { // avoid two indices getting the same value test_array[min_index] = roll; - working_res.memcpy(&working_array[min_index], - &test_array[min_index], + working_res.memcpy(&working_array[min_index], &test_array[min_index], sizeof(DATA_TYPE)); if (current_min > roll) @@ -98,8 +97,8 @@ void ForallReduceMinLocMultipleTestImpl(IDX_TYPE first, IDX_TYPE last) current_loc = min_index; } } - printf( - "current { %f, %f }\n", (double)current_min, (double)current_loc); + printf("current { %f, %f }\n", (double)current_min, + (double)current_loc); RAJA::forall(r1, [=] RAJA_HOST_DEVICE(IDX_TYPE idx) @@ -137,8 +136,8 @@ void ForallReduceMinLocMultipleTestImpl(IDX_TYPE first, IDX_TYPE last) ASSERT_EQ(big_val, static_cast(min2.get())); ASSERT_EQ(default_loc, static_cast(min2.getLoc())); - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } TYPED_TEST_SUITE_P(ForallReduceMinLocMultipleTest); @@ -154,11 +153,8 @@ TYPED_TEST_P(ForallReduceMinLocMultipleTest, ReduceMinLocMultipleForall) using EXEC_POLICY = typename camp::at>::type; using REDUCE_POLICY = typename camp::at>::type; - ForallReduceMinLocMultipleTestImpl(0, 2115); + ForallReduceMinLocMultipleTestImpl(0, 2115); } REGISTER_TYPED_TEST_SUITE_P(ForallReduceMinLocMultipleTest, diff --git a/test/functional/forall/reduce-multiple-segment/tests/test-forall-segment-multiple-ReduceSum.hpp b/test/functional/forall/reduce-multiple-segment/tests/test-forall-segment-multiple-ReduceSum.hpp index 18f069e5de..8010cbafd0 100644 --- a/test/functional/forall/reduce-multiple-segment/tests/test-forall-segment-multiple-ReduceSum.hpp +++ b/test/functional/forall/reduce-multiple-segment/tests/test-forall-segment-multiple-ReduceSum.hpp @@ -25,8 +25,8 @@ void ForallReduceSumMultipleStaggeredTestImpl(IDX_TYPE first, IDX_TYPE last) DATA_TYPE* check_array; DATA_TYPE* test_array; - allocateForallTestData( - last, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(last, working_res, &working_array, + &check_array, &test_array); const DATA_TYPE initval = 2; @@ -82,8 +82,8 @@ void ForallReduceSumMultipleStaggeredTestImpl(IDX_TYPE first, IDX_TYPE last) static_cast(sum7.get())); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } template ( - last, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(last, working_res, &working_array, + &check_array, &test_array); const DATA_TYPE initval = 2; @@ -168,8 +168,8 @@ void ForallReduceSumMultipleStaggered2TestImpl(IDX_TYPE first, IDX_TYPE last) static_cast(sum7.get())); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } TYPED_TEST_SUITE_P(ForallReduceSumMultipleTest); @@ -185,17 +185,12 @@ TYPED_TEST_P(ForallReduceSumMultipleTest, ReduceSumMultipleForall) using EXEC_POLICY = typename camp::at>::type; using REDUCE_POLICY = typename camp::at>::type; - ForallReduceSumMultipleStaggeredTestImpl(0, 2115); - - ForallReduceSumMultipleStaggered2TestImpl(0, 2115); + ForallReduceSumMultipleStaggeredTestImpl(0, 2115); + + ForallReduceSumMultipleStaggered2TestImpl(0, + 2115); } REGISTER_TYPED_TEST_SUITE_P(ForallReduceSumMultipleTest, diff --git a/test/functional/forall/region/tests/test-forall-region.hpp b/test/functional/forall/region/tests/test-forall-region.hpp index 7fb536e61e..6f47f26032 100644 --- a/test/functional/forall/region/tests/test-forall-region.hpp +++ b/test/functional/forall/region/tests/test-forall-region.hpp @@ -35,20 +35,18 @@ void ForallRegionTestImpl(INDEX_TYPE first, INDEX_TYPE last) INDEX_TYPE* check_array; INDEX_TYPE* test_array; - allocateForallTestData( - N, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(N, working_res, &working_array, + &check_array, &test_array); working_res.memset(working_array, 0, sizeof(INDEX_TYPE) * N); RAJA::region( [=]() { - RAJA::forall(rseg, - [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) + RAJA::forall(rseg, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) { working_array[idx - first] += 1; }); - RAJA::forall(lseg, - [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) + RAJA::forall(lseg, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) { working_array[idx - first] += 2; }); }); @@ -60,8 +58,8 @@ void ForallRegionTestImpl(INDEX_TYPE first, INDEX_TYPE last) ASSERT_EQ(check_array[i], 3); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } diff --git a/test/functional/forall/resource-indexset/tests/test-forall-ResourceIcountIndexSet.hpp b/test/functional/forall/resource-indexset/tests/test-forall-ResourceIcountIndexSet.hpp index 78c9e1e6e1..d8f67dbf7a 100644 --- a/test/functional/forall/resource-indexset/tests/test-forall-ResourceIcountIndexSet.hpp +++ b/test/functional/forall/resource-indexset/tests/test-forall-ResourceIcountIndexSet.hpp @@ -44,8 +44,8 @@ void ForallResourceIcountIndexSetTestImpl() INDEX_TYPE* check_array; INDEX_TYPE* test_array; - allocateForallTestData( - N, erased_working_res, &working_array, &check_array, &test_array); + allocateForallTestData(N, erased_working_res, &working_array, + &check_array, &test_array); memset(test_array, 0, sizeof(INDEX_TYPE) * N); @@ -58,8 +58,7 @@ void ForallResourceIcountIndexSetTestImpl() } RAJA::forall_Icount( - working_res, - iset, + working_res, iset, [=] RAJA_HOST_DEVICE(INDEX_TYPE icount, INDEX_TYPE idx) { working_array[icount] = idx; }); @@ -70,8 +69,8 @@ void ForallResourceIcountIndexSetTestImpl() ASSERT_EQ(test_array[i], check_array[i]); } - deallocateForallTestData( - erased_working_res, working_array, check_array, test_array); + deallocateForallTestData(erased_working_res, working_array, + check_array, test_array); } @@ -86,8 +85,7 @@ TYPED_TEST_P(ForallResourceIcountIndexSetTest, ResourceIndexSetForallIcount) using WORKING_RESOURCE = typename camp::at>::type; using EXEC_POLICY = typename camp::at>::type; - ForallResourceIcountIndexSetTestImpl(); } diff --git a/test/functional/forall/resource-indexset/tests/test-forall-ResourceIndexSet.hpp b/test/functional/forall/resource-indexset/tests/test-forall-ResourceIndexSet.hpp index 6a1ad8a544..a462597fca 100644 --- a/test/functional/forall/resource-indexset/tests/test-forall-ResourceIndexSet.hpp +++ b/test/functional/forall/resource-indexset/tests/test-forall-ResourceIndexSet.hpp @@ -43,8 +43,8 @@ void ForallResourceIndexSetTestImpl() INDEX_TYPE* check_array; INDEX_TYPE* test_array; - allocateForallTestData( - N, erased_working_res, &working_array, &check_array, &test_array); + allocateForallTestData(N, erased_working_res, &working_array, + &check_array, &test_array); memset(test_array, 0, sizeof(INDEX_TYPE) * N); @@ -55,8 +55,7 @@ void ForallResourceIndexSetTestImpl() test_array[is_indices[i]] = is_indices[i]; } - RAJA::forall(working_res, - iset, + RAJA::forall(working_res, iset, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) { working_array[idx] = idx; }); @@ -68,8 +67,8 @@ void ForallResourceIndexSetTestImpl() ASSERT_EQ(test_array[i], check_array[i]); } - deallocateForallTestData( - erased_working_res, working_array, check_array, test_array); + deallocateForallTestData(erased_working_res, working_array, + check_array, test_array); } diff --git a/test/functional/forall/resource-segment/tests/test-forall-resource-ListSegment.hpp b/test/functional/forall/resource-segment/tests/test-forall-resource-ListSegment.hpp index bb1ec42e0a..460067476b 100644 --- a/test/functional/forall/resource-segment/tests/test-forall-resource-ListSegment.hpp +++ b/test/functional/forall/resource-segment/tests/test-forall-resource-ListSegment.hpp @@ -39,37 +39,36 @@ void ForallResourceListSegmentTestImpl(INDEX_TYPE N) camp::resources::Resource erased_working_res{working_res}; // Create list segment for tests - RAJA::TypedListSegment lseg( - &idx_array[0], idxlen, erased_working_res); + RAJA::TypedListSegment lseg(&idx_array[0], idxlen, + erased_working_res); INDEX_TYPE* working_array; INDEX_TYPE* check_array; INDEX_TYPE* test_array; - allocateForallTestData( - N, erased_working_res, &working_array, &check_array, &test_array); + allocateForallTestData(N, erased_working_res, &working_array, + &check_array, &test_array); for (INDEX_TYPE i = INDEX_TYPE(0); i < N; i++) { test_array[RAJA::stripIndexType(i)] = INDEX_TYPE(0); } - working_res.memcpy( - working_array, test_array, sizeof(INDEX_TYPE) * RAJA::stripIndexType(N)); + working_res.memcpy(working_array, test_array, + sizeof(INDEX_TYPE) * RAJA::stripIndexType(N)); for (size_t i = 0; i < idxlen; ++i) { test_array[RAJA::stripIndexType(idx_array[i])] = idx_array[i]; } - RAJA::forall(working_res, - lseg, + RAJA::forall(working_res, lseg, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) { working_array[RAJA::stripIndexType(idx)] = idx; }); - working_res.memcpy( - check_array, working_array, sizeof(INDEX_TYPE) * RAJA::stripIndexType(N)); + working_res.memcpy(check_array, working_array, + sizeof(INDEX_TYPE) * RAJA::stripIndexType(N)); // for (INDEX_TYPE i = INDEX_TYPE(0); i < N; i++) @@ -78,8 +77,8 @@ void ForallResourceListSegmentTestImpl(INDEX_TYPE N) check_array[RAJA::stripIndexType(i)]); } - deallocateForallTestData( - erased_working_res, working_array, check_array, test_array); + deallocateForallTestData(erased_working_res, working_array, + check_array, test_array); } diff --git a/test/functional/forall/resource-segment/tests/test-forall-resource-RangeSegment.hpp b/test/functional/forall/resource-segment/tests/test-forall-resource-RangeSegment.hpp index 6e44124c6e..0b0b068554 100644 --- a/test/functional/forall/resource-segment/tests/test-forall-resource-RangeSegment.hpp +++ b/test/functional/forall/resource-segment/tests/test-forall-resource-RangeSegment.hpp @@ -23,21 +23,20 @@ void ForallResourceRangeSegmentTestImpl(INDEX_TYPE first, INDEX_TYPE last) INDEX_TYPE* check_array; INDEX_TYPE* test_array; - allocateForallTestData( - N, erased_working_res, &working_array, &check_array, &test_array); + allocateForallTestData(N, erased_working_res, &working_array, + &check_array, &test_array); const INDEX_TYPE rbegin = *r1.begin(); std::iota(test_array, test_array + RAJA::stripIndexType(N), rbegin); RAJA::forall( - working_res, - r1, + working_res, r1, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) { working_array[RAJA::stripIndexType(idx - rbegin)] = idx; }); - working_res.memcpy( - check_array, working_array, sizeof(INDEX_TYPE) * RAJA::stripIndexType(N)); + working_res.memcpy(check_array, working_array, + sizeof(INDEX_TYPE) * RAJA::stripIndexType(N)); for (INDEX_TYPE i = INDEX_TYPE(0); i < N; i++) { @@ -45,8 +44,8 @@ void ForallResourceRangeSegmentTestImpl(INDEX_TYPE first, INDEX_TYPE last) check_array[RAJA::stripIndexType(i)]); } - deallocateForallTestData( - erased_working_res, working_array, check_array, test_array); + deallocateForallTestData(erased_working_res, working_array, + check_array, test_array); } diff --git a/test/functional/forall/resource-segment/tests/test-forall-resource-RangeStrideSegment.hpp b/test/functional/forall/resource-segment/tests/test-forall-resource-RangeStrideSegment.hpp index 6f4d3e7d6d..677ab28e62 100644 --- a/test/functional/forall/resource-segment/tests/test-forall-resource-RangeStrideSegment.hpp +++ b/test/functional/forall/resource-segment/tests/test-forall-resource-RangeStrideSegment.hpp @@ -27,16 +27,16 @@ void ForallResourceRangeStrideSegmentTestImpl(INDEX_TYPE first, INDEX_TYPE* check_array; INDEX_TYPE* test_array; - allocateForallTestData( - N, erased_working_res, &working_array, &check_array, &test_array); + allocateForallTestData(N, erased_working_res, &working_array, + &check_array, &test_array); for (INDEX_TYPE i = INDEX_TYPE(0); i < N; i++) { test_array[RAJA::stripIndexType(i)] = INDEX_TYPE(0); } - working_res.memcpy( - working_array, test_array, sizeof(INDEX_TYPE) * RAJA::stripIndexType(N)); + working_res.memcpy(working_array, test_array, + sizeof(INDEX_TYPE) * RAJA::stripIndexType(N)); INDEX_TYPE idx = first; for (INDEX_TYPE i = INDEX_TYPE(0); i < N; ++i) @@ -46,13 +46,12 @@ void ForallResourceRangeStrideSegmentTestImpl(INDEX_TYPE first, } RAJA::forall( - working_res, - r1, + working_res, r1, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) { working_array[RAJA::stripIndexType((idx - first) / stride)] = idx; }); - working_res.memcpy( - check_array, working_array, sizeof(INDEX_TYPE) * RAJA::stripIndexType(N)); + working_res.memcpy(check_array, working_array, + sizeof(INDEX_TYPE) * RAJA::stripIndexType(N)); for (INDEX_TYPE i = INDEX_TYPE(0); i < N; i++) { @@ -60,8 +59,8 @@ void ForallResourceRangeStrideSegmentTestImpl(INDEX_TYPE first, check_array[RAJA::stripIndexType(i)]); } - deallocateForallTestData( - erased_working_res, working_array, check_array, test_array); + deallocateForallTestData(erased_working_res, working_array, + check_array, test_array); } @@ -87,31 +86,21 @@ template >::value>::type* = nullptr> void runNegativeStrideTests() { - ForallResourceRangeStrideSegmentTestImpl( INDEX_TYPE(-10), INDEX_TYPE(-1), DIFF_TYPE(2)); - ForallResourceRangeStrideSegmentTestImpl( INDEX_TYPE(-5), INDEX_TYPE(0), DIFF_TYPE(2)); - ForallResourceRangeStrideSegmentTestImpl( INDEX_TYPE(-5), INDEX_TYPE(5), DIFF_TYPE(3)); // Test negative strides - ForallResourceRangeStrideSegmentTestImpl( INDEX_TYPE(10), INDEX_TYPE(-1), DIFF_TYPE(-1)); - ForallResourceRangeStrideSegmentTestImpl( INDEX_TYPE(10), INDEX_TYPE(0), DIFF_TYPE(-2)); } @@ -126,51 +115,33 @@ TYPED_TEST_P(ForallResourceRangeStrideSegmentTest, using DIFF_TYPE = typename std::make_signed>::type; - ForallResourceRangeStrideSegmentTestImpl( INDEX_TYPE(0), INDEX_TYPE(20), DIFF_TYPE(1)); - ForallResourceRangeStrideSegmentTestImpl( INDEX_TYPE(1), INDEX_TYPE(20), DIFF_TYPE(1)); - ForallResourceRangeStrideSegmentTestImpl( INDEX_TYPE(0), INDEX_TYPE(20), DIFF_TYPE(2)); - ForallResourceRangeStrideSegmentTestImpl( INDEX_TYPE(1), INDEX_TYPE(20), DIFF_TYPE(2)); - ForallResourceRangeStrideSegmentTestImpl( INDEX_TYPE(0), INDEX_TYPE(21), DIFF_TYPE(2)); - ForallResourceRangeStrideSegmentTestImpl( INDEX_TYPE(1), INDEX_TYPE(21), DIFF_TYPE(2)); - ForallResourceRangeStrideSegmentTestImpl( INDEX_TYPE(1), INDEX_TYPE(255), DIFF_TYPE(2)); // Test size zero segments - ForallResourceRangeStrideSegmentTestImpl( INDEX_TYPE(0), INDEX_TYPE(20), DIFF_TYPE(-2)); - ForallResourceRangeStrideSegmentTestImpl( INDEX_TYPE(1), INDEX_TYPE(20), DIFF_TYPE(-2)); diff --git a/test/functional/forall/segment-view/tests/test-forall-ListSegmentView.hpp b/test/functional/forall/segment-view/tests/test-forall-ListSegmentView.hpp index 755d3db420..4c9e045cce 100644 --- a/test/functional/forall/segment-view/tests/test-forall-ListSegmentView.hpp +++ b/test/functional/forall/segment-view/tests/test-forall-ListSegmentView.hpp @@ -44,8 +44,8 @@ void ForallListSegmentViewTestImpl(INDEX_TYPE N) INDEX_TYPE* check_array; INDEX_TYPE* test_array; - allocateForallTestData( - N, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(N, working_res, &working_array, + &check_array, &test_array); memset(test_array, 0, sizeof(INDEX_TYPE) * N); @@ -81,8 +81,8 @@ void ForallListSegmentViewTestImpl(INDEX_TYPE N) RAJA::Layout<1> layout(N); view_type work_view(working_array, layout); - RAJA::forall( - lseg, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) { work_view(idx) = idx; }); + RAJA::forall(lseg, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) + { work_view(idx) = idx; }); working_res.memcpy(check_array, working_array, sizeof(INDEX_TYPE) * N); @@ -91,8 +91,8 @@ void ForallListSegmentViewTestImpl(INDEX_TYPE N) ASSERT_EQ(test_array[i], check_array[i]); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } template @@ -123,8 +123,8 @@ void ForallListSegmentOffsetViewTestImpl(INDEX_TYPE N, INDEX_TYPE offset) INDEX_TYPE* check_array; INDEX_TYPE* test_array; - allocateForallTestData( - N, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(N, working_res, &working_array, + &check_array, &test_array); memset(test_array, 0, sizeof(INDEX_TYPE) * N); @@ -139,12 +139,11 @@ void ForallListSegmentOffsetViewTestImpl(INDEX_TYPE N, INDEX_TYPE offset) using view_type = RAJA::View; INDEX_TYPE N_offset = N + offset; - view_type work_view( - working_array, - RAJA::make_offset_layout<1, INDEX_TYPE>({{offset}}, {{N_offset}})); + view_type work_view(working_array, RAJA::make_offset_layout<1, INDEX_TYPE>( + {{offset}}, {{N_offset}})); - RAJA::forall( - lseg, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) { work_view(idx) = idx; }); + RAJA::forall(lseg, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) + { work_view(idx) = idx; }); working_res.memcpy(check_array, working_array, sizeof(INDEX_TYPE) * N); @@ -153,8 +152,8 @@ void ForallListSegmentOffsetViewTestImpl(INDEX_TYPE N, INDEX_TYPE offset) ASSERT_EQ(test_array[i], check_array[i]); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } TYPED_TEST_SUITE_P(ForallListSegmentViewTest); @@ -174,14 +173,11 @@ TYPED_TEST_P(ForallListSegmentViewTest, ListSegmentForallView) ForallListSegmentViewTestImpl( 32000); - ForallListSegmentOffsetViewTestImpl(13, 1); - ForallListSegmentOffsetViewTestImpl(2047, 2); - ForallListSegmentOffsetViewTestImpl(32000, 3); } diff --git a/test/functional/forall/segment-view/tests/test-forall-RangeSegment2DView.hpp b/test/functional/forall/segment-view/tests/test-forall-RangeSegment2DView.hpp index fc9b2056e4..c065c274ff 100644 --- a/test/functional/forall/segment-view/tests/test-forall-RangeSegment2DView.hpp +++ b/test/functional/forall/segment-view/tests/test-forall-RangeSegment2DView.hpp @@ -24,8 +24,8 @@ void ForallRangeSegment2DViewTestImpl(INDEX_TYPE N) INDEX_TYPE* check_array; INDEX_TYPE* test_array; - allocateForallTestData( - lentot, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(lentot, working_res, &working_array, + &check_array, &test_array); std::iota(test_array, test_array + lentot, 0); @@ -49,8 +49,8 @@ void ForallRangeSegment2DViewTestImpl(INDEX_TYPE N) ASSERT_EQ(test_array[i], check_array[i]); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } template @@ -67,8 +67,8 @@ void ForallRangeSegment2DOffsetViewTestImpl(INDEX_TYPE N) INDEX_TYPE* check_array; INDEX_TYPE* test_array; - allocateForallTestData( - lentot, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(lentot, working_res, &working_array, + &check_array, &test_array); memset(test_array, 0, sizeof(INDEX_TYPE) * lentot); @@ -104,8 +104,8 @@ void ForallRangeSegment2DOffsetViewTestImpl(INDEX_TYPE N) ASSERT_EQ(test_array[i], check_array[i]); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } TYPED_TEST_SUITE_P(ForallRangeSegment2DViewTest); diff --git a/test/functional/forall/segment-view/tests/test-forall-RangeSegmentView.hpp b/test/functional/forall/segment-view/tests/test-forall-RangeSegmentView.hpp index 8fc3208088..eee1bac55c 100644 --- a/test/functional/forall/segment-view/tests/test-forall-RangeSegmentView.hpp +++ b/test/functional/forall/segment-view/tests/test-forall-RangeSegmentView.hpp @@ -21,8 +21,8 @@ void ForallRangeSegmentViewTestImpl(INDEX_TYPE first, INDEX_TYPE last) INDEX_TYPE* check_array; INDEX_TYPE* test_array; - allocateForallTestData( - N, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(N, working_res, &working_array, + &check_array, &test_array); const INDEX_TYPE rbegin = *r1.begin(); @@ -33,8 +33,7 @@ void ForallRangeSegmentViewTestImpl(INDEX_TYPE first, INDEX_TYPE last) RAJA::Layout<1> layout(N); view_type work_view(working_array, layout); - RAJA::forall(r1, - [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) + RAJA::forall(r1, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) { work_view(idx - rbegin) = idx; }); working_res.memcpy(check_array, working_array, sizeof(INDEX_TYPE) * N); @@ -44,8 +43,8 @@ void ForallRangeSegmentViewTestImpl(INDEX_TYPE first, INDEX_TYPE last) ASSERT_EQ(test_array[i], check_array[i]); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } template @@ -61,8 +60,8 @@ void ForallRangeSegmentOffsetViewTestImpl(INDEX_TYPE first, INDEX_TYPE* check_array; INDEX_TYPE* test_array; - allocateForallTestData( - N, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(N, working_res, &working_array, + &check_array, &test_array); const INDEX_TYPE rbegin = *r1.begin(); @@ -72,12 +71,11 @@ void ForallRangeSegmentOffsetViewTestImpl(INDEX_TYPE first, INDEX_TYPE f_offset = first + offset; INDEX_TYPE l_offset = last + offset; - view_type work_view( - working_array, - RAJA::make_offset_layout<1, INDEX_TYPE>({{f_offset}}, {{l_offset}})); + view_type work_view(working_array, RAJA::make_offset_layout<1, INDEX_TYPE>( + {{f_offset}}, {{l_offset}})); - RAJA::forall( - r1, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) { work_view(idx) = idx; }); + RAJA::forall(r1, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) + { work_view(idx) = idx; }); working_res.memcpy(check_array, working_array, sizeof(INDEX_TYPE) * N); @@ -86,8 +84,8 @@ void ForallRangeSegmentOffsetViewTestImpl(INDEX_TYPE first, ASSERT_EQ(test_array[i], check_array[i]); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } template ( - N, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(N, working_res, &working_array, + &check_array, &test_array); memset(test_array, 0, sizeof(INDEX_TYPE) * N); @@ -43,8 +43,7 @@ void ForallRangeStrideSegmentViewTestImpl(INDEX_TYPE first, RAJA::Layout<1> layout(N); view_type work_view(working_array, layout); - RAJA::forall(r1, - [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) + RAJA::forall(r1, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) { work_view((idx - first) / stride) = idx; }); working_res.memcpy(check_array, working_array, sizeof(INDEX_TYPE) * N); @@ -54,8 +53,8 @@ void ForallRangeStrideSegmentViewTestImpl(INDEX_TYPE first, ASSERT_EQ(test_array[i], check_array[i]); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } template ::value>::type* = nullptr> void runNegativeIndexViewTests() { - ForallRangeStrideSegmentViewTestImpl(-10, -1, 2); - ForallRangeStrideSegmentViewTestImpl(-5, 0, 2); - ForallRangeStrideSegmentViewTestImpl(-5, 5, 3); - ForallRangeStrideSegmentViewTestImpl(10, -1, -1); - ForallRangeStrideSegmentViewTestImpl(10, 0, -2); } @@ -111,43 +100,25 @@ TYPED_TEST_P(ForallRangeStrideSegmentViewTest, RangeStrideSegmentForallView) using EXEC_POLICY = typename camp::at>::type; using DIFF_TYPE = typename std::make_signed::type; - ForallRangeStrideSegmentViewTestImpl(0, 20, 1); - ForallRangeStrideSegmentViewTestImpl(1, 20, 1); - ForallRangeStrideSegmentViewTestImpl(0, 20, 2); - ForallRangeStrideSegmentViewTestImpl(1, 20, 2); - ForallRangeStrideSegmentViewTestImpl(0, 21, 2); - ForallRangeStrideSegmentViewTestImpl(1, 21, 2); - ForallRangeStrideSegmentViewTestImpl(1, 255, 2); // Test size zero segments - ForallRangeStrideSegmentViewTestImpl(0, 20, -2); - ForallRangeStrideSegmentViewTestImpl(1, 20, -2); runNegativeIndexViewTests(); diff --git a/test/functional/forall/segment/tests/test-forall-ListSegment.hpp b/test/functional/forall/segment/tests/test-forall-ListSegment.hpp index 5567b66fa4..6bd5f707b9 100644 --- a/test/functional/forall/segment/tests/test-forall-ListSegment.hpp +++ b/test/functional/forall/segment/tests/test-forall-ListSegment.hpp @@ -55,8 +55,8 @@ void ForallListSegmentTestImpl(INDEX_TYPE N) data_len = 1; } - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, + &check_array, &test_array); if (RAJA::stripIndexType(N) > 0) { @@ -66,8 +66,8 @@ void ForallListSegmentTestImpl(INDEX_TYPE N) test_array[RAJA::stripIndexType(idx_vals[i])] = idx_vals[i]; } - working_res.memcpy( - working_array, test_array, sizeof(INDEX_TYPE) * data_len); + working_res.memcpy(working_array, test_array, + sizeof(INDEX_TYPE) * data_len); RAJA::forall(lseg, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) { @@ -79,8 +79,8 @@ void ForallListSegmentTestImpl(INDEX_TYPE N) memset(static_cast(test_array), 0, sizeof(INDEX_TYPE) * data_len); - working_res.memcpy( - working_array, test_array, sizeof(INDEX_TYPE) * data_len); + working_res.memcpy(working_array, test_array, + sizeof(INDEX_TYPE) * data_len); RAJA::forall(lseg, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) @@ -98,8 +98,8 @@ void ForallListSegmentTestImpl(INDEX_TYPE N) check_array[RAJA::stripIndexType(i)]); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } diff --git a/test/functional/forall/segment/tests/test-forall-RangeSegment.hpp b/test/functional/forall/segment/tests/test-forall-RangeSegment.hpp index f0454ae339..4f88cbb27d 100644 --- a/test/functional/forall/segment/tests/test-forall-RangeSegment.hpp +++ b/test/functional/forall/segment/tests/test-forall-RangeSegment.hpp @@ -29,8 +29,8 @@ void ForallRangeSegmentTestImpl(INDEX_TYPE first, INDEX_TYPE last) data_len = 1; } - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, + &check_array, &test_array); if (RAJA::stripIndexType(N) > 0) { @@ -40,8 +40,7 @@ void ForallRangeSegmentTestImpl(INDEX_TYPE first, INDEX_TYPE last) std::iota(test_array, test_array + RAJA::stripIndexType(N), rbegin); RAJA::forall( - r1, - [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) + r1, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) { working_array[RAJA::stripIndexType(idx - rbegin)] = idx; }); } else @@ -49,8 +48,8 @@ void ForallRangeSegmentTestImpl(INDEX_TYPE first, INDEX_TYPE last) memset(static_cast(test_array), 0, sizeof(INDEX_TYPE) * data_len); - working_res.memcpy( - working_array, test_array, sizeof(INDEX_TYPE) * data_len); + working_res.memcpy(working_array, test_array, + sizeof(INDEX_TYPE) * data_len); RAJA::forall(r1, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) @@ -68,8 +67,8 @@ void ForallRangeSegmentTestImpl(INDEX_TYPE first, INDEX_TYPE last) check_array[RAJA::stripIndexType(i)]); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } diff --git a/test/functional/forall/segment/tests/test-forall-RangeStrideSegment.hpp b/test/functional/forall/segment/tests/test-forall-RangeStrideSegment.hpp index 983fe19662..909474b019 100644 --- a/test/functional/forall/segment/tests/test-forall-RangeStrideSegment.hpp +++ b/test/functional/forall/segment/tests/test-forall-RangeStrideSegment.hpp @@ -34,8 +34,8 @@ void ForallRangeStrideSegmentTestImpl(INDEX_TYPE first, data_len = 1; } - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, + &check_array, &test_array); memset(static_cast(test_array), 0, sizeof(INDEX_TYPE) * data_len); @@ -52,8 +52,7 @@ void ForallRangeStrideSegmentTestImpl(INDEX_TYPE first, } RAJA::forall( - r1, - [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) + r1, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) { working_array[RAJA::stripIndexType((idx - first) / stride)] = idx; }); } else @@ -75,8 +74,8 @@ void ForallRangeStrideSegmentTestImpl(INDEX_TYPE first, check_array[RAJA::stripIndexType(i)]); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } @@ -102,33 +101,23 @@ template >::value>::type* = nullptr> void runNegativeStrideTests() { - ForallRangeStrideSegmentTestImpl( - INDEX_TYPE(-10), INDEX_TYPE(-1), DIFF_TYPE(2)); - ForallRangeStrideSegmentTestImpl( - INDEX_TYPE(-5), INDEX_TYPE(0), DIFF_TYPE(2)); - ForallRangeStrideSegmentTestImpl( - INDEX_TYPE(-5), INDEX_TYPE(5), DIFF_TYPE(3)); + ForallRangeStrideSegmentTestImpl(INDEX_TYPE(-10), INDEX_TYPE(-1), + DIFF_TYPE(2)); + ForallRangeStrideSegmentTestImpl(INDEX_TYPE(-5), INDEX_TYPE(0), + DIFF_TYPE(2)); + ForallRangeStrideSegmentTestImpl(INDEX_TYPE(-5), INDEX_TYPE(5), + DIFF_TYPE(3)); // Test negative strides - ForallRangeStrideSegmentTestImpl( - INDEX_TYPE(10), INDEX_TYPE(-1), DIFF_TYPE(-1)); - ForallRangeStrideSegmentTestImpl( - INDEX_TYPE(10), INDEX_TYPE(0), DIFF_TYPE(-2)); + ForallRangeStrideSegmentTestImpl(INDEX_TYPE(10), INDEX_TYPE(-1), + DIFF_TYPE(-1)); + ForallRangeStrideSegmentTestImpl(INDEX_TYPE(10), INDEX_TYPE(0), + DIFF_TYPE(-2)); } @@ -140,53 +129,35 @@ TYPED_TEST_P(ForallRangeStrideSegmentTest, RangeStrideSegmentForall) using DIFF_TYPE = typename std::make_signed>::type; - ForallRangeStrideSegmentTestImpl( - INDEX_TYPE(0), INDEX_TYPE(20), DIFF_TYPE(1)); - ForallRangeStrideSegmentTestImpl( - INDEX_TYPE(1), INDEX_TYPE(20), DIFF_TYPE(1)); - ForallRangeStrideSegmentTestImpl( - INDEX_TYPE(0), INDEX_TYPE(20), DIFF_TYPE(2)); - ForallRangeStrideSegmentTestImpl( - INDEX_TYPE(1), INDEX_TYPE(20), DIFF_TYPE(2)); - ForallRangeStrideSegmentTestImpl( - INDEX_TYPE(0), INDEX_TYPE(21), DIFF_TYPE(2)); - ForallRangeStrideSegmentTestImpl( - INDEX_TYPE(1), INDEX_TYPE(21), DIFF_TYPE(2)); - ForallRangeStrideSegmentTestImpl( - INDEX_TYPE(1), INDEX_TYPE(255), DIFF_TYPE(2)); + ForallRangeStrideSegmentTestImpl(INDEX_TYPE(0), INDEX_TYPE(20), + DIFF_TYPE(1)); + ForallRangeStrideSegmentTestImpl(INDEX_TYPE(1), INDEX_TYPE(20), + DIFF_TYPE(1)); + ForallRangeStrideSegmentTestImpl(INDEX_TYPE(0), INDEX_TYPE(20), + DIFF_TYPE(2)); + ForallRangeStrideSegmentTestImpl(INDEX_TYPE(1), INDEX_TYPE(20), + DIFF_TYPE(2)); + ForallRangeStrideSegmentTestImpl(INDEX_TYPE(0), INDEX_TYPE(21), + DIFF_TYPE(2)); + ForallRangeStrideSegmentTestImpl(INDEX_TYPE(1), INDEX_TYPE(21), + DIFF_TYPE(2)); + ForallRangeStrideSegmentTestImpl(INDEX_TYPE(1), INDEX_TYPE(255), + DIFF_TYPE(2)); // Test size zero segments - ForallRangeStrideSegmentTestImpl( - INDEX_TYPE(0), INDEX_TYPE(20), DIFF_TYPE(-2)); - ForallRangeStrideSegmentTestImpl( - INDEX_TYPE(1), INDEX_TYPE(20), DIFF_TYPE(-2)); + ForallRangeStrideSegmentTestImpl(INDEX_TYPE(0), INDEX_TYPE(20), + DIFF_TYPE(-2)); + ForallRangeStrideSegmentTestImpl(INDEX_TYPE(1), INDEX_TYPE(20), + DIFF_TYPE(-2)); runNegativeStrideTests(); } diff --git a/test/functional/indexset-build/test-aligned-indexset.cpp b/test/functional/indexset-build/test-aligned-indexset.cpp index 1146e31e58..80eb2d77f5 100644 --- a/test/functional/indexset-build/test-aligned-indexset.cpp +++ b/test/functional/indexset-build/test-aligned-indexset.cpp @@ -49,12 +49,9 @@ TEST(IndexSetBuild, Aligned) RAJA::TypedIndexSet iset; - RAJA::buildIndexSetAligned(iset, - res, - &indices[0], + RAJA::buildIndexSetAligned(iset, res, &indices[0], static_cast(indices.size()), - range_min_length, - range_align); + range_min_length, range_align); ASSERT_EQ(iset.getLength(), indices.size()); diff --git a/test/functional/kernel/basic-fission-fusion-loop/tests/basic-fission-fusion-loop-impl.hpp b/test/functional/kernel/basic-fission-fusion-loop/tests/basic-fission-fusion-loop-impl.hpp index d50b4e0480..c729126a84 100644 --- a/test/functional/kernel/basic-fission-fusion-loop/tests/basic-fission-fusion-loop-impl.hpp +++ b/test/functional/kernel/basic-fission-fusion-loop/tests/basic-fission-fusion-loop-impl.hpp @@ -42,20 +42,16 @@ void KernelBasicFissionFusionLoopTestImpl( DATA_TYPE* test_array_y; allocateForallTestData(RAJA::stripIndexType(data_len), - erased_working_res, - &working_array_x, - &check_array_x, - &test_array_x); + erased_working_res, &working_array_x, + &check_array_x, &test_array_x); allocateForallTestData(RAJA::stripIndexType(data_len), - erased_working_res, - &working_array_y, - &check_array_y, - &test_array_y); + erased_working_res, &working_array_y, + &check_array_y, &test_array_y); - working_res.memset( - working_array_x, 0, sizeof(DATA_TYPE) * RAJA::stripIndexType(data_len)); + working_res.memset(working_array_x, 0, + sizeof(DATA_TYPE) * RAJA::stripIndexType(data_len)); RAJA::kernel( RAJA::make_tuple(seg, seg), @@ -74,16 +70,13 @@ void KernelBasicFissionFusionLoopTestImpl( ); - working_res.memcpy(check_array_x, - working_array_x, + working_res.memcpy(check_array_x, working_array_x, sizeof(DATA_TYPE) * RAJA::stripIndexType(data_len)); - memset(static_cast(check_array_y), - 0, + memset(static_cast(check_array_y), 0, sizeof(DATA_TYPE) * RAJA::stripIndexType(data_len)); - RAJA::forall(working_res, - seg_idx, + RAJA::forall(working_res, seg_idx, [=](IDX_TYPE i) { check_array_y[RAJA::stripIndexType(i)] += 1; @@ -97,12 +90,12 @@ void KernelBasicFissionFusionLoopTestImpl( check_array_y[RAJA::stripIndexType(i)]); } - deallocateForallTestData( - erased_working_res, working_array_x, check_array_x, test_array_x); + deallocateForallTestData(erased_working_res, working_array_x, + check_array_x, test_array_x); - deallocateForallTestData( - erased_working_res, working_array_y, check_array_y, test_array_y); + deallocateForallTestData(erased_working_res, working_array_y, + check_array_y, test_array_y); } #endif // __BASIC_FISSION_FUSION_LOOP_SEGMENTS_IMPL_HPP__ diff --git a/test/functional/kernel/basic-fission-fusion-loop/tests/test-kernel-basic-fission-fusion-loop-segments.hpp b/test/functional/kernel/basic-fission-fusion-loop/tests/test-kernel-basic-fission-fusion-loop-segments.hpp index 47075076f9..3a9c996bd3 100644 --- a/test/functional/kernel/basic-fission-fusion-loop/tests/test-kernel-basic-fission-fusion-loop-segments.hpp +++ b/test/functional/kernel/basic-fission-fusion-loop/tests/test-kernel-basic-fission-fusion-loop-segments.hpp @@ -31,18 +31,14 @@ TYPED_TEST_P(KernelBasicFissionFusionLoopTest, RAJA::TypedRangeSegment r1(0, 37); RAJA::getIndices(seg_idx, r1); - KernelBasicFissionFusionLoopTestImpl>( r1, seg_idx, working_res, erased_working_res); seg_idx.clear(); RAJA::TypedRangeSegment r2(3, 2057); RAJA::getIndices(seg_idx, r2); - KernelBasicFissionFusionLoopTestImpl>( r2, seg_idx, working_res, erased_working_res); @@ -51,9 +47,7 @@ TYPED_TEST_P(KernelBasicFissionFusionLoopTest, RAJA::TypedRangeSegment r3(5, 5); RAJA::getIndices(seg_idx, r3); - KernelBasicFissionFusionLoopTestImpl>( r3, seg_idx, working_res, erased_working_res); @@ -61,18 +55,14 @@ TYPED_TEST_P(KernelBasicFissionFusionLoopTest, seg_idx.clear(); RAJA::TypedRangeStrideSegment rs1(0, 188, 2); RAJA::getIndices(seg_idx, rs1); - KernelBasicFissionFusionLoopTestImpl>( rs1, seg_idx, working_res, erased_working_res); seg_idx.clear(); RAJA::TypedRangeStrideSegment rs2(2, 1029, 3); RAJA::getIndices(seg_idx, rs2); - KernelBasicFissionFusionLoopTestImpl>( rs2, seg_idx, working_res, erased_working_res); @@ -80,9 +70,7 @@ TYPED_TEST_P(KernelBasicFissionFusionLoopTest, seg_idx.clear(); RAJA::TypedRangeStrideSegment rs3(2, 2, 3); RAJA::getIndices(seg_idx, rs3); - KernelBasicFissionFusionLoopTestImpl>( rs3, seg_idx, working_res, erased_working_res); @@ -98,21 +86,17 @@ TYPED_TEST_P(KernelBasicFissionFusionLoopTest, seg_idx.push_back(i); } } - RAJA::TypedListSegment l1( - &seg_idx[0], seg_idx.size(), erased_working_res); - KernelBasicFissionFusionLoopTestImpl l1(&seg_idx[0], seg_idx.size(), + erased_working_res); + KernelBasicFissionFusionLoopTestImpl>( l1, seg_idx, working_res, erased_working_res); // test zero-length list segment seg_idx.clear(); - RAJA::TypedListSegment l2( - nullptr, seg_idx.size(), erased_working_res); - KernelBasicFissionFusionLoopTestImpl l2(nullptr, seg_idx.size(), + erased_working_res); + KernelBasicFissionFusionLoopTestImpl>( l2, seg_idx, working_res, erased_working_res); } diff --git a/test/functional/kernel/basic-single-icount-loop/tests/basic-single-icount-loop-impl.hpp b/test/functional/kernel/basic-single-icount-loop/tests/basic-single-icount-loop-impl.hpp index f2cd2d9de6..adda6c10e9 100644 --- a/test/functional/kernel/basic-single-icount-loop/tests/basic-single-icount-loop-impl.hpp +++ b/test/functional/kernel/basic-single-icount-loop/tests/basic-single-icount-loop-impl.hpp @@ -45,25 +45,20 @@ void KernelBasicSingleICountLoopTestImpl( data_len++; } - allocateForallTestData( - data_len, erased_working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, erased_working_res, &working_array, + &check_array, &test_array); - allocateForallTestData(data_len, - erased_working_res, - &working_array_i, - &check_array_i, + allocateForallTestData(data_len, erased_working_res, + &working_array_i, &check_array_i, &test_array_i); - memset(static_cast(test_array), - 0, + memset(static_cast(test_array), 0, sizeof(IDX_TYPE) * RAJA::stripIndexType(data_len)); - working_res.memcpy(working_array, - test_array, + working_res.memcpy(working_array, test_array, sizeof(IDX_TYPE) * RAJA::stripIndexType(data_len)); - working_res.memcpy(working_array_i, - test_array_i, + working_res.memcpy(working_array_i, test_array_i, sizeof(IDX_TYPE) * RAJA::stripIndexType(data_len)); if (RAJA::stripIndexType(idx_len) > 0) @@ -77,8 +72,7 @@ void KernelBasicSingleICountLoopTestImpl( } RAJA::kernel_param( - RAJA::make_tuple(seg), - RAJA::make_tuple(IDX_TYPE(0)), + RAJA::make_tuple(seg), RAJA::make_tuple(IDX_TYPE(0)), [=] RAJA_HOST_DEVICE(IDX_TYPE idx, IDX_TYPE i_idx) { @@ -90,8 +84,7 @@ void KernelBasicSingleICountLoopTestImpl( { // zero-length segment RAJA::kernel_param( - RAJA::make_tuple(seg), - RAJA::make_tuple(IDX_TYPE(0)), + RAJA::make_tuple(seg), RAJA::make_tuple(IDX_TYPE(0)), [=] RAJA_HOST_DEVICE(IDX_TYPE idx, IDX_TYPE i_idx) { @@ -102,11 +95,9 @@ void KernelBasicSingleICountLoopTestImpl( }); } - working_res.memcpy(check_array, - working_array, + working_res.memcpy(check_array, working_array, sizeof(IDX_TYPE) * RAJA::stripIndexType(data_len)); - working_res.memcpy(check_array_i, - working_array_i, + working_res.memcpy(check_array_i, working_array_i, sizeof(IDX_TYPE) * RAJA::stripIndexType(data_len)); for (IDX_TYPE i = IDX_TYPE(0); i < data_len; ++i) @@ -117,11 +108,11 @@ void KernelBasicSingleICountLoopTestImpl( check_array_i[RAJA::stripIndexType(i)]); } - deallocateForallTestData( - erased_working_res, working_array, check_array, test_array); + deallocateForallTestData(erased_working_res, working_array, + check_array, test_array); - deallocateForallTestData( - erased_working_res, working_array_i, check_array_i, test_array_i); + deallocateForallTestData(erased_working_res, working_array_i, + check_array_i, test_array_i); } #endif // __BASIC_SINGLE_ICOUNT_LOOP_SEGMENTS_IMPL_HPP__ diff --git a/test/functional/kernel/basic-single-icount-loop/tests/test-kernel-basic-single-icount-loop-segments.hpp b/test/functional/kernel/basic-single-icount-loop/tests/test-kernel-basic-single-icount-loop-segments.hpp index 2e80007803..15dc025698 100644 --- a/test/functional/kernel/basic-single-icount-loop/tests/test-kernel-basic-single-icount-loop-segments.hpp +++ b/test/functional/kernel/basic-single-icount-loop/tests/test-kernel-basic-single-icount-loop-segments.hpp @@ -31,18 +31,14 @@ TYPED_TEST_P(KernelBasicSingleICountLoopTest, RAJA::TypedRangeSegment r1(0, 37); RAJA::getIndices(seg_idx, r1); - KernelBasicSingleICountLoopTestImpl>( r1, seg_idx, working_res, erased_working_res); seg_idx.clear(); RAJA::TypedRangeSegment r2(3, 2057); RAJA::getIndices(seg_idx, r2); - KernelBasicSingleICountLoopTestImpl>( r2, seg_idx, working_res, erased_working_res); @@ -51,9 +47,7 @@ TYPED_TEST_P(KernelBasicSingleICountLoopTest, RAJA::TypedRangeSegment r3(5, 5); RAJA::getIndices(seg_idx, r3); - KernelBasicSingleICountLoopTestImpl>( r3, seg_idx, working_res, erased_working_res); @@ -61,18 +55,14 @@ TYPED_TEST_P(KernelBasicSingleICountLoopTest, seg_idx.clear(); RAJA::TypedRangeStrideSegment rs1(0, 188, 2); RAJA::getIndices(seg_idx, rs1); - KernelBasicSingleICountLoopTestImpl>( rs1, seg_idx, working_res, erased_working_res); seg_idx.clear(); RAJA::TypedRangeStrideSegment rs2(2, 1029, 3); RAJA::getIndices(seg_idx, rs2); - KernelBasicSingleICountLoopTestImpl>( rs2, seg_idx, working_res, erased_working_res); @@ -80,9 +70,7 @@ TYPED_TEST_P(KernelBasicSingleICountLoopTest, seg_idx.clear(); RAJA::TypedRangeStrideSegment rs3(2, 2, 3); RAJA::getIndices(seg_idx, rs3); - KernelBasicSingleICountLoopTestImpl>( rs3, seg_idx, working_res, erased_working_res); @@ -98,21 +86,17 @@ TYPED_TEST_P(KernelBasicSingleICountLoopTest, seg_idx.push_back(i); } } - RAJA::TypedListSegment l1( - &seg_idx[0], seg_idx.size(), erased_working_res); - KernelBasicSingleICountLoopTestImpl l1(&seg_idx[0], seg_idx.size(), + erased_working_res); + KernelBasicSingleICountLoopTestImpl>( l1, seg_idx, working_res, erased_working_res); // test zero-length list segment seg_idx.clear(); - RAJA::TypedListSegment l2( - nullptr, seg_idx.size(), erased_working_res); - KernelBasicSingleICountLoopTestImpl l2(nullptr, seg_idx.size(), + erased_working_res); + KernelBasicSingleICountLoopTestImpl>( l2, seg_idx, working_res, erased_working_res); } diff --git a/test/functional/kernel/basic-single-loop/tests/basic-single-loop-segments-impl.hpp b/test/functional/kernel/basic-single-loop/tests/basic-single-loop-segments-impl.hpp index 5373435404..a3732a6f1e 100644 --- a/test/functional/kernel/basic-single-loop/tests/basic-single-loop-segments-impl.hpp +++ b/test/functional/kernel/basic-single-loop/tests/basic-single-loop-segments-impl.hpp @@ -64,15 +64,13 @@ void KernelBasicSingleLoopTestImpl(const SEG_TYPE& seg, data_len++; } - allocateForallTestData( - data_len, erased_working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, erased_working_res, &working_array, + &check_array, &test_array); - memset(static_cast(test_array), - 0, + memset(static_cast(test_array), 0, sizeof(IDX_TYPE) * RAJA::stripIndexType(data_len)); - working_res.memcpy(working_array, - test_array, + working_res.memcpy(working_array, test_array, sizeof(IDX_TYPE) * RAJA::stripIndexType(data_len)); if (RAJA::stripIndexType(idx_len) > 0) @@ -85,16 +83,14 @@ void KernelBasicSingleLoopTestImpl(const SEG_TYPE& seg, } call_kernel( - RAJA::make_tuple(seg), - working_res, + RAJA::make_tuple(seg), working_res, [=] RAJA_HOST_DEVICE(IDX_TYPE idx) { working_array[RAJA::stripIndexType(idx)] = idx; }); } else { // zero-length segment - call_kernel(RAJA::make_tuple(seg), - working_res, + call_kernel(RAJA::make_tuple(seg), working_res, [=] RAJA_HOST_DEVICE(IDX_TYPE idx) { (void)idx; @@ -102,8 +98,7 @@ void KernelBasicSingleLoopTestImpl(const SEG_TYPE& seg, }); } - working_res.memcpy(check_array, - working_array, + working_res.memcpy(check_array, working_array, sizeof(IDX_TYPE) * RAJA::stripIndexType(data_len)); for (IDX_TYPE i = IDX_TYPE(0); i < data_len; ++i) @@ -112,8 +107,8 @@ void KernelBasicSingleLoopTestImpl(const SEG_TYPE& seg, check_array[RAJA::stripIndexType(i)]); } - deallocateForallTestData( - erased_working_res, working_array, check_array, test_array); + deallocateForallTestData(erased_working_res, working_array, + check_array, test_array); } #endif // __BASIC_SINGLE_LOOP_SEGMENTS_IMPL_HPP__ diff --git a/test/functional/kernel/basic-single-loop/tests/test-kernel-basic-single-loop-segments.hpp b/test/functional/kernel/basic-single-loop/tests/test-kernel-basic-single-loop-segments.hpp index 1b7a90eb00..c358ec3071 100644 --- a/test/functional/kernel/basic-single-loop/tests/test-kernel-basic-single-loop-segments.hpp +++ b/test/functional/kernel/basic-single-loop/tests/test-kernel-basic-single-loop-segments.hpp @@ -32,21 +32,15 @@ TYPED_TEST_P(KernelBasicSingleLoopTest, BasicSingleLoopSegmentKernel) RAJA::TypedRangeSegment r1(0, 37); RAJA::getIndices(seg_idx, r1); - KernelBasicSingleLoopTestImpl, - USE_RES>( + KernelBasicSingleLoopTestImpl, USE_RES>( r1, seg_idx, working_res, erased_working_res); seg_idx.clear(); RAJA::TypedRangeSegment r2(3, 2057); RAJA::getIndices(seg_idx, r2); - KernelBasicSingleLoopTestImpl, - USE_RES>( + KernelBasicSingleLoopTestImpl, USE_RES>( r2, seg_idx, working_res, erased_working_res); // test zero-length range segment @@ -54,44 +48,35 @@ TYPED_TEST_P(KernelBasicSingleLoopTest, BasicSingleLoopSegmentKernel) RAJA::TypedRangeSegment r3(5, 5); RAJA::getIndices(seg_idx, r3); - KernelBasicSingleLoopTestImpl, - USE_RES>( + KernelBasicSingleLoopTestImpl, USE_RES>( r3, seg_idx, working_res, erased_working_res); // Range-stride segment tests seg_idx.clear(); RAJA::TypedRangeStrideSegment rs1(0, 188, 2); RAJA::getIndices(seg_idx, rs1); - KernelBasicSingleLoopTestImpl, - USE_RES>( - rs1, seg_idx, working_res, erased_working_res); + USE_RES>(rs1, seg_idx, working_res, + erased_working_res); seg_idx.clear(); RAJA::TypedRangeStrideSegment rs2(2, 1029, 3); RAJA::getIndices(seg_idx, rs2); - KernelBasicSingleLoopTestImpl, - USE_RES>( - rs2, seg_idx, working_res, erased_working_res); + USE_RES>(rs2, seg_idx, working_res, + erased_working_res); // test zero-length range-stride segment seg_idx.clear(); RAJA::TypedRangeStrideSegment rs3(2, 2, 3); RAJA::getIndices(seg_idx, rs3); - KernelBasicSingleLoopTestImpl, - USE_RES>( - rs3, seg_idx, working_res, erased_working_res); + USE_RES>(rs3, seg_idx, working_res, + erased_working_res); // List segment tests seg_idx.clear(); @@ -105,24 +90,18 @@ TYPED_TEST_P(KernelBasicSingleLoopTest, BasicSingleLoopSegmentKernel) seg_idx.push_back(i); } } - RAJA::TypedListSegment l1( - &seg_idx[0], seg_idx.size(), erased_working_res); - KernelBasicSingleLoopTestImpl, - USE_RES>( + RAJA::TypedListSegment l1(&seg_idx[0], seg_idx.size(), + erased_working_res); + KernelBasicSingleLoopTestImpl, USE_RES>( l1, seg_idx, working_res, erased_working_res); // test zero-length list segment seg_idx.clear(); - RAJA::TypedListSegment l2( - nullptr, seg_idx.size(), erased_working_res); - KernelBasicSingleLoopTestImpl, - USE_RES>( + RAJA::TypedListSegment l2(nullptr, seg_idx.size(), + erased_working_res); + KernelBasicSingleLoopTestImpl, USE_RES>( l2, seg_idx, working_res, erased_working_res); } diff --git a/test/functional/kernel/basic-single-loop/tests/test-kernel-resource-basic-single-loop-segments.hpp b/test/functional/kernel/basic-single-loop/tests/test-kernel-resource-basic-single-loop-segments.hpp index 9cd1096bd4..aa06b1003d 100644 --- a/test/functional/kernel/basic-single-loop/tests/test-kernel-resource-basic-single-loop-segments.hpp +++ b/test/functional/kernel/basic-single-loop/tests/test-kernel-resource-basic-single-loop-segments.hpp @@ -32,21 +32,15 @@ TYPED_TEST_P(KernelBasicSingleLoopTest, BasicSingleLoopSegmentKernel) RAJA::TypedRangeSegment r1(0, 37); RAJA::getIndices(seg_idx, r1); - KernelBasicSingleLoopTestImpl, - USE_RES>( + KernelBasicSingleLoopTestImpl, USE_RES>( r1, seg_idx, working_res, erased_working_res); seg_idx.clear(); RAJA::TypedRangeSegment r2(3, 2057); RAJA::getIndices(seg_idx, r2); - KernelBasicSingleLoopTestImpl, - USE_RES>( + KernelBasicSingleLoopTestImpl, USE_RES>( r2, seg_idx, working_res, erased_working_res); // test zero-length range segment @@ -54,44 +48,35 @@ TYPED_TEST_P(KernelBasicSingleLoopTest, BasicSingleLoopSegmentKernel) RAJA::TypedRangeSegment r3(5, 5); RAJA::getIndices(seg_idx, r3); - KernelBasicSingleLoopTestImpl, - USE_RES>( + KernelBasicSingleLoopTestImpl, USE_RES>( r3, seg_idx, working_res, erased_working_res); // Range-stride segment tests seg_idx.clear(); RAJA::TypedRangeStrideSegment rs1(0, 188, 2); RAJA::getIndices(seg_idx, rs1); - KernelBasicSingleLoopTestImpl, - USE_RES>( - rs1, seg_idx, working_res, erased_working_res); + USE_RES>(rs1, seg_idx, working_res, + erased_working_res); seg_idx.clear(); RAJA::TypedRangeStrideSegment rs2(2, 1029, 3); RAJA::getIndices(seg_idx, rs2); - KernelBasicSingleLoopTestImpl, - USE_RES>( - rs2, seg_idx, working_res, erased_working_res); + USE_RES>(rs2, seg_idx, working_res, + erased_working_res); // test zero-length range-stride segment seg_idx.clear(); RAJA::TypedRangeStrideSegment rs3(2, 2, 3); RAJA::getIndices(seg_idx, rs3); - KernelBasicSingleLoopTestImpl, - USE_RES>( - rs3, seg_idx, working_res, erased_working_res); + USE_RES>(rs3, seg_idx, working_res, + erased_working_res); // List segment tests seg_idx.clear(); @@ -105,24 +90,18 @@ TYPED_TEST_P(KernelBasicSingleLoopTest, BasicSingleLoopSegmentKernel) seg_idx.push_back(i); } } - RAJA::TypedListSegment l1( - &seg_idx[0], seg_idx.size(), erased_working_res); - KernelBasicSingleLoopTestImpl, - USE_RES>( + RAJA::TypedListSegment l1(&seg_idx[0], seg_idx.size(), + erased_working_res); + KernelBasicSingleLoopTestImpl, USE_RES>( l1, seg_idx, working_res, erased_working_res); // test zero-length list segment seg_idx.clear(); - RAJA::TypedListSegment l2( - nullptr, seg_idx.size(), erased_working_res); - KernelBasicSingleLoopTestImpl, - USE_RES>( + RAJA::TypedListSegment l2(nullptr, seg_idx.size(), + erased_working_res); + KernelBasicSingleLoopTestImpl, USE_RES>( l2, seg_idx, working_res, erased_working_res); } diff --git a/test/functional/kernel/conditional-fission-fusion-loop/tests/conditional-fission-fusion-loop-impl.hpp b/test/functional/kernel/conditional-fission-fusion-loop/tests/conditional-fission-fusion-loop-impl.hpp index d6bdf64d1f..558c06feb5 100644 --- a/test/functional/kernel/conditional-fission-fusion-loop/tests/conditional-fission-fusion-loop-impl.hpp +++ b/test/functional/kernel/conditional-fission-fusion-loop/tests/conditional-fission-fusion-loop-impl.hpp @@ -42,20 +42,16 @@ void KernelConditionalFissionFusionLoopTestImpl( DATA_TYPE* test_array_y; allocateForallTestData(RAJA::stripIndexType(data_len), - erased_working_res, - &working_array_x, - &check_array_x, - &test_array_x); + erased_working_res, &working_array_x, + &check_array_x, &test_array_x); allocateForallTestData(RAJA::stripIndexType(data_len), - erased_working_res, - &working_array_y, - &check_array_y, - &test_array_y); + erased_working_res, &working_array_y, + &check_array_y, &test_array_y); - working_res.memset( - working_array_x, 0, sizeof(DATA_TYPE) * RAJA::stripIndexType(data_len)); + working_res.memset(working_array_x, 0, + sizeof(DATA_TYPE) * RAJA::stripIndexType(data_len)); for (int param = 0; param < 2; ++param) { @@ -80,16 +76,13 @@ void KernelConditionalFissionFusionLoopTestImpl( ); - working_res.memcpy(check_array_x, - working_array_x, + working_res.memcpy(check_array_x, working_array_x, sizeof(DATA_TYPE) * RAJA::stripIndexType(data_len)); - memset(static_cast(check_array_y), - 0, + memset(static_cast(check_array_y), 0, sizeof(DATA_TYPE) * RAJA::stripIndexType(data_len)); - RAJA::forall(working_res, - seg_idx, + RAJA::forall(working_res, seg_idx, [=](IDX_TYPE i) { check_array_y[RAJA::stripIndexType(i)] = 3 + 3 * param; @@ -103,12 +96,12 @@ void KernelConditionalFissionFusionLoopTestImpl( } } - deallocateForallTestData( - erased_working_res, working_array_x, check_array_x, test_array_x); + deallocateForallTestData(erased_working_res, working_array_x, + check_array_x, test_array_x); - deallocateForallTestData( - erased_working_res, working_array_y, check_array_y, test_array_y); + deallocateForallTestData(erased_working_res, working_array_y, + check_array_y, test_array_y); } #endif // __CONDITIONAL_FISSION_FUSION_LOOP_SEGMENTS_IMPL_HPP__ diff --git a/test/functional/kernel/conditional-fission-fusion-loop/tests/test-kernel-conditional-fission-fusion-loop-segments.hpp b/test/functional/kernel/conditional-fission-fusion-loop/tests/test-kernel-conditional-fission-fusion-loop-segments.hpp index b0329de313..d7403ed14a 100644 --- a/test/functional/kernel/conditional-fission-fusion-loop/tests/test-kernel-conditional-fission-fusion-loop-segments.hpp +++ b/test/functional/kernel/conditional-fission-fusion-loop/tests/test-kernel-conditional-fission-fusion-loop-segments.hpp @@ -31,18 +31,14 @@ TYPED_TEST_P(KernelConditionalFissionFusionLoopTest, RAJA::TypedRangeSegment r1(0, 37); RAJA::getIndices(seg_idx, r1); - KernelConditionalFissionFusionLoopTestImpl>( r1, seg_idx, working_res, erased_working_res); seg_idx.clear(); RAJA::TypedRangeSegment r2(3, 2057); RAJA::getIndices(seg_idx, r2); - KernelConditionalFissionFusionLoopTestImpl>( r2, seg_idx, working_res, erased_working_res); @@ -51,9 +47,7 @@ TYPED_TEST_P(KernelConditionalFissionFusionLoopTest, RAJA::TypedRangeSegment r3(5, 5); RAJA::getIndices(seg_idx, r3); - KernelConditionalFissionFusionLoopTestImpl>( r3, seg_idx, working_res, erased_working_res); @@ -62,32 +56,26 @@ TYPED_TEST_P(KernelConditionalFissionFusionLoopTest, RAJA::TypedRangeStrideSegment rs1(0, 188, 2); RAJA::getIndices(seg_idx, rs1); KernelConditionalFissionFusionLoopTestImpl< - IDX_TYPE, - EXEC_POLICY, - WORKING_RES, - RAJA::TypedRangeStrideSegment>( - rs1, seg_idx, working_res, erased_working_res); + IDX_TYPE, EXEC_POLICY, WORKING_RES, + RAJA::TypedRangeStrideSegment>(rs1, seg_idx, working_res, + erased_working_res); seg_idx.clear(); RAJA::TypedRangeStrideSegment rs2(2, 1029, 3); RAJA::getIndices(seg_idx, rs2); KernelConditionalFissionFusionLoopTestImpl< - IDX_TYPE, - EXEC_POLICY, - WORKING_RES, - RAJA::TypedRangeStrideSegment>( - rs2, seg_idx, working_res, erased_working_res); + IDX_TYPE, EXEC_POLICY, WORKING_RES, + RAJA::TypedRangeStrideSegment>(rs2, seg_idx, working_res, + erased_working_res); // test zero-length range-stride segment seg_idx.clear(); RAJA::TypedRangeStrideSegment rs3(2, 2, 3); RAJA::getIndices(seg_idx, rs3); KernelConditionalFissionFusionLoopTestImpl< - IDX_TYPE, - EXEC_POLICY, - WORKING_RES, - RAJA::TypedRangeStrideSegment>( - rs3, seg_idx, working_res, erased_working_res); + IDX_TYPE, EXEC_POLICY, WORKING_RES, + RAJA::TypedRangeStrideSegment>(rs3, seg_idx, working_res, + erased_working_res); // List segment tests seg_idx.clear(); @@ -101,21 +89,17 @@ TYPED_TEST_P(KernelConditionalFissionFusionLoopTest, seg_idx.push_back(i); } } - RAJA::TypedListSegment l1( - &seg_idx[0], seg_idx.size(), erased_working_res); - KernelConditionalFissionFusionLoopTestImpl l1(&seg_idx[0], seg_idx.size(), + erased_working_res); + KernelConditionalFissionFusionLoopTestImpl>( l1, seg_idx, working_res, erased_working_res); // test zero-length list segment seg_idx.clear(); - RAJA::TypedListSegment l2( - nullptr, seg_idx.size(), erased_working_res); - KernelConditionalFissionFusionLoopTestImpl l2(nullptr, seg_idx.size(), + erased_working_res); + KernelConditionalFissionFusionLoopTestImpl>( l2, seg_idx, working_res, erased_working_res); } diff --git a/test/functional/kernel/hyperplane/tests/test-kernel-hyperplane-2D.hpp b/test/functional/kernel/hyperplane/tests/test-kernel-hyperplane-2D.hpp index d97139cb5b..603f54e695 100644 --- a/test/functional/kernel/hyperplane/tests/test-kernel-hyperplane-2D.hpp +++ b/test/functional/kernel/hyperplane/tests/test-kernel-hyperplane-2D.hpp @@ -31,8 +31,8 @@ void KernelHyperplane2DTestImpl(const int groups, INDEX_TYPE array_length = groups * idim * jdim; - allocateForallTestData( - array_length, work_res, &work_array, &check_array, &test_array); + allocateForallTestData(array_length, work_res, &work_array, + &check_array, &test_array); RAJA::View> HostView( test_array, groups, idim, jdim); @@ -122,8 +122,8 @@ void KernelHyperplane2DTestImpl(const int groups, } } - deallocateForallTestData( - work_res, work_array, check_array, test_array); + deallocateForallTestData(work_res, work_array, check_array, + test_array); } @@ -140,20 +140,11 @@ TYPED_TEST_P(KernelHyperplane2DTest, Hyperplane2DKernel) using EXEC_POLICY = typename camp::at>::type; using REDUCE_POLICY = typename camp::at>::type; - KernelHyperplane2DTestImpl(1, 10, 10); - KernelHyperplane2DTestImpl(2, 111, 205); - KernelHyperplane2DTestImpl(3, 213, 123); } diff --git a/test/functional/kernel/hyperplane/tests/test-kernel-hyperplane-3D.hpp b/test/functional/kernel/hyperplane/tests/test-kernel-hyperplane-3D.hpp index 99c6357496..9ed9ca453c 100644 --- a/test/functional/kernel/hyperplane/tests/test-kernel-hyperplane-3D.hpp +++ b/test/functional/kernel/hyperplane/tests/test-kernel-hyperplane-3D.hpp @@ -64,8 +64,8 @@ KernelHyperplane3DTestImpl(const int groups, INDEX_TYPE array_length = groups * idim * jdim * kdim; - allocateForallTestData( - array_length, work_res, &work_array, &check_array, &test_array); + allocateForallTestData(array_length, work_res, &work_array, + &check_array, &test_array); RAJA::View> HostView( test_array, groups, idim, jdim, kdim); @@ -88,39 +88,39 @@ KernelHyperplane3DTestImpl(const int groups, RAJA::TypedRangeStrideSegment Jrange(jdim - 1, -1, -1); RAJA::TypedRangeStrideSegment Krange(0, kdim, 1); - RAJA::kernel( - RAJA::make_tuple(Grange, Irange, Jrange, Krange), - [=] RAJA_HOST_DEVICE( - INDEX_TYPE g, INDEX_TYPE ii, INDEX_TYPE jj, INDEX_TYPE kk) - { - if (g < 0 || g >= groups || ii < 0 || ii >= idim || jj < 0 || - jj >= jdim || kk < 0 || kk >= kdim) - { - oob_count += 1; - } - - DATA_TYPE left = 1; - if (ii > 0) - { - left = WorkView(g, ii - 1, jj, kk); - } - - DATA_TYPE up = 1; - if (jj > 0) - { - up = WorkView(g, ii, jj - 1, kk); - } - - DATA_TYPE back = 1; - if (kk > 0) - { - back = WorkView(g, ii, jj, kk - 1); - } - - WorkView(g, ii, jj, kk) = left + up + back; - - trip_count += 1; - }); + RAJA::kernel(RAJA::make_tuple(Grange, Irange, Jrange, Krange), + [=] RAJA_HOST_DEVICE(INDEX_TYPE g, INDEX_TYPE ii, + INDEX_TYPE jj, INDEX_TYPE kk) + { + if (g < 0 || g >= groups || ii < 0 || + ii >= idim || jj < 0 || jj >= jdim || + kk < 0 || kk >= kdim) + { + oob_count += 1; + } + + DATA_TYPE left = 1; + if (ii > 0) + { + left = WorkView(g, ii - 1, jj, kk); + } + + DATA_TYPE up = 1; + if (jj > 0) + { + up = WorkView(g, ii, jj - 1, kk); + } + + DATA_TYPE back = 1; + if (kk > 0) + { + back = WorkView(g, ii, jj, kk - 1); + } + + WorkView(g, ii, jj, kk) = left + up + back; + + trip_count += 1; + }); work_res.memcpy(check_array, work_array, sizeof(DATA_TYPE) * array_length); @@ -175,8 +175,8 @@ KernelHyperplane3DTestImpl(const int groups, } } - deallocateForallTestData( - work_res, work_array, check_array, test_array); + deallocateForallTestData(work_res, work_array, check_array, + test_array); } @@ -193,20 +193,11 @@ TYPED_TEST_P(KernelHyperplane3DTest, Hyperplane3DKernel) using EXEC_POLICY = typename camp::at>::type; using REDUCE_POLICY = typename camp::at>::type; - KernelHyperplane3DTestImpl(1, 10, 10, 10); - KernelHyperplane3DTestImpl(2, 151, 111, 205); - KernelHyperplane3DTestImpl(3, 101, 213, 123); } diff --git a/test/functional/kernel/multi-reduce-nested/tests/test-kernel-nested-MultiReduce.hpp b/test/functional/kernel/multi-reduce-nested/tests/test-kernel-nested-MultiReduce.hpp index 9a6dd220be..5ae4194992 100644 --- a/test/functional/kernel/multi-reduce-nested/tests/test-kernel-nested-MultiReduce.hpp +++ b/test/functional/kernel/multi-reduce-nested/tests/test-kernel-nested-MultiReduce.hpp @@ -86,8 +86,8 @@ KernelMultiReduceNestedTestImpl(const SEGMENTS_TYPE& segments, IDX_TYPE data_len = 0; - allocateForallTestData( - idx_range + 1, working_res, &working_range, &check_range, &test_range); + allocateForallTestData(idx_range + 1, working_res, &working_range, + &check_range, &test_range); for (IDX_TYPE i = 0; i < idx_range + 1; ++i) { @@ -113,11 +113,11 @@ KernelMultiReduceNestedTestImpl(const SEGMENTS_TYPE& segments, } } - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, &check_array, + &test_array); - allocateForallTestData( - data_len, working_res, &working_bins, &check_bins, &test_bins); + allocateForallTestData(data_len, working_res, &working_bins, &check_bins, + &test_bins); if (data_len > IDX_TYPE(0)) { @@ -137,8 +137,8 @@ KernelMultiReduceNestedTestImpl(const SEGMENTS_TYPE& segments, } } - working_res.memcpy( - working_range, test_range, sizeof(IDX_TYPE) * (idx_range + 1)); + working_res.memcpy(working_range, test_range, + sizeof(IDX_TYPE) * (idx_range + 1)); working_res.memcpy(working_array, test_array, sizeof(DATA_TYPE) * data_len); working_res.memcpy(working_bins, test_bins, sizeof(IDX_TYPE) * data_len); @@ -157,8 +157,7 @@ KernelMultiReduceNestedTestImpl(const SEGMENTS_TYPE& segments, } RAJA::kernel_resource( - segments, - working_res, + segments, working_res, [=] RAJA_HOST_DEVICE(IDX_TYPE k, IDX_TYPE j, IDX_TYPE i) { IDX_TYPE ii = (dimi * dimj * k) + (dimi * j) + i; @@ -198,8 +197,7 @@ KernelMultiReduceNestedTestImpl(const SEGMENTS_TYPE& segments, } RAJA::kernel_resource( - segments, - working_res, + segments, working_res, [=] RAJA_HOST_DEVICE(IDX_TYPE k, IDX_TYPE j, IDX_TYPE i) { IDX_TYPE ii = (dimi * dimj * k) + (dimi * j) + i; @@ -235,8 +233,8 @@ KernelMultiReduceNestedTestImpl(const SEGMENTS_TYPE& segments, { test_array[i] = DATA_TYPE(array_flt_distribution(rngen)); } - working_res.memcpy( - working_array, test_array, sizeof(DATA_TYPE) * data_len); + working_res.memcpy(working_array, test_array, + sizeof(DATA_TYPE) * data_len); } @@ -249,8 +247,7 @@ KernelMultiReduceNestedTestImpl(const SEGMENTS_TYPE& segments, red.reset(); RAJA::kernel_resource( - segments, - working_res, + segments, working_res, [=] RAJA_HOST_DEVICE(IDX_TYPE k, IDX_TYPE j, IDX_TYPE i) { IDX_TYPE ii = (dimi * dimj * k) + (dimi * j) + i; @@ -381,34 +378,25 @@ TYPED_TEST_P(KernelMultiReduceNestedTest, MultiReduceNestedKernel) auto s1 = RAJA::make_tuple(RAJA::TypedRangeSegment(0, 2), RAJA::TypedRangeSegment(0, 7), RAJA::TypedRangeSegment(0, 3)); - KernelMultiReduceNestedTestImpl( - s1, container, working_res, rngen); + KernelMultiReduceNestedTestImpl(s1, container, + working_res, rngen); auto s2 = RAJA::make_tuple(RAJA::TypedRangeSegment(2, 35), RAJA::TypedRangeSegment(0, 19), RAJA::TypedRangeSegment(3, 13)); - KernelMultiReduceNestedTestImpl( - s2, container, working_res, rngen); + KernelMultiReduceNestedTestImpl(s2, container, + working_res, rngen); // Range-stride segment tests auto s3 = RAJA::make_tuple(RAJA::TypedRangeStrideSegment(0, 6, 2), RAJA::TypedRangeStrideSegment(1, 38, 3), RAJA::TypedRangeStrideSegment(5, 17, 1)); - KernelMultiReduceNestedTestImpl( - s3, container, working_res, rngen); + KernelMultiReduceNestedTestImpl(s3, container, + working_res, rngen); } } diff --git a/test/functional/kernel/nested-loop-reducesum/tests/nested-loop-BlockReduceSum-impl.hpp b/test/functional/kernel/nested-loop-reducesum/tests/nested-loop-BlockReduceSum-impl.hpp index c6cc00a099..60013aa430 100644 --- a/test/functional/kernel/nested-loop-reducesum/tests/nested-loop-BlockReduceSum-impl.hpp +++ b/test/functional/kernel/nested-loop-reducesum/tests/nested-loop-BlockReduceSum-impl.hpp @@ -64,23 +64,22 @@ void KernelNestedLoopTest(const DEPTH_1_REDUCESUM&, const int N) int* check_array; int* test_array; - allocateForallTestData( - N, erased_work_res, &work_array, &check_array, &test_array); + allocateForallTestData(N, erased_work_res, &work_array, &check_array, + &test_array); RAJA::TypedRangeSegment range(0, N); // Initialize Data std::iota(test_array, test_array + RAJA::stripIndexType(N), 0); - erased_work_res.memcpy( - work_array, test_array, sizeof(int) * RAJA::stripIndexType(N)); + erased_work_res.memcpy(work_array, test_array, + sizeof(int) * RAJA::stripIndexType(N)); RAJA::ReduceSum worksum(0); // Calculate Working data call_kernel( - RAJA::make_tuple(RAJA::RangeSegment(0, N)), - RAJA::make_tuple(0), + RAJA::make_tuple(RAJA::RangeSegment(0, N)), RAJA::make_tuple(0), // Resource work_res, @@ -102,8 +101,8 @@ void KernelNestedLoopTest(const DEPTH_1_REDUCESUM&, const int N) ASSERT_EQ(worksum.get(), N * (N - 1) / 2); - deallocateForallTestData( - erased_work_res, work_array, check_array, test_array); + deallocateForallTestData(erased_work_res, work_array, check_array, + test_array); } // DEVICE_ and DEPTH_1_REDUCESUM execution policies use the above diff --git a/test/functional/kernel/nested-loop-reducesum/tests/nested-loop-ReduceSum-impl.hpp b/test/functional/kernel/nested-loop-reducesum/tests/nested-loop-ReduceSum-impl.hpp index 6169265bb6..159261177c 100644 --- a/test/functional/kernel/nested-loop-reducesum/tests/nested-loop-ReduceSum-impl.hpp +++ b/test/functional/kernel/nested-loop-reducesum/tests/nested-loop-ReduceSum-impl.hpp @@ -77,23 +77,21 @@ void KernelNestedLoopTest(const DEPTH_3_REDUCESUM&, std::iota(test_array, test_array + RAJA::stripIndexType(flatSize), 0); - erased_work_res.memcpy(work_array, - test_array, + erased_work_res.memcpy(work_array, test_array, sizeof(RAJA::Index_type) * RAJA::stripIndexType(flatSize)); constexpr int Depth = 3; - RAJA::View> work_view( - work_array, dim0, dim1, dim2); + RAJA::View> work_view(work_array, dim0, + dim1, dim2); RAJA::ReduceSum hostsum(0); RAJA::ReduceSum worksum(0); call_kernel( - RAJA::make_tuple(range0, range1, range2), - work_res, - [=] RAJA_HOST_DEVICE( - RAJA::Index_type i, RAJA::Index_type j, RAJA::Index_type k) + RAJA::make_tuple(range0, range1, range2), work_res, + [=] RAJA_HOST_DEVICE(RAJA::Index_type i, RAJA::Index_type j, + RAJA::Index_type k) { worksum += work_view(i, j, k); }); RAJA::forall(rangeflat, @@ -103,8 +101,8 @@ void KernelNestedLoopTest(const DEPTH_3_REDUCESUM&, ASSERT_EQ(hostsum.get(), worksum.get()); - deallocateForallTestData( - erased_work_res, work_array, check_array, test_array); + deallocateForallTestData(erased_work_res, work_array, + check_array, test_array); } // DEVICE_ and DEPTH_3_REDUCESUM_SEQ_ execution policies use the above diff --git a/test/functional/kernel/nested-loop-segment-types/tests/test-kernel-nested-loops-segment-types.hpp b/test/functional/kernel/nested-loop-segment-types/tests/test-kernel-nested-loops-segment-types.hpp index 5009122c76..84cfad0815 100644 --- a/test/functional/kernel/nested-loop-segment-types/tests/test-kernel-nested-loops-segment-types.hpp +++ b/test/functional/kernel/nested-loop-segment-types/tests/test-kernel-nested-loops-segment-types.hpp @@ -55,20 +55,18 @@ void KernelNestedLoopsSegmentTypesTestImpl( DATA_TYPE* check_array; DATA_TYPE* test_array; - allocateForallTestData( - data_len, working_res, &work_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &work_array, + &check_array, &test_array); - RAJA::View> work_view( - work_array, dim1, dim2, dim3); - RAJA::View> test_view( - test_array, dim1, dim2, dim3); + RAJA::View> work_view(work_array, dim1, dim2, + dim3); + RAJA::View> test_view(test_array, dim1, dim2, + dim3); - memset(static_cast(test_array), - 0, + memset(static_cast(test_array), 0, sizeof(DATA_TYPE) * RAJA::stripIndexType(data_len)); - working_res.memcpy(work_array, - test_array, + working_res.memcpy(work_array, test_array, sizeof(DATA_TYPE) * RAJA::stripIndexType(data_len)); if (!zero_legth_segment) @@ -123,8 +121,7 @@ void KernelNestedLoopsSegmentTypesTestImpl( }); } - working_res.memcpy(check_array, - work_array, + working_res.memcpy(check_array, work_array, sizeof(DATA_TYPE) * RAJA::stripIndexType(data_len)); for (IDX_TYPE i = 0; i < data_len; ++i) @@ -133,8 +130,8 @@ void KernelNestedLoopsSegmentTypesTestImpl( ASSERT_EQ(test_array[ii], check_array[ii]); } - deallocateForallTestData( - working_res, work_array, check_array, test_array); + deallocateForallTestData(working_res, work_array, check_array, + test_array); } diff --git a/test/functional/kernel/nested-loop-view-types/tests/test-kernel-nested-loop-OffsetView2D.hpp b/test/functional/kernel/nested-loop-view-types/tests/test-kernel-nested-loop-OffsetView2D.hpp index 7b2f446e72..2d6fa22331 100644 --- a/test/functional/kernel/nested-loop-view-types/tests/test-kernel-nested-loop-OffsetView2D.hpp +++ b/test/functional/kernel/nested-loop-view-types/tests/test-kernel-nested-loop-OffsetView2D.hpp @@ -26,8 +26,8 @@ void KernelOffsetView2DTestImpl(std::array dim, EXPECT_LT(off_dim0, dim.at(0)); EXPECT_LT(off_dim1, dim.at(1)); - allocateForallTestData( - N, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(N, working_res, &working_array, &check_array, + &test_array); memset(static_cast(test_array), 0, sizeof(IDX_TYPE) * N); @@ -61,8 +61,8 @@ void KernelOffsetView2DTestImpl(std::array dim, ASSERT_EQ(test_array[ii], check_array[ii]); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } @@ -88,26 +88,26 @@ TYPED_TEST_P(KernelNestedLoopOffsetView2DTest, OffsetView2DKernelTest) // std::array offset_lo{{0, 2}}; std::array offset_hi{{dim0 - 3, dim1 - 4}}; - KernelOffsetView2DTestImpl( - dim, offset_lo, offset_hi); + KernelOffsetView2DTestImpl(dim, offset_lo, + offset_hi); offset_lo = std::array{{-1, -2}}; offset_hi = std::array{{dim0 - 3, dim1 - 6}}; - KernelOffsetView2DTestImpl( - dim, offset_lo, offset_hi); + KernelOffsetView2DTestImpl(dim, offset_lo, + offset_hi); // // Non-square views // offset_lo = std::array{{0, 1}}; offset_hi = std::array{{dim0 - 3, dim1 - 1}}; - KernelOffsetView2DTestImpl( - dim, offset_lo, offset_hi); + KernelOffsetView2DTestImpl(dim, offset_lo, + offset_hi); offset_lo = std::array{{-1, -1}}; offset_hi = std::array{{dim0 - 3, dim1 - 4}}; - KernelOffsetView2DTestImpl( - dim, offset_lo, offset_hi); + KernelOffsetView2DTestImpl(dim, offset_lo, + offset_hi); } REGISTER_TYPED_TEST_SUITE_P(KernelNestedLoopOffsetView2DTest, diff --git a/test/functional/kernel/nested-loop-view-types/tests/test-kernel-nested-loop-OffsetView3D.hpp b/test/functional/kernel/nested-loop-view-types/tests/test-kernel-nested-loop-OffsetView3D.hpp index 434dbd485b..2db2288d07 100644 --- a/test/functional/kernel/nested-loop-view-types/tests/test-kernel-nested-loop-OffsetView3D.hpp +++ b/test/functional/kernel/nested-loop-view-types/tests/test-kernel-nested-loop-OffsetView3D.hpp @@ -28,8 +28,8 @@ void KernelOffsetView3DTestImpl(std::array dim, EXPECT_LT(off_dim1, dim.at(1)); EXPECT_LT(off_dim2, dim.at(2)); - allocateForallTestData( - N, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(N, working_res, &working_array, &check_array, + &test_array); memset(static_cast(test_array), 0, sizeof(IDX_TYPE) * N); @@ -50,8 +50,7 @@ void KernelOffsetView3DTestImpl(std::array dim, RAJA::OffsetLayout<3> layout = RAJA::make_offset_layout<3>( {{offset_lo.at(0), offset_lo.at(1), offset_lo.at(2)}}, - {{offset_lo.at(0) + dim.at(0), - offset_lo.at(1) + dim.at(1), + {{offset_lo.at(0) + dim.at(0), offset_lo.at(1) + dim.at(1), offset_lo.at(2) + dim.at(2)}}); RAJA::View> view(working_array, layout); @@ -72,8 +71,8 @@ void KernelOffsetView3DTestImpl(std::array dim, ASSERT_EQ(test_array[ii], check_array[ii]); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } @@ -100,26 +99,26 @@ TYPED_TEST_P(KernelNestedLoopOffsetView3DTest, OffsetView3DKernelTest) // std::array offset_lo{{0, 2, 1}}; std::array offset_hi{{dim0 - 2, dim1 - 6, dim2 - 4}}; - KernelOffsetView3DTestImpl( - dim, offset_lo, offset_hi); + KernelOffsetView3DTestImpl(dim, offset_lo, + offset_hi); offset_lo = std::array{{-1, -2, -3}}; offset_hi = std::array{{dim0 - 3, dim1 - 10, dim2 - 8}}; - KernelOffsetView3DTestImpl( - dim, offset_lo, offset_hi); + KernelOffsetView3DTestImpl(dim, offset_lo, + offset_hi); // // Non-square views // offset_lo = std::array{{0, 1, 2}}; offset_hi = std::array{{dim0 - 3, dim1 - 2, dim2 - 2}}; - KernelOffsetView3DTestImpl( - dim, offset_lo, offset_hi); + KernelOffsetView3DTestImpl(dim, offset_lo, + offset_hi); offset_lo = std::array{{-1, -1, 0}}; offset_hi = std::array{{dim0 - 3, dim1 - 4, dim2 - 2}}; - KernelOffsetView3DTestImpl( - dim, offset_lo, offset_hi); + KernelOffsetView3DTestImpl(dim, offset_lo, + offset_hi); } REGISTER_TYPED_TEST_SUITE_P(KernelNestedLoopOffsetView3DTest, diff --git a/test/functional/kernel/nested-loop-view-types/tests/test-kernel-nested-loop-PermutedOffsetView2D.hpp b/test/functional/kernel/nested-loop-view-types/tests/test-kernel-nested-loop-PermutedOffsetView2D.hpp index e9dd1c8e4b..ce0cef6b42 100644 --- a/test/functional/kernel/nested-loop-view-types/tests/test-kernel-nested-loop-PermutedOffsetView2D.hpp +++ b/test/functional/kernel/nested-loop-view-types/tests/test-kernel-nested-loop-PermutedOffsetView2D.hpp @@ -45,8 +45,8 @@ void KernelPermutedOffsetView2DTestImpl(std::array dim, RAJA::idx_t Ntot = Ntot_outer * Ntot_inner; - allocateForallTestData( - Ntot, working_res, &B_work_array, &B_check_array, &B_test_array); + allocateForallTestData(Ntot, working_res, &B_work_array, + &B_check_array, &B_test_array); memset(static_cast(B_test_array), 0, sizeof(IDX_TYPE) * Ntot); @@ -62,8 +62,8 @@ void KernelPermutedOffsetView2DTestImpl(std::array dim, working_res.memcpy(B_work_array, B_test_array, sizeof(IDX_TYPE) * Ntot); - allocateForallTestData( - Nint, working_res, &A_work_array, &A_check_array, &A_test_array); + allocateForallTestData(Nint, working_res, &A_work_array, + &A_check_array, &A_test_array); memset(static_cast(A_test_array), 0, sizeof(IDX_TYPE) * Nint); @@ -113,11 +113,11 @@ void KernelPermutedOffsetView2DTestImpl(std::array dim, ASSERT_EQ(A_test_array[ii], A_check_array[ii]); } - deallocateForallTestData( - working_res, A_work_array, A_check_array, A_test_array); + deallocateForallTestData(working_res, A_work_array, A_check_array, + A_test_array); - deallocateForallTestData( - working_res, B_work_array, B_check_array, B_test_array); + deallocateForallTestData(working_res, B_work_array, B_check_array, + B_test_array); } diff --git a/test/functional/kernel/nested-loop-view-types/tests/test-kernel-nested-loop-PermutedOffsetView3D.hpp b/test/functional/kernel/nested-loop-view-types/tests/test-kernel-nested-loop-PermutedOffsetView3D.hpp index b7c2820df1..f5b48fab5b 100644 --- a/test/functional/kernel/nested-loop-view-types/tests/test-kernel-nested-loop-PermutedOffsetView3D.hpp +++ b/test/functional/kernel/nested-loop-view-types/tests/test-kernel-nested-loop-PermutedOffsetView3D.hpp @@ -48,8 +48,8 @@ void KernelPermutedOffsetView3DTestImpl(std::array dim, RAJA::idx_t Ntot = Ntot_outer * Ntot_middle * Ntot_inner; - allocateForallTestData( - Ntot, working_res, &B_work_array, &B_check_array, &B_test_array); + allocateForallTestData(Ntot, working_res, &B_work_array, + &B_check_array, &B_test_array); memset(static_cast(B_test_array), 0, sizeof(IDX_TYPE) * Ntot); @@ -69,8 +69,8 @@ void KernelPermutedOffsetView3DTestImpl(std::array dim, working_res.memcpy(B_work_array, B_test_array, sizeof(IDX_TYPE) * Ntot); - allocateForallTestData( - Nint, working_res, &A_work_array, &A_check_array, &A_test_array); + allocateForallTestData(Nint, working_res, &A_work_array, + &A_check_array, &A_test_array); memset(static_cast(A_test_array), 0, sizeof(IDX_TYPE) * Nint); @@ -102,8 +102,7 @@ void KernelPermutedOffsetView3DTestImpl(std::array dim, RAJA::OffsetLayout<3> B_layout = RAJA::make_permuted_offset_layout<3>( {{-1, -1, -1}}, - {{Ntot_len.at(0) - 1, Ntot_len.at(1) - 1, Ntot_len.at(2) - 1}}, - perm); + {{Ntot_len.at(0) - 1, Ntot_len.at(1) - 1, Ntot_len.at(2) - 1}}, perm); RAJA::Layout<3> A_layout = RAJA::make_permuted_layout( {{Nint_len.at(0), Nint_len.at(1), Nint_len.at(2)}}, perm); @@ -131,11 +130,11 @@ void KernelPermutedOffsetView3DTestImpl(std::array dim, ASSERT_EQ(A_test_array[ii], A_check_array[ii]); } - deallocateForallTestData( - working_res, A_work_array, A_check_array, A_test_array); + deallocateForallTestData(working_res, A_work_array, A_check_array, + A_test_array); - deallocateForallTestData( - working_res, B_work_array, B_check_array, B_test_array); + deallocateForallTestData(working_res, B_work_array, B_check_array, + B_test_array); } diff --git a/test/functional/kernel/nested-loop-view-types/tests/test-kernel-nested-loop-PermutedView2D.hpp b/test/functional/kernel/nested-loop-view-types/tests/test-kernel-nested-loop-PermutedView2D.hpp index a3663c733b..b4829d1cd0 100644 --- a/test/functional/kernel/nested-loop-view-types/tests/test-kernel-nested-loop-PermutedView2D.hpp +++ b/test/functional/kernel/nested-loop-view-types/tests/test-kernel-nested-loop-PermutedView2D.hpp @@ -23,8 +23,8 @@ void KernelPermutedView2DTestImpl(std::array dim, static_cast(RAJA::stripIndexType(dim.at(1)))}}; RAJA::idx_t N = dim_strip.at(0) * dim_strip.at(1); - allocateForallTestData( - N, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(N, working_res, &working_array, &check_array, + &test_array); memset(static_cast(test_array), 0, sizeof(IDX_TYPE) * N); @@ -55,8 +55,8 @@ void KernelPermutedView2DTestImpl(std::array dim, ASSERT_EQ(test_array[ii], check_array[ii]); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } diff --git a/test/functional/kernel/nested-loop-view-types/tests/test-kernel-nested-loop-PermutedView3D.hpp b/test/functional/kernel/nested-loop-view-types/tests/test-kernel-nested-loop-PermutedView3D.hpp index 083183706e..3af623c3ba 100644 --- a/test/functional/kernel/nested-loop-view-types/tests/test-kernel-nested-loop-PermutedView3D.hpp +++ b/test/functional/kernel/nested-loop-view-types/tests/test-kernel-nested-loop-PermutedView3D.hpp @@ -24,8 +24,8 @@ void KernelPermutedView3DTestImpl(std::array dim, static_cast(RAJA::stripIndexType(dim.at(2)))}}; RAJA::idx_t N = dim_strip.at(0) * dim_strip.at(1) * dim_strip.at(2); - allocateForallTestData( - N, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(N, working_res, &working_array, &check_array, + &test_array); memset(static_cast(test_array), 0, sizeof(IDX_TYPE) * N); @@ -57,8 +57,8 @@ void KernelPermutedView3DTestImpl(std::array dim, ASSERT_EQ(test_array[ii], check_array[ii]); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } diff --git a/test/functional/kernel/nested-loop/tests/nested-loop-Basic-impl.hpp b/test/functional/kernel/nested-loop/tests/nested-loop-Basic-impl.hpp index f92b4f9fab..f5013c7420 100644 --- a/test/functional/kernel/nested-loop/tests/nested-loop-Basic-impl.hpp +++ b/test/functional/kernel/nested-loop/tests/nested-loop-Basic-impl.hpp @@ -77,17 +77,15 @@ void KernelNestedLoopTest(const DEPTH_2&, std::iota(test_array, test_array + RAJA::stripIndexType(flatSize), 0); constexpr int Depth = 2; - RAJA::View> work_view( - work_array, dim1, dim0); + RAJA::View> work_view(work_array, dim1, + dim0); call_kernel( - RAJA::make_tuple(range1, range0), - work_res, + RAJA::make_tuple(range1, range0), work_res, [=] RAJA_HOST_DEVICE(RAJA::Index_type j, RAJA::Index_type i) { work_view(j, i) = (j * dim0) + i; }); - work_res.memcpy(check_array, - work_array, + work_res.memcpy(check_array, work_array, sizeof(RAJA::Index_type) * RAJA::stripIndexType(flatSize)); RAJA::forall(rangeflat, [=](RAJA::Index_type i) @@ -97,8 +95,8 @@ void KernelNestedLoopTest(const DEPTH_2&, check_array[RAJA::stripIndexType(i)]); }); - deallocateForallTestData( - erased_work_res, work_array, check_array, test_array); + deallocateForallTestData(erased_work_res, work_array, + check_array, test_array); } // DEPTH_2_COLLAPSE and DEVICE_DEPTH_2 execution policies use the above DEPTH_2 @@ -153,18 +151,16 @@ void KernelNestedLoopTest(const DEPTH_3&, std::iota(test_array, test_array + RAJA::stripIndexType(flatSize), 0); constexpr int Depth = 3; - RAJA::View> work_view( - work_array, dim2, dim1, dim0); + RAJA::View> work_view(work_array, dim2, + dim1, dim0); call_kernel( - RAJA::make_tuple(range2, range1, range0), - work_res, - [=] RAJA_HOST_DEVICE( - RAJA::Index_type k, RAJA::Index_type j, RAJA::Index_type i) + RAJA::make_tuple(range2, range1, range0), work_res, + [=] RAJA_HOST_DEVICE(RAJA::Index_type k, RAJA::Index_type j, + RAJA::Index_type i) { work_view(k, j, i) = (dim0 * dim1 * k) + (dim0 * j) + i; }); - work_res.memcpy(check_array, - work_array, + work_res.memcpy(check_array, work_array, sizeof(RAJA::Index_type) * RAJA::stripIndexType(flatSize)); RAJA::forall(rangeflat, [=](RAJA::Index_type i) @@ -174,8 +170,8 @@ void KernelNestedLoopTest(const DEPTH_3&, check_array[RAJA::stripIndexType(i)]); }); - deallocateForallTestData( - erased_work_res, work_array, check_array, test_array); + deallocateForallTestData(erased_work_res, work_array, + check_array, test_array); } // DEPTH_3_COLLAPSE execution policies use the above DEPTH_3 test. diff --git a/test/functional/kernel/nested-loop/tests/nested-loop-MultiLambda-impl.hpp b/test/functional/kernel/nested-loop/tests/nested-loop-MultiLambda-impl.hpp index 1834b22ac9..caa66a621c 100644 --- a/test/functional/kernel/nested-loop/tests/nested-loop-MultiLambda-impl.hpp +++ b/test/functional/kernel/nested-loop/tests/nested-loop-MultiLambda-impl.hpp @@ -70,10 +70,10 @@ void KernelNestedLoopTest() test_arrA[i] = i * 1.2; test_arrB[i] = i * 0.5; } - work_res.memcpy( - work_arrA, test_arrA, sizeof(double) * RAJA::stripIndexType(N * N)); - work_res.memcpy( - work_arrB, test_arrB, sizeof(double) * RAJA::stripIndexType(N * N)); + work_res.memcpy(work_arrA, test_arrA, + sizeof(double) * RAJA::stripIndexType(N * N)); + work_res.memcpy(work_arrB, test_arrB, + sizeof(double) * RAJA::stripIndexType(N * N)); // Initialize RAJA Views RAJA::View> test_viewA(test_arrA, N, N); @@ -125,10 +125,10 @@ void KernelNestedLoopTest() work_viewB(i - 1, j)); }); - work_res.memcpy( - check_arrA, work_arrA, sizeof(double) * RAJA::stripIndexType(N * N)); - work_res.memcpy( - check_arrB, work_arrB, sizeof(double) * RAJA::stripIndexType(N * N)); + work_res.memcpy(check_arrA, work_arrA, + sizeof(double) * RAJA::stripIndexType(N * N)); + work_res.memcpy(check_arrB, work_arrB, + sizeof(double) * RAJA::stripIndexType(N * N)); RAJA::forall( RAJA::RangeSegment{0, N * N}, diff --git a/test/functional/kernel/nested-loop/tests/nested-loop-MultiLambdaParam-impl.hpp b/test/functional/kernel/nested-loop/tests/nested-loop-MultiLambdaParam-impl.hpp index fd751bc399..4c09133014 100644 --- a/test/functional/kernel/nested-loop/tests/nested-loop-MultiLambdaParam-impl.hpp +++ b/test/functional/kernel/nested-loop/tests/nested-loop-MultiLambdaParam-impl.hpp @@ -90,12 +90,12 @@ void KernelNestedLoopTest() } } - work_res.memcpy( - work_arrA, test_arrA, sizeof(double) * RAJA::stripIndexType(N * N)); - work_res.memcpy( - work_arrB, test_arrB, sizeof(double) * RAJA::stripIndexType(N * N)); - work_res.memcpy( - work_arrC, test_arrC, sizeof(double) * RAJA::stripIndexType(N * N)); + work_res.memcpy(work_arrA, test_arrA, + sizeof(double) * RAJA::stripIndexType(N * N)); + work_res.memcpy(work_arrB, test_arrB, + sizeof(double) * RAJA::stripIndexType(N * N)); + work_res.memcpy(work_arrC, test_arrC, + sizeof(double) * RAJA::stripIndexType(N * N)); // Calculate Test data for (int row = 0; row < N; ++row) @@ -114,8 +114,7 @@ void KernelNestedLoopTest() // Calculate Working data call_kernel( - RAJA::make_tuple(RAJA::RangeSegment{0, N}, - RAJA::RangeSegment{0, N}, + RAJA::make_tuple(RAJA::RangeSegment{0, N}, RAJA::RangeSegment{0, N}, RAJA::RangeSegment{0, N}), RAJA::tuple{0.0}, @@ -136,12 +135,11 @@ void KernelNestedLoopTest() ); - work_res.memcpy( - check_arrC, work_arrC, sizeof(double) * RAJA::stripIndexType(N * N)); + work_res.memcpy(check_arrC, work_arrC, + sizeof(double) * RAJA::stripIndexType(N * N)); RAJA::forall( - RAJA::RangeSegment{0, N * N}, - [=](RAJA::Index_type i) + RAJA::RangeSegment{0, N * N}, [=](RAJA::Index_type i) { ASSERT_TRUE(RAJA::test_abs(test_arrC[i] - check_arrC[i]) < 10e-8); }); work_res.deallocate(work_arrA); diff --git a/test/functional/kernel/nested-loop/tests/test-kernel-nested-loop-Basic.hpp b/test/functional/kernel/nested-loop/tests/test-kernel-nested-loop-Basic.hpp index f276e1e273..6793b452f3 100644 --- a/test/functional/kernel/nested-loop/tests/test-kernel-nested-loop-Basic.hpp +++ b/test/functional/kernel/nested-loop/tests/test-kernel-nested-loop-Basic.hpp @@ -38,8 +38,8 @@ TYPED_TEST_P(KernelNestedLoopBasicTest, NestedLoopBasicKernel) // For double nested loop tests the third arg is ignored. KernelNestedLoopTest(LOOP_TYPE(), 1, 1, 1); - KernelNestedLoopTest( - LOOP_TYPE(), 40, 30, 20); + KernelNestedLoopTest(LOOP_TYPE(), 40, 30, + 20); } REGISTER_TYPED_TEST_SUITE_P(KernelNestedLoopBasicTest, NestedLoopBasicKernel); diff --git a/test/functional/kernel/nested-loop/tests/test-kernel-resource-nested-loop-Basic.hpp b/test/functional/kernel/nested-loop/tests/test-kernel-resource-nested-loop-Basic.hpp index 5da4815546..219c448ce0 100644 --- a/test/functional/kernel/nested-loop/tests/test-kernel-resource-nested-loop-Basic.hpp +++ b/test/functional/kernel/nested-loop/tests/test-kernel-resource-nested-loop-Basic.hpp @@ -37,8 +37,8 @@ TYPED_TEST_P(KernelNestedLoopBasicTest, NestedLoopBasicKernel) // For double nested loop tests the third arg is ignored. KernelNestedLoopTest(LOOP_TYPE(), 1, 1, 1); - KernelNestedLoopTest( - LOOP_TYPE(), 40, 30, 20); + KernelNestedLoopTest(LOOP_TYPE(), 40, 30, + 20); } REGISTER_TYPED_TEST_SUITE_P(KernelNestedLoopBasicTest, NestedLoopBasicKernel); diff --git a/test/functional/kernel/reduce-loc/tests/test-kernel-reduceloc-Max2D.hpp b/test/functional/kernel/reduce-loc/tests/test-kernel-reduceloc-Max2D.hpp index d053dd6c2a..b08b38fa30 100644 --- a/test/functional/kernel/reduce-loc/tests/test-kernel-reduceloc-Max2D.hpp +++ b/test/functional/kernel/reduce-loc/tests/test-kernel-reduceloc-Max2D.hpp @@ -28,20 +28,19 @@ void KernelLocMax2DTestImpl(const int xdim, const int ydim) // square 2D array, xdim x ydim INDEX_TYPE array_length = xdim * ydim; - allocateForallTestData( - array_length, work_res, &work_array, &check_array, &test_array); + allocateForallTestData(array_length, work_res, &work_array, + &check_array, &test_array); - allocateForallTestData( - ydim, work_res, &workarr2D, &checkarr2D, &testarr2D); + allocateForallTestData(ydim, work_res, &workarr2D, &checkarr2D, + &testarr2D); // set rows to point to check and work _arrays RAJA::TypedRangeSegment seg(0, ydim); - RAJA::forall(seg, - [=] RAJA_HOST_DEVICE(INDEX_TYPE zz) + RAJA::forall(seg, [=] RAJA_HOST_DEVICE(INDEX_TYPE zz) { workarr2D[zz] = work_array + zz * ydim; }); - RAJA::forall( - seg, [=](INDEX_TYPE zz) { checkarr2D[zz] = check_array + zz * ydim; }); + RAJA::forall(seg, [=](INDEX_TYPE zz) + { checkarr2D[zz] = check_array + zz * ydim; }); // initializing values RAJA::forall(seg, @@ -63,8 +62,7 @@ void KernelLocMax2DTestImpl(const int xdim, const int ydim) (DATA_TYPE)0, Index2D(0, 0)); RAJA::kernel( - RAJA::make_tuple(colrange, rowrange), - [=] RAJA_HOST_DEVICE(int c, int r) + RAJA::make_tuple(colrange, rowrange), [=] RAJA_HOST_DEVICE(int c, int r) { maxloc_reducer.maxloc(workarr2D[r][c], Index2D(c, r)); }); // CPU answer @@ -90,11 +88,11 @@ void KernelLocMax2DTestImpl(const int xdim, const int ydim) ASSERT_EQ(checkraja_loc.idx, raja_loc.idx); ASSERT_EQ(checkraja_loc.idy, raja_loc.idy); - deallocateForallTestData( - work_res, work_array, check_array, test_array); + deallocateForallTestData(work_res, work_array, check_array, + test_array); - deallocateForallTestData( - work_res, workarr2D, checkarr2D, testarr2D); + deallocateForallTestData(work_res, workarr2D, checkarr2D, + testarr2D); } @@ -112,24 +110,12 @@ TYPED_TEST_P(KernelLocMax2DTest, LocMax2DKernel) using EXEC_POLICY = typename camp::at>::type; using REDUCE_POLICY = typename camp::at>::type; - KernelLocMax2DTestImpl(10, 10); - KernelLocMax2DTestImpl(151, 151); - KernelLocMax2DTestImpl(362, 362); + KernelLocMax2DTestImpl(10, 10); + KernelLocMax2DTestImpl(151, 151); + KernelLocMax2DTestImpl(362, 362); } REGISTER_TYPED_TEST_SUITE_P(KernelLocMax2DTest, LocMax2DKernel); diff --git a/test/functional/kernel/reduce-loc/tests/test-kernel-reduceloc-Max2DView.hpp b/test/functional/kernel/reduce-loc/tests/test-kernel-reduceloc-Max2DView.hpp index 42626bb986..f3f5cda19c 100644 --- a/test/functional/kernel/reduce-loc/tests/test-kernel-reduceloc-Max2DView.hpp +++ b/test/functional/kernel/reduce-loc/tests/test-kernel-reduceloc-Max2DView.hpp @@ -28,20 +28,19 @@ void KernelLocMax2DViewTestImpl(const int xdim, const int ydim) // square 2D array, xdim x ydim INDEX_TYPE array_length = xdim * ydim; - allocateForallTestData( - array_length, work_res, &work_array, &check_array, &test_array); + allocateForallTestData(array_length, work_res, &work_array, + &check_array, &test_array); - allocateForallTestData( - ydim, work_res, &workarr2D, &checkarr2D, &testarr2D); + allocateForallTestData(ydim, work_res, &workarr2D, &checkarr2D, + &testarr2D); // set rows to point to check and work _arrays RAJA::TypedRangeSegment seg(0, ydim); - RAJA::forall(seg, - [=] RAJA_HOST_DEVICE(INDEX_TYPE zz) + RAJA::forall(seg, [=] RAJA_HOST_DEVICE(INDEX_TYPE zz) { workarr2D[zz] = work_array + zz * ydim; }); - RAJA::forall( - seg, [=](INDEX_TYPE zz) { checkarr2D[zz] = check_array + zz * ydim; }); + RAJA::forall(seg, [=](INDEX_TYPE zz) + { checkarr2D[zz] = check_array + zz * ydim; }); // initializing values RAJA::forall(seg, @@ -65,8 +64,7 @@ void KernelLocMax2DViewTestImpl(const int xdim, const int ydim) (DATA_TYPE)0, Index2D(0, 0)); RAJA::kernel( - RAJA::make_tuple(colrange, rowrange), - [=] RAJA_HOST_DEVICE(int c, int r) + RAJA::make_tuple(colrange, rowrange), [=] RAJA_HOST_DEVICE(int c, int r) { maxloc_reducer.maxloc(ArrView(r, c), Index2D(c, r)); }); // CPU answer @@ -92,11 +90,11 @@ void KernelLocMax2DViewTestImpl(const int xdim, const int ydim) ASSERT_EQ(checkraja_loc.idx, raja_loc.idx); ASSERT_EQ(checkraja_loc.idy, raja_loc.idy); - deallocateForallTestData( - work_res, work_array, check_array, test_array); + deallocateForallTestData(work_res, work_array, check_array, + test_array); - deallocateForallTestData( - work_res, workarr2D, checkarr2D, testarr2D); + deallocateForallTestData(work_res, workarr2D, checkarr2D, + testarr2D); } @@ -114,24 +112,12 @@ TYPED_TEST_P(KernelLocMax2DViewTest, LocMax2DViewKernel) using EXEC_POLICY = typename camp::at>::type; using REDUCE_POLICY = typename camp::at>::type; - KernelLocMax2DViewTestImpl(10, 10); - KernelLocMax2DViewTestImpl(151, 151); - KernelLocMax2DViewTestImpl(362, 362); + KernelLocMax2DViewTestImpl(10, 10); + KernelLocMax2DViewTestImpl(151, 151); + KernelLocMax2DViewTestImpl(362, 362); } REGISTER_TYPED_TEST_SUITE_P(KernelLocMax2DViewTest, LocMax2DViewKernel); diff --git a/test/functional/kernel/reduce-loc/tests/test-kernel-reduceloc-Max2DViewTuple.hpp b/test/functional/kernel/reduce-loc/tests/test-kernel-reduceloc-Max2DViewTuple.hpp index 864b68644e..e4faccc437 100644 --- a/test/functional/kernel/reduce-loc/tests/test-kernel-reduceloc-Max2DViewTuple.hpp +++ b/test/functional/kernel/reduce-loc/tests/test-kernel-reduceloc-Max2DViewTuple.hpp @@ -28,20 +28,19 @@ void KernelLocMax2DViewTupleTestImpl(const int xdim, const int ydim) // square 2D array, xdim x ydim INDEX_TYPE array_length = xdim * ydim; - allocateForallTestData( - array_length, work_res, &work_array, &check_array, &test_array); + allocateForallTestData(array_length, work_res, &work_array, + &check_array, &test_array); - allocateForallTestData( - ydim, work_res, &workarr2D, &checkarr2D, &testarr2D); + allocateForallTestData(ydim, work_res, &workarr2D, &checkarr2D, + &testarr2D); // set rows to point to check and work _arrays RAJA::TypedRangeSegment seg(0, ydim); - RAJA::forall(seg, - [=] RAJA_HOST_DEVICE(INDEX_TYPE zz) + RAJA::forall(seg, [=] RAJA_HOST_DEVICE(INDEX_TYPE zz) { workarr2D[zz] = work_array + zz * ydim; }); - RAJA::forall( - seg, [=](INDEX_TYPE zz) { checkarr2D[zz] = check_array + zz * ydim; }); + RAJA::forall(seg, [=](INDEX_TYPE zz) + { checkarr2D[zz] = check_array + zz * ydim; }); // initializing values RAJA::forall(seg, @@ -67,9 +66,9 @@ void KernelLocMax2DViewTupleTestImpl(const int xdim, const int ydim) RAJA::tuple LocTup(0, 0); - RAJA:: - ReduceMaxLoc> - maxloc_reducer((DATA_TYPE)0, LocTup); + RAJA::ReduceMaxLoc> + maxloc_reducer((DATA_TYPE)0, LocTup); RAJA::kernel(RAJA::make_tuple(colrange, rowrange), [=] RAJA_HOST_DEVICE(int c, int r) @@ -102,11 +101,11 @@ void KernelLocMax2DViewTupleTestImpl(const int xdim, const int ydim) ASSERT_EQ(checkraja_loc.idx, RAJA::get<0>(raja_loc)); ASSERT_EQ(checkraja_loc.idy, RAJA::get<1>(raja_loc)); - deallocateForallTestData( - work_res, work_array, check_array, test_array); + deallocateForallTestData(work_res, work_array, check_array, + test_array); - deallocateForallTestData( - work_res, workarr2D, checkarr2D, testarr2D); + deallocateForallTestData(work_res, workarr2D, checkarr2D, + testarr2D); } @@ -124,24 +123,15 @@ TYPED_TEST_P(KernelLocMax2DViewTupleTest, LocMax2DViewTupleKernel) using EXEC_POLICY = typename camp::at>::type; using REDUCE_POLICY = typename camp::at>::type; - KernelLocMax2DViewTupleTestImpl(10, 10); - KernelLocMax2DViewTupleTestImpl(151, 151); - KernelLocMax2DViewTupleTestImpl(362, 362); + KernelLocMax2DViewTupleTestImpl( + 10, 10); + KernelLocMax2DViewTupleTestImpl( + 151, 151); + KernelLocMax2DViewTupleTestImpl( + 362, 362); } REGISTER_TYPED_TEST_SUITE_P(KernelLocMax2DViewTupleTest, diff --git a/test/functional/kernel/reduce-loc/tests/test-kernel-reduceloc-Min2D.hpp b/test/functional/kernel/reduce-loc/tests/test-kernel-reduceloc-Min2D.hpp index 1acd780406..fa4d583d74 100644 --- a/test/functional/kernel/reduce-loc/tests/test-kernel-reduceloc-Min2D.hpp +++ b/test/functional/kernel/reduce-loc/tests/test-kernel-reduceloc-Min2D.hpp @@ -28,20 +28,19 @@ void KernelLocMin2DTestImpl(const int xdim, const int ydim) // square 2D array, xdim x ydim INDEX_TYPE array_length = xdim * ydim; - allocateForallTestData( - array_length, work_res, &work_array, &check_array, &test_array); + allocateForallTestData(array_length, work_res, &work_array, + &check_array, &test_array); - allocateForallTestData( - ydim, work_res, &workarr2D, &checkarr2D, &testarr2D); + allocateForallTestData(ydim, work_res, &workarr2D, &checkarr2D, + &testarr2D); // set rows to point to check and work _arrays RAJA::TypedRangeSegment seg(0, ydim); - RAJA::forall(seg, - [=] RAJA_HOST_DEVICE(INDEX_TYPE zz) + RAJA::forall(seg, [=] RAJA_HOST_DEVICE(INDEX_TYPE zz) { workarr2D[zz] = work_array + zz * ydim; }); - RAJA::forall( - seg, [=](INDEX_TYPE zz) { checkarr2D[zz] = check_array + zz * ydim; }); + RAJA::forall(seg, [=](INDEX_TYPE zz) + { checkarr2D[zz] = check_array + zz * ydim; }); // initializing values RAJA::forall(seg, @@ -63,8 +62,7 @@ void KernelLocMin2DTestImpl(const int xdim, const int ydim) (DATA_TYPE)1024, Index2D(0, 0)); RAJA::kernel( - RAJA::make_tuple(colrange, rowrange), - [=] RAJA_HOST_DEVICE(int c, int r) + RAJA::make_tuple(colrange, rowrange), [=] RAJA_HOST_DEVICE(int c, int r) { minloc_reducer.minloc(workarr2D[r][c], Index2D(c, r)); }); // CPU answer @@ -90,11 +88,11 @@ void KernelLocMin2DTestImpl(const int xdim, const int ydim) ASSERT_EQ(checkraja_loc.idx, raja_loc.idx); ASSERT_EQ(checkraja_loc.idy, raja_loc.idy); - deallocateForallTestData( - work_res, work_array, check_array, test_array); + deallocateForallTestData(work_res, work_array, check_array, + test_array); - deallocateForallTestData( - work_res, workarr2D, checkarr2D, testarr2D); + deallocateForallTestData(work_res, workarr2D, checkarr2D, + testarr2D); } @@ -112,24 +110,12 @@ TYPED_TEST_P(KernelLocMin2DTest, LocMin2DKernel) using EXEC_POLICY = typename camp::at>::type; using REDUCE_POLICY = typename camp::at>::type; - KernelLocMin2DTestImpl(10, 10); - KernelLocMin2DTestImpl(151, 151); - KernelLocMin2DTestImpl(362, 362); + KernelLocMin2DTestImpl(10, 10); + KernelLocMin2DTestImpl(151, 151); + KernelLocMin2DTestImpl(362, 362); } REGISTER_TYPED_TEST_SUITE_P(KernelLocMin2DTest, LocMin2DKernel); diff --git a/test/functional/kernel/reduce-loc/tests/test-kernel-reduceloc-Min2DView.hpp b/test/functional/kernel/reduce-loc/tests/test-kernel-reduceloc-Min2DView.hpp index 8eada9e8c8..ccd8c542fc 100644 --- a/test/functional/kernel/reduce-loc/tests/test-kernel-reduceloc-Min2DView.hpp +++ b/test/functional/kernel/reduce-loc/tests/test-kernel-reduceloc-Min2DView.hpp @@ -28,20 +28,19 @@ void KernelLocMin2DViewTestImpl(const int xdim, const int ydim) // square 2D array, xdim x ydim INDEX_TYPE array_length = xdim * ydim; - allocateForallTestData( - array_length, work_res, &work_array, &check_array, &test_array); + allocateForallTestData(array_length, work_res, &work_array, + &check_array, &test_array); - allocateForallTestData( - ydim, work_res, &workarr2D, &checkarr2D, &testarr2D); + allocateForallTestData(ydim, work_res, &workarr2D, &checkarr2D, + &testarr2D); // set rows to point to check and work _arrays RAJA::TypedRangeSegment seg(0, ydim); - RAJA::forall(seg, - [=] RAJA_HOST_DEVICE(INDEX_TYPE zz) + RAJA::forall(seg, [=] RAJA_HOST_DEVICE(INDEX_TYPE zz) { workarr2D[zz] = work_array + zz * ydim; }); - RAJA::forall( - seg, [=](INDEX_TYPE zz) { checkarr2D[zz] = check_array + zz * ydim; }); + RAJA::forall(seg, [=](INDEX_TYPE zz) + { checkarr2D[zz] = check_array + zz * ydim; }); // initializing values RAJA::forall(seg, @@ -65,8 +64,7 @@ void KernelLocMin2DViewTestImpl(const int xdim, const int ydim) (DATA_TYPE)1024, Index2D(0, 0)); RAJA::kernel( - RAJA::make_tuple(colrange, rowrange), - [=] RAJA_HOST_DEVICE(int c, int r) + RAJA::make_tuple(colrange, rowrange), [=] RAJA_HOST_DEVICE(int c, int r) { minloc_reducer.minloc(ArrView(r, c), Index2D(c, r)); }); // CPU answer @@ -92,11 +90,11 @@ void KernelLocMin2DViewTestImpl(const int xdim, const int ydim) ASSERT_EQ(checkraja_loc.idx, raja_loc.idx); ASSERT_EQ(checkraja_loc.idy, raja_loc.idy); - deallocateForallTestData( - work_res, work_array, check_array, test_array); + deallocateForallTestData(work_res, work_array, check_array, + test_array); - deallocateForallTestData( - work_res, workarr2D, checkarr2D, testarr2D); + deallocateForallTestData(work_res, workarr2D, checkarr2D, + testarr2D); } @@ -114,24 +112,12 @@ TYPED_TEST_P(KernelLocMin2DViewTest, LocMin2DViewKernel) using EXEC_POLICY = typename camp::at>::type; using REDUCE_POLICY = typename camp::at>::type; - KernelLocMin2DViewTestImpl(10, 10); - KernelLocMin2DViewTestImpl(151, 151); - KernelLocMin2DViewTestImpl(362, 362); + KernelLocMin2DViewTestImpl(10, 10); + KernelLocMin2DViewTestImpl(151, 151); + KernelLocMin2DViewTestImpl(362, 362); } REGISTER_TYPED_TEST_SUITE_P(KernelLocMin2DViewTest, LocMin2DViewKernel); diff --git a/test/functional/kernel/reduce-loc/tests/test-kernel-reduceloc-Min2DViewTuple.hpp b/test/functional/kernel/reduce-loc/tests/test-kernel-reduceloc-Min2DViewTuple.hpp index 8b886df355..12be090907 100644 --- a/test/functional/kernel/reduce-loc/tests/test-kernel-reduceloc-Min2DViewTuple.hpp +++ b/test/functional/kernel/reduce-loc/tests/test-kernel-reduceloc-Min2DViewTuple.hpp @@ -28,20 +28,19 @@ void KernelLocMin2DViewTupleTestImpl(const int xdim, const int ydim) // square 2D array, xdim x ydim INDEX_TYPE array_length = xdim * ydim; - allocateForallTestData( - array_length, work_res, &work_array, &check_array, &test_array); + allocateForallTestData(array_length, work_res, &work_array, + &check_array, &test_array); - allocateForallTestData( - ydim, work_res, &workarr2D, &checkarr2D, &testarr2D); + allocateForallTestData(ydim, work_res, &workarr2D, &checkarr2D, + &testarr2D); // set rows to point to check and work _arrays RAJA::TypedRangeSegment seg(0, ydim); - RAJA::forall(seg, - [=] RAJA_HOST_DEVICE(INDEX_TYPE zz) + RAJA::forall(seg, [=] RAJA_HOST_DEVICE(INDEX_TYPE zz) { workarr2D[zz] = work_array + zz * ydim; }); - RAJA::forall( - seg, [=](INDEX_TYPE zz) { checkarr2D[zz] = check_array + zz * ydim; }); + RAJA::forall(seg, [=](INDEX_TYPE zz) + { checkarr2D[zz] = check_array + zz * ydim; }); // initializing values RAJA::forall(seg, @@ -63,9 +62,9 @@ void KernelLocMin2DViewTupleTestImpl(const int xdim, const int ydim) RAJA::tuple LocTup(0, 0); - RAJA:: - ReduceMinLoc> - minloc_reducer((DATA_TYPE)1024, LocTup); + RAJA::ReduceMinLoc> + minloc_reducer((DATA_TYPE)1024, LocTup); RAJA::kernel(RAJA::make_tuple(colrange, rowrange), [=] RAJA_HOST_DEVICE(int c, int r) @@ -98,11 +97,11 @@ void KernelLocMin2DViewTupleTestImpl(const int xdim, const int ydim) ASSERT_EQ(checkraja_loc.idx, RAJA::get<0>(raja_loc)); ASSERT_EQ(checkraja_loc.idy, RAJA::get<1>(raja_loc)); - deallocateForallTestData( - work_res, work_array, check_array, test_array); + deallocateForallTestData(work_res, work_array, check_array, + test_array); - deallocateForallTestData( - work_res, workarr2D, checkarr2D, testarr2D); + deallocateForallTestData(work_res, workarr2D, checkarr2D, + testarr2D); } @@ -120,24 +119,15 @@ TYPED_TEST_P(KernelLocMin2DViewTupleTest, LocMin2DViewTupleKernel) using EXEC_POLICY = typename camp::at>::type; using REDUCE_POLICY = typename camp::at>::type; - KernelLocMin2DViewTupleTestImpl(10, 10); - KernelLocMin2DViewTupleTestImpl(151, 151); - KernelLocMin2DViewTupleTestImpl(362, 362); + KernelLocMin2DViewTupleTestImpl( + 10, 10); + KernelLocMin2DViewTupleTestImpl( + 151, 151); + KernelLocMin2DViewTupleTestImpl( + 362, 362); } REGISTER_TYPED_TEST_SUITE_P(KernelLocMin2DViewTupleTest, diff --git a/test/functional/kernel/region/tests/test-kernel-region-sync.hpp b/test/functional/kernel/region/tests/test-kernel-region-sync.hpp index 2855c53b91..99d7353434 100644 --- a/test/functional/kernel/region/tests/test-kernel-region-sync.hpp +++ b/test/functional/kernel/region/tests/test-kernel-region-sync.hpp @@ -26,13 +26,8 @@ void KernelRegionSyncTestImpl(INDEX_TYPE first, INDEX_TYPE last) INDEX_TYPE* check_array; - allocRegionTestData(N, - work_res, - &work_array1, - &work_array2, - &work_array3, - host_res, - &check_array); + allocRegionTestData(N, work_res, &work_array1, &work_array2, &work_array3, + host_res, &check_array); work_res.memset(work_array1, 0, sizeof(INDEX_TYPE) * N); work_res.memset(work_array2, 0, sizeof(INDEX_TYPE) * N); @@ -77,8 +72,8 @@ void KernelRegionSyncTestImpl(INDEX_TYPE first, INDEX_TYPE last) ASSERT_EQ(check_array[i], 151); } - deallocRegionTestData( - work_res, work_array1, work_array2, work_array3, host_res, check_array); + deallocRegionTestData(work_res, work_array1, work_array2, work_array3, + host_res, check_array); } diff --git a/test/functional/kernel/region/tests/test-kernel-region.hpp b/test/functional/kernel/region/tests/test-kernel-region.hpp index adce8baad5..0a14678bff 100644 --- a/test/functional/kernel/region/tests/test-kernel-region.hpp +++ b/test/functional/kernel/region/tests/test-kernel-region.hpp @@ -22,13 +22,8 @@ void KernelRegionTestImpl(INDEX_TYPE first, INDEX_TYPE last) INDEX_TYPE* check_array; - allocRegionTestData(N, - work_res, - &work_array1, - &work_array2, - &work_array3, - host_res, - &check_array); + allocRegionTestData(N, work_res, &work_array1, &work_array2, &work_array3, + host_res, &check_array); work_res.memset(work_array1, 0, sizeof(INDEX_TYPE) * N); work_res.memset(work_array2, 0, sizeof(INDEX_TYPE) * N); @@ -62,8 +57,8 @@ void KernelRegionTestImpl(INDEX_TYPE first, INDEX_TYPE last) ASSERT_EQ(check_array[i], 151); } - deallocRegionTestData( - work_res, work_array1, work_array2, work_array3, host_res, check_array); + deallocRegionTestData(work_res, work_array1, work_array2, work_array3, + host_res, check_array); } diff --git a/test/functional/kernel/tile-variants/tests/test-kernel-tile-Dynamic2D.hpp b/test/functional/kernel/tile-variants/tests/test-kernel-tile-Dynamic2D.hpp index eaf2062158..a0119c3edf 100644 --- a/test/functional/kernel/tile-variants/tests/test-kernel-tile-Dynamic2D.hpp +++ b/test/functional/kernel/tile-variants/tests/test-kernel-tile-Dynamic2D.hpp @@ -31,11 +31,11 @@ void KernelTileDynamic2DTestImpl(const int rows, const int cols) INDEX_TYPE array_length = rows * cols; - allocateForallTestData( - array_length, work_res, &work_array, &check_array, &test_array); + allocateForallTestData(array_length, work_res, &work_array, + &check_array, &test_array); - allocateForallTestData( - array_length, work_res, &work_array_t, &check_array_t, &test_array_t); + allocateForallTestData(array_length, work_res, &work_array_t, + &check_array_t, &test_array_t); RAJA::View> HostView(test_array, rows, cols); RAJA::View> HostTView(test_array_t, cols, rows); @@ -69,8 +69,8 @@ void KernelTileDynamic2DTestImpl(const int rows, const int cols) [=] RAJA_HOST_DEVICE(INDEX_TYPE cc, INDEX_TYPE rr) { WorkTView(cc, rr) = WorkView(rr, cc); }); - work_res.memcpy( - check_array_t, work_array_t, sizeof(DATA_TYPE) * array_length); + work_res.memcpy(check_array_t, work_array_t, + sizeof(DATA_TYPE) * array_length); for (int rr = 0; rr < rows; ++rr) { @@ -92,8 +92,8 @@ void KernelTileDynamic2DTestImpl(const int rows, const int cols) [=] RAJA_HOST_DEVICE(INDEX_TYPE cc, INDEX_TYPE rr) { WorkTView(cc, rr) = WorkView(rr, cc); }); - work_res.memcpy( - check_array_t, work_array_t, sizeof(DATA_TYPE) * array_length); + work_res.memcpy(check_array_t, work_array_t, + sizeof(DATA_TYPE) * array_length); for (int rr = 0; rr < rows; ++rr) { @@ -103,11 +103,11 @@ void KernelTileDynamic2DTestImpl(const int rows, const int cols) } } - deallocateForallTestData( - work_res, work_array, check_array, test_array); + deallocateForallTestData(work_res, work_array, check_array, + test_array); - deallocateForallTestData( - work_res, work_array_t, check_array_t, test_array_t); + deallocateForallTestData(work_res, work_array_t, check_array_t, + test_array_t); } diff --git a/test/functional/kernel/tile-variants/tests/test-kernel-tile-Fixed2D.hpp b/test/functional/kernel/tile-variants/tests/test-kernel-tile-Fixed2D.hpp index c947a01829..62f09ff9ce 100644 --- a/test/functional/kernel/tile-variants/tests/test-kernel-tile-Fixed2D.hpp +++ b/test/functional/kernel/tile-variants/tests/test-kernel-tile-Fixed2D.hpp @@ -31,11 +31,11 @@ void KernelTileFixed2DTestImpl(const int rows, const int cols) INDEX_TYPE array_length = rows * cols; - allocateForallTestData( - array_length, work_res, &work_array, &check_array, &test_array); + allocateForallTestData(array_length, work_res, &work_array, + &check_array, &test_array); - allocateForallTestData( - array_length, work_res, &work_array_t, &check_array_t, &test_array_t); + allocateForallTestData(array_length, work_res, &work_array_t, + &check_array_t, &test_array_t); RAJA::View> HostView(test_array, rows, cols); RAJA::View> HostTView(test_array_t, cols, rows); @@ -67,8 +67,8 @@ void KernelTileFixed2DTestImpl(const int rows, const int cols) [=] RAJA_HOST_DEVICE(INDEX_TYPE cc, INDEX_TYPE rr) { WorkTView(cc, rr) = WorkView(rr, cc); }); - work_res.memcpy( - check_array_t, work_array_t, sizeof(DATA_TYPE) * array_length); + work_res.memcpy(check_array_t, work_array_t, + sizeof(DATA_TYPE) * array_length); for (int rr = 0; rr < rows; ++rr) { @@ -78,11 +78,11 @@ void KernelTileFixed2DTestImpl(const int rows, const int cols) } } - deallocateForallTestData( - work_res, work_array, check_array, test_array); + deallocateForallTestData(work_res, work_array, check_array, + test_array); - deallocateForallTestData( - work_res, work_array_t, check_array_t, test_array_t); + deallocateForallTestData(work_res, work_array_t, check_array_t, + test_array_t); } diff --git a/test/functional/kernel/tile-variants/tests/test-kernel-tile-Fixed2DMinMax.hpp b/test/functional/kernel/tile-variants/tests/test-kernel-tile-Fixed2DMinMax.hpp index 6c540bebfb..6fa6446883 100644 --- a/test/functional/kernel/tile-variants/tests/test-kernel-tile-Fixed2DMinMax.hpp +++ b/test/functional/kernel/tile-variants/tests/test-kernel-tile-Fixed2DMinMax.hpp @@ -28,8 +28,8 @@ void KernelTileFixed2DMinMaxTestImpl(const int rows, const int cols) INDEX_TYPE array_length = rows * cols; - allocateForallTestData( - array_length, work_res, &work_array, &check_array, &test_array); + allocateForallTestData(array_length, work_res, &work_array, + &check_array, &test_array); // initialize arrays std::iota(test_array, test_array + array_length, 1); @@ -54,8 +54,8 @@ void KernelTileFixed2DMinMaxTestImpl(const int rows, const int cols) colidx.push_back(ii); } - RAJA::TypedListSegment colrange( - &colidx[0], colidx.size(), work_res); + RAJA::TypedListSegment colrange(&colidx[0], colidx.size(), + work_res); // find min and max on target platform RAJA::kernel(RAJA::make_tuple(colrange, rowrange), @@ -69,8 +69,8 @@ void KernelTileFixed2DMinMaxTestImpl(const int rows, const int cols) ASSERT_EQ(static_cast(array_length + 2), static_cast(workmax.get())); - deallocateForallTestData( - work_res, work_array, check_array, test_array); + deallocateForallTestData(work_res, work_array, check_array, + test_array); } @@ -87,21 +87,12 @@ TYPED_TEST_P(KernelTileFixed2DMinMaxTest, TileFixed2DMinMaxKernel) using EXEC_POLICY = typename camp::at>::type; using REDUCE_POLICY = typename camp::at>::type; - KernelTileFixed2DMinMaxTestImpl(10, 10); - KernelTileFixed2DMinMaxTestImpl(151, 111); - KernelTileFixed2DMinMaxTestImpl(362, 362); + KernelTileFixed2DMinMaxTestImpl(10, 10); + KernelTileFixed2DMinMaxTestImpl(151, 111); + KernelTileFixed2DMinMaxTestImpl(362, 362); } REGISTER_TYPED_TEST_SUITE_P(KernelTileFixed2DMinMaxTest, diff --git a/test/functional/kernel/tile-variants/tests/test-kernel-tile-Fixed2DSum.hpp b/test/functional/kernel/tile-variants/tests/test-kernel-tile-Fixed2DSum.hpp index dd2a9b9524..9442c8c0bd 100644 --- a/test/functional/kernel/tile-variants/tests/test-kernel-tile-Fixed2DSum.hpp +++ b/test/functional/kernel/tile-variants/tests/test-kernel-tile-Fixed2DSum.hpp @@ -64,8 +64,8 @@ void KernelTileFixed2DSumTestImpl(const int rowsin, const int colsin) colidx.push_back(ii); } - RAJA::TypedListSegment colrange( - &colidx[0], colidx.size(), work_res); + RAJA::TypedListSegment colrange(&colidx[0], colidx.size(), + work_res); // sum on target platform RAJA::kernel(RAJA::make_tuple(colrange, rowrange), @@ -89,20 +89,11 @@ TYPED_TEST_P(KernelTileFixed2DSumTest, TileFixed2DSumKernel) using EXEC_POLICY = typename camp::at>::type; using REDUCE_POLICY = typename camp::at>::type; - KernelTileFixed2DSumTestImpl(10, 10); - KernelTileFixed2DSumTestImpl(151, 111); - KernelTileFixed2DSumTestImpl(362, 362); } diff --git a/test/functional/kernel/tile-variants/tests/test-kernel-tile-LocalArray2D.hpp b/test/functional/kernel/tile-variants/tests/test-kernel-tile-LocalArray2D.hpp index 2c47f96a2e..90a3e34e0d 100644 --- a/test/functional/kernel/tile-variants/tests/test-kernel-tile-LocalArray2D.hpp +++ b/test/functional/kernel/tile-variants/tests/test-kernel-tile-LocalArray2D.hpp @@ -31,11 +31,11 @@ void KernelTileLocalArray2DTestImpl(const int rows, const int cols) INDEX_TYPE array_length = rows * cols; - allocateForallTestData( - array_length, work_res, &work_array, &check_array, &test_array); + allocateForallTestData(array_length, work_res, &work_array, + &check_array, &test_array); - allocateForallTestData( - array_length, work_res, &work_array_t, &check_array_t, &test_array_t); + allocateForallTestData(array_length, work_res, &work_array_t, + &check_array_t, &test_array_t); RAJA::View> HostView(test_array, rows, cols); RAJA::View> HostTView(test_array_t, cols, rows); @@ -44,8 +44,7 @@ void KernelTileLocalArray2DTestImpl(const int rows, const int cols) RAJA::View> CheckTView(check_array_t, cols, rows); // initialize local array (shared mem) - using TILE_MEM = RAJA::LocalArray, + using TILE_MEM = RAJA::LocalArray, RAJA::SizeList>; TILE_MEM Tile_Array; @@ -72,22 +71,16 @@ void KernelTileLocalArray2DTestImpl(const int rows, const int cols) RAJA::kernel_param( RAJA::make_tuple(colrange, rowrange), RAJA::make_tuple((INDEX_TYPE)0, (INDEX_TYPE)0, Tile_Array), - [=] RAJA_HOST_DEVICE(INDEX_TYPE cc, - INDEX_TYPE rr, - INDEX_TYPE tx, - INDEX_TYPE ty, - TILE_MEM & Tile_Array) + [=] RAJA_HOST_DEVICE(INDEX_TYPE cc, INDEX_TYPE rr, INDEX_TYPE tx, + INDEX_TYPE ty, TILE_MEM & Tile_Array) { Tile_Array(ty, tx) = WorkView(rr, cc); }, - [=] RAJA_HOST_DEVICE(INDEX_TYPE cc, - INDEX_TYPE rr, - INDEX_TYPE tx, - INDEX_TYPE ty, - TILE_MEM & Tile_Array) + [=] RAJA_HOST_DEVICE(INDEX_TYPE cc, INDEX_TYPE rr, INDEX_TYPE tx, + INDEX_TYPE ty, TILE_MEM & Tile_Array) { WorkTView(cc, rr) = Tile_Array(ty, tx); }); - work_res.memcpy( - check_array_t, work_array_t, sizeof(DATA_TYPE) * array_length); + work_res.memcpy(check_array_t, work_array_t, + sizeof(DATA_TYPE) * array_length); for (int rr = 0; rr < rows; ++rr) { @@ -97,11 +90,11 @@ void KernelTileLocalArray2DTestImpl(const int rows, const int cols) } } - deallocateForallTestData( - work_res, work_array, check_array, test_array); + deallocateForallTestData(work_res, work_array, check_array, + test_array); - deallocateForallTestData( - work_res, work_array_t, check_array_t, test_array_t); + deallocateForallTestData(work_res, work_array_t, check_array_t, + test_array_t); } @@ -117,17 +110,11 @@ TYPED_TEST_P(KernelTileLocalArray2DTest, TileLocalArray2DKernel) using WORKING_RES = typename camp::at>::type; using EXEC_POLICY = typename camp::at>::type; - KernelTileLocalArray2DTestImpl(10, 10); - KernelTileLocalArray2DTestImpl(151, 111); - KernelTileLocalArray2DTestImpl(362, 362); } diff --git a/test/functional/kernel/warp-thread/tests/warp-thread-ReduceMask-impl.hpp b/test/functional/kernel/warp-thread/tests/warp-thread-ReduceMask-impl.hpp index 2bf555ef61..a8bc973981 100644 --- a/test/functional/kernel/warp-thread/tests/warp-thread-ReduceMask-impl.hpp +++ b/test/functional/kernel/warp-thread/tests/warp-thread-ReduceMask-impl.hpp @@ -88,10 +88,8 @@ void KernelWarpThreadTest(const DEVICE_DEPTH_2_REDUCESUM_WARPMASK&, RAJA::Index_type* check_array; RAJA::Index_type* test_array; - allocateForallTestData(directlen * looplen, - erased_work_res, - &work_array, - &check_array, + allocateForallTestData(directlen * looplen, erased_work_res, + &work_array, &check_array, &test_array); RAJA::ReduceMax max_thread(0); @@ -113,8 +111,8 @@ void KernelWarpThreadTest(const DEVICE_DEPTH_2_REDUCESUM_WARPMASK&, ASSERT_EQ(trip_count.get(), looplen * directlen); ASSERT_EQ(worksum.get(), looplen * directlen * (directlen - 1) / 2); - deallocateForallTestData( - erased_work_res, work_array, check_array, test_array); + deallocateForallTestData(erased_work_res, work_array, + check_array, test_array); } template (directlen * looplen, - erased_work_res, - &work_array, - &check_array, + allocateForallTestData(directlen * looplen, erased_work_res, + &work_array, &check_array, &test_array); RAJA::ReduceMax max_thread(0); @@ -145,12 +141,10 @@ void KernelWarpThreadTest(const DEVICE_DEPTH_2_REDUCESUM_WARPMASK_FORI&, call_kernel_param( RAJA::make_tuple(RAJA::TypedRangeSegment(0, directlen), RAJA::TypedRangeSegment(0, looplen)), - RAJA::make_tuple((RAJA::Index_type)0, (RAJA::Index_type)0), - work_res, + RAJA::make_tuple((RAJA::Index_type)0, (RAJA::Index_type)0), work_res, [=] RAJA_DEVICE(RAJA::Index_type RAJA_UNUSED_ARG(i), RAJA::Index_type RAJA_UNUSED_ARG(j), - RAJA::Index_type RAJA_UNUSED_ARG(x), - RAJA::Index_type y) + RAJA::Index_type RAJA_UNUSED_ARG(x), RAJA::Index_type y) { trip_count += 1; worksum += y; // y should only be 0..3 @@ -161,8 +155,8 @@ void KernelWarpThreadTest(const DEVICE_DEPTH_2_REDUCESUM_WARPMASK_FORI&, ASSERT_EQ(trip_count.get(), looplen * directlen); ASSERT_EQ(worksum.get(), looplen * directlen * (looplen - 1) / 2); - deallocateForallTestData( - erased_work_res, work_array, check_array, test_array); + deallocateForallTestData(erased_work_res, work_array, + check_array, test_array); } // diff --git a/test/functional/kernel/warp-thread/tests/warp-thread-ReduceWarp-impl.hpp b/test/functional/kernel/warp-thread/tests/warp-thread-ReduceWarp-impl.hpp index e453ab19eb..56c5eeb673 100644 --- a/test/functional/kernel/warp-thread/tests/warp-thread-ReduceWarp-impl.hpp +++ b/test/functional/kernel/warp-thread/tests/warp-thread-ReduceWarp-impl.hpp @@ -66,16 +66,15 @@ void KernelWarpThreadTest(const DEVICE_DEPTH_1_REDUCESUM_WARPREDUCE&, RAJA::Index_type* check_array; RAJA::Index_type* test_array; - allocateForallTestData( - len, erased_work_res, &work_array, &check_array, &test_array); + allocateForallTestData(len, erased_work_res, &work_array, + &check_array, &test_array); RAJA::ReduceSum worksum(0); RAJA::ReduceSum reduce_count(0); call_kernel_param( RAJA::make_tuple(RAJA::TypedRangeSegment(0, len)), - RAJA::make_tuple((RAJA::Index_type)0), - work_res, + RAJA::make_tuple((RAJA::Index_type)0), work_res, [=] RAJA_HOST_DEVICE(RAJA::Index_type i, RAJA::Index_type & value) { value += i; }, @@ -91,8 +90,8 @@ void KernelWarpThreadTest(const DEVICE_DEPTH_1_REDUCESUM_WARPREDUCE&, ASSERT_EQ(worksum.get(), len * (len - 1) / 2); ASSERT_EQ(reduce_count.get(), 1); - deallocateForallTestData( - erased_work_res, work_array, check_array, test_array); + deallocateForallTestData(erased_work_res, work_array, + check_array, test_array); } template ( - len, erased_work_res, &work_array, &check_array, &test_array); + allocateForallTestData(len, erased_work_res, &work_array, + &check_array, &test_array); RAJA::ReduceSum worksum(0); RAJA::ReduceSum reduce_count(0); @@ -122,11 +121,10 @@ void KernelWarpThreadTest( call_kernel_param( RAJA::make_tuple(RAJA::TypedRangeSegment(0, outerlen), RAJA::TypedRangeSegment(0, innerlen)), - RAJA::make_tuple((RAJA::Index_type)0), - work_res, + RAJA::make_tuple((RAJA::Index_type)0), work_res, - [=] RAJA_HOST_DEVICE( - RAJA::Index_type i, RAJA::Index_type j, RAJA::Index_type & value) + [=] RAJA_HOST_DEVICE(RAJA::Index_type i, RAJA::Index_type j, + RAJA::Index_type & value) { value += i + j * outerlen; }, [=] RAJA_HOST_DEVICE(RAJA::Index_type & value) @@ -140,8 +138,8 @@ void KernelWarpThreadTest( ASSERT_EQ(worksum.get(), outerlen * innerlen * (outerlen * innerlen - 1) / 2); ASSERT_EQ(reduce_count.get(), innerlen); - deallocateForallTestData( - erased_work_res, work_array, check_array, test_array); + deallocateForallTestData(erased_work_res, work_array, + check_array, test_array); } template ( - len, erased_work_res, &work_array, &check_array, &test_array); + allocateForallTestData(len, erased_work_res, &work_array, + &check_array, &test_array); RAJA::ReduceSum worksum(0); RAJA::ReduceSum reduce_count(0); @@ -173,13 +171,10 @@ void KernelWarpThreadTest( RAJA::make_tuple(RAJA::TypedRangeSegment(0, outerlen), RAJA::TypedRangeSegment(0, middlelen), RAJA::TypedRangeSegment(0, innerlen)), - RAJA::make_tuple((RAJA::Index_type)0), - work_res, + RAJA::make_tuple((RAJA::Index_type)0), work_res, - [=] RAJA_HOST_DEVICE(RAJA::Index_type i, - RAJA::Index_type j, - RAJA::Index_type k, - RAJA::Index_type & value) + [=] RAJA_HOST_DEVICE(RAJA::Index_type i, RAJA::Index_type j, + RAJA::Index_type k, RAJA::Index_type & value) { value += i + j * outerlen + k * outerlen * middlelen; }, [=] RAJA_HOST_DEVICE(RAJA::Index_type & value) @@ -190,13 +185,12 @@ void KernelWarpThreadTest( reduce_count += 1; }); - ASSERT_EQ(worksum.get(), - outerlen * middlelen * innerlen * - (outerlen * middlelen * innerlen - 1) / 2); + ASSERT_EQ(worksum.get(), outerlen * middlelen * innerlen * + (outerlen * middlelen * innerlen - 1) / 2); ASSERT_EQ(reduce_count.get(), middlelen * innerlen); - deallocateForallTestData( - erased_work_res, work_array, check_array, test_array); + deallocateForallTestData(erased_work_res, work_array, + check_array, test_array); } // diff --git a/test/functional/kernel/warp-thread/tests/warp-thread-WarpLoop-impl.hpp b/test/functional/kernel/warp-thread/tests/warp-thread-WarpLoop-impl.hpp index b624fd5e8c..21326cd7d6 100644 --- a/test/functional/kernel/warp-thread/tests/warp-thread-WarpLoop-impl.hpp +++ b/test/functional/kernel/warp-thread/tests/warp-thread-WarpLoop-impl.hpp @@ -88,8 +88,8 @@ void KernelWarpThreadTest(const DEVICE_DEPTH_1_REDUCESUM_WARP&, RAJA::Index_type* check_array; RAJA::Index_type* test_array; - allocateForallTestData( - len, erased_work_res, &work_array, &check_array, &test_array); + allocateForallTestData(len, erased_work_res, &work_array, + &check_array, &test_array); RAJA::TypedRangeSegment rangelen(0, len); @@ -97,13 +97,12 @@ void KernelWarpThreadTest(const DEVICE_DEPTH_1_REDUCESUM_WARP&, call_kernel( RAJA::make_tuple(RAJA::TypedRangeSegment(0, len)), - work_res, - [=] RAJA_HOST_DEVICE(RAJA::Index_type i) { worksum += i; }); + work_res, [=] RAJA_HOST_DEVICE(RAJA::Index_type i) { worksum += i; }); ASSERT_EQ(worksum.get(), len * (len - 1) / 2); - deallocateForallTestData( - erased_work_res, work_array, check_array, test_array); + deallocateForallTestData(erased_work_res, work_array, + check_array, test_array); } template ( RAJA::make_tuple(RAJA::TypedRangeSegment(0, flatSize)), - RAJA::make_tuple((RAJA::Index_type)0), - work_res, + RAJA::make_tuple((RAJA::Index_type)0), work_res, [=] RAJA_HOST_DEVICE(RAJA::Index_type RAJA_UNUSED_ARG(i), RAJA::Index_type j) { @@ -140,8 +138,8 @@ void KernelWarpThreadTest(const DEVICE_DEPTH_2_REDUCESUM_WARP&, ASSERT_EQ(worksum.get(), numtiles * 32 * (32 - 1) / 2); - deallocateForallTestData( - erased_work_res, work_array, check_array, test_array); + deallocateForallTestData(erased_work_res, work_array, + check_array, test_array); } // More specific execution policies that use the above diff --git a/test/functional/launch/multi-reduce-nested/tests/test-launch-nested-MultiReduce.hpp b/test/functional/launch/multi-reduce-nested/tests/test-launch-nested-MultiReduce.hpp index 8655bf2119..10ea760622 100644 --- a/test/functional/launch/multi-reduce-nested/tests/test-launch-nested-MultiReduce.hpp +++ b/test/functional/launch/multi-reduce-nested/tests/test-launch-nested-MultiReduce.hpp @@ -61,18 +61,15 @@ void Launch(const SEGMENTS_TYPE& segments, Lambda&& lambda) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::loop( - ctx, - RAJA::TypedRangeSegment(0, blocks_k), + ctx, RAJA::TypedRangeSegment(0, blocks_k), [&](IDX_TYPE bk) { RAJA::loop( - ctx, - RAJA::TypedRangeSegment(0, blocks_j), + ctx, RAJA::TypedRangeSegment(0, blocks_j), [&](IDX_TYPE bj) { RAJA::loop( - ctx, - RAJA::TypedRangeSegment(0, blocks_i), + ctx, RAJA::TypedRangeSegment(0, blocks_i), [&](IDX_TYPE bi) { RAJA::loop( @@ -100,8 +97,7 @@ void Launch(const SEGMENTS_TYPE& segments, Lambda&& lambda) j < distance_sj && k < distance_sk) { - lambda(begin_sk[k], - begin_sj[j], + lambda(begin_sk[k], begin_sj[j], begin_si[i]); } }); @@ -184,8 +180,8 @@ LaunchMultiReduceNestedTestImpl(const SEGMENTS_TYPE& segments, IDX_TYPE data_len = 0; - allocateForallTestData( - idx_range + 1, working_res, &working_range, &check_range, &test_range); + allocateForallTestData(idx_range + 1, working_res, &working_range, + &check_range, &test_range); for (IDX_TYPE i = 0; i < idx_range + 1; ++i) { @@ -211,11 +207,11 @@ LaunchMultiReduceNestedTestImpl(const SEGMENTS_TYPE& segments, } } - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, &check_array, + &test_array); - allocateForallTestData( - data_len, working_res, &working_bins, &check_bins, &test_bins); + allocateForallTestData(data_len, working_res, &working_bins, &check_bins, + &test_bins); if (data_len > IDX_TYPE(0)) { @@ -235,8 +231,8 @@ LaunchMultiReduceNestedTestImpl(const SEGMENTS_TYPE& segments, } } - working_res.memcpy( - working_range, test_range, sizeof(IDX_TYPE) * (idx_range + 1)); + working_res.memcpy(working_range, test_range, + sizeof(IDX_TYPE) * (idx_range + 1)); working_res.memcpy(working_array, test_array, sizeof(DATA_TYPE) * data_len); working_res.memcpy(working_bins, test_bins, sizeof(IDX_TYPE) * data_len); @@ -331,8 +327,8 @@ LaunchMultiReduceNestedTestImpl(const SEGMENTS_TYPE& segments, { test_array[i] = DATA_TYPE(array_flt_distribution(rngen)); } - working_res.memcpy( - working_array, test_array, sizeof(DATA_TYPE) * data_len); + working_res.memcpy(working_array, test_array, + sizeof(DATA_TYPE) * data_len); } @@ -417,34 +413,25 @@ TYPED_TEST_P(LaunchMultiReduceNestedTest, MultiReduceNestedLaunch) auto s1 = RAJA::make_tuple(RAJA::TypedRangeSegment(0, 2), RAJA::TypedRangeSegment(0, 7), RAJA::TypedRangeSegment(0, 3)); - LaunchMultiReduceNestedTestImpl( - s1, container, working_res, rngen); + LaunchMultiReduceNestedTestImpl(s1, container, + working_res, rngen); auto s2 = RAJA::make_tuple(RAJA::TypedRangeSegment(2, 35), RAJA::TypedRangeSegment(0, 19), RAJA::TypedRangeSegment(3, 13)); - LaunchMultiReduceNestedTestImpl( - s2, container, working_res, rngen); + LaunchMultiReduceNestedTestImpl(s2, container, + working_res, rngen); // Range-stride segment tests auto s3 = RAJA::make_tuple(RAJA::TypedRangeStrideSegment(0, 6, 2), RAJA::TypedRangeStrideSegment(1, 38, 3), RAJA::TypedRangeStrideSegment(5, 17, 1)); - LaunchMultiReduceNestedTestImpl( - s3, container, working_res, rngen); + LaunchMultiReduceNestedTestImpl(s3, container, + working_res, rngen); } } diff --git a/test/functional/launch/nested_direct/tests/test-launch-nested-Direct.hpp b/test/functional/launch/nested_direct/tests/test-launch-nested-Direct.hpp index 13d0892098..c555649a4d 100644 --- a/test/functional/launch/nested_direct/tests/test-launch-nested-Direct.hpp +++ b/test/functional/launch/nested_direct/tests/test-launch-nested-Direct.hpp @@ -51,8 +51,8 @@ void LaunchNestedDirectTestImpl(INDEX_TYPE M) data_len = 1; } - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, + &check_array, &test_array); // 6 threads total constexpr int threads_x = 2; constexpr int threads_y = 3; @@ -69,8 +69,8 @@ void LaunchNestedDirectTestImpl(INDEX_TYPE M) constexpr int DIM = 6; using layout_t = RAJA::Layout; - RAJA::View Aview( - working_array, N6, N5, N4, N3, N2, N1); + RAJA::View Aview(working_array, N6, N5, N4, N3, N2, + N1); RAJA::launch( RAJA::LaunchParams(RAJA::Teams(blocks_x, blocks_y, blocks_z), @@ -78,33 +78,27 @@ void LaunchNestedDirectTestImpl(INDEX_TYPE M) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::loop( - ctx, - r6, + ctx, r6, [&](INDEX_TYPE bz) { RAJA::loop( - ctx, - r5, + ctx, r5, [&](INDEX_TYPE by) { RAJA::loop( - ctx, - r4, + ctx, r4, [&](INDEX_TYPE bx) { RAJA::loop( - ctx, - r3, + ctx, r3, [&](INDEX_TYPE tz) { RAJA::loop( - ctx, - r2, + ctx, r2, [&](INDEX_TYPE ty) { RAJA::loop( - ctx, - r1, + ctx, r1, [&](INDEX_TYPE tx) { auto idx = @@ -136,8 +130,8 @@ void LaunchNestedDirectTestImpl(INDEX_TYPE M) memset(static_cast(test_array), 0, sizeof(INDEX_TYPE) * data_len); - working_res.memcpy( - working_array, test_array, sizeof(INDEX_TYPE) * data_len); + working_res.memcpy(working_array, test_array, + sizeof(INDEX_TYPE) * data_len); RAJA::launch( RAJA::LaunchParams(RAJA::Teams(blocks_x, blocks_y, blocks_z), @@ -145,33 +139,27 @@ void LaunchNestedDirectTestImpl(INDEX_TYPE M) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::loop( - ctx, - r3, + ctx, r3, [&](INDEX_TYPE RAJA_UNUSED_ARG(bz)) { RAJA::loop( - ctx, - r2, + ctx, r2, [&](INDEX_TYPE RAJA_UNUSED_ARG(by)) { RAJA::loop( - ctx, - r1, + ctx, r1, [&](INDEX_TYPE RAJA_UNUSED_ARG(bx)) { RAJA::loop( - ctx, - r3, + ctx, r3, [&](INDEX_TYPE RAJA_UNUSED_ARG(tz)) { RAJA::loop( - ctx, - r2, + ctx, r2, [&](INDEX_TYPE RAJA_UNUSED_ARG(ty)) { RAJA::loop( - ctx, - r1, + ctx, r1, [&](INDEX_TYPE RAJA_UNUSED_ARG(tx)) { working_array[0]++; }); }); @@ -199,8 +187,8 @@ void LaunchNestedDirectTestImpl(INDEX_TYPE M) ASSERT_EQ(test_array[0], check_array[0]); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } @@ -241,26 +229,16 @@ TYPED_TEST_P(LaunchNestedDirectTest, RangeSegmentTeams) // test zero-length range segment - LaunchNestedDirectTestImpl(INDEX_TYPE(0)); + LaunchNestedDirectTestImpl( + INDEX_TYPE(0)); // Keep at one since we are doing a direct thread test - LaunchNestedDirectTestImpl(INDEX_TYPE(1)); + LaunchNestedDirectTestImpl( + INDEX_TYPE(1)); } REGISTER_TYPED_TEST_SUITE_P(LaunchNestedDirectTest, RangeSegmentTeams); diff --git a/test/functional/launch/nested_loop/tests/test-launch-nested-Loop.hpp b/test/functional/launch/nested_loop/tests/test-launch-nested-Loop.hpp index b5b8ab20a6..4d857b37fa 100644 --- a/test/functional/launch/nested_loop/tests/test-launch-nested-Loop.hpp +++ b/test/functional/launch/nested_loop/tests/test-launch-nested-Loop.hpp @@ -52,8 +52,8 @@ void LaunchNestedLoopTestImpl(INDEX_TYPE M) data_len = 1; } - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, + &check_array, &test_array); // 6 threads total constexpr int threads_x = 1; @@ -71,8 +71,8 @@ void LaunchNestedLoopTestImpl(INDEX_TYPE M) constexpr int DIM = 6; using layout_t = RAJA::Layout; - RAJA::View Aview( - working_array, N6, N5, N4, N3, N2, N1); + RAJA::View Aview(working_array, N6, N5, N4, N3, N2, + N1); RAJA::launch( RAJA::LaunchParams(RAJA::Teams(blocks_x, blocks_y, blocks_z), @@ -80,33 +80,27 @@ void LaunchNestedLoopTestImpl(INDEX_TYPE M) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::loop( - ctx, - r6, + ctx, r6, [&](INDEX_TYPE bz) { RAJA::loop( - ctx, - r5, + ctx, r5, [&](INDEX_TYPE by) { RAJA::loop( - ctx, - r4, + ctx, r4, [&](INDEX_TYPE bx) { RAJA::loop( - ctx, - r3, + ctx, r3, [&](INDEX_TYPE tz) { RAJA::loop( - ctx, - r2, + ctx, r2, [&](INDEX_TYPE ty) { RAJA::loop( - ctx, - r1, + ctx, r1, [&](INDEX_TYPE tx) { auto idx = @@ -138,8 +132,8 @@ void LaunchNestedLoopTestImpl(INDEX_TYPE M) memset(static_cast(test_array), 0, sizeof(INDEX_TYPE) * data_len); - working_res.memcpy( - working_array, test_array, sizeof(INDEX_TYPE) * data_len); + working_res.memcpy(working_array, test_array, + sizeof(INDEX_TYPE) * data_len); RAJA::launch( RAJA::LaunchParams(RAJA::Teams(blocks_x, blocks_y, blocks_z), @@ -147,33 +141,27 @@ void LaunchNestedLoopTestImpl(INDEX_TYPE M) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::loop( - ctx, - r3, + ctx, r3, [&](INDEX_TYPE RAJA_UNUSED_ARG(bz)) { RAJA::loop( - ctx, - r2, + ctx, r2, [&](INDEX_TYPE RAJA_UNUSED_ARG(by)) { RAJA::loop( - ctx, - r1, + ctx, r1, [&](INDEX_TYPE RAJA_UNUSED_ARG(bx)) { RAJA::loop( - ctx, - r3, + ctx, r3, [&](INDEX_TYPE RAJA_UNUSED_ARG(tz)) { RAJA::loop( - ctx, - r2, + ctx, r2, [&](INDEX_TYPE RAJA_UNUSED_ARG(ty)) { RAJA::loop( - ctx, - r1, + ctx, r1, [&](INDEX_TYPE RAJA_UNUSED_ARG(tx)) { working_array[0]++; }); }); @@ -201,8 +189,8 @@ void LaunchNestedLoopTestImpl(INDEX_TYPE M) ASSERT_EQ(test_array[0], check_array[0]); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } @@ -243,25 +231,15 @@ TYPED_TEST_P(LaunchNestedLoopTest, RangeSegmentTeams) // test zero-length range segment - LaunchNestedLoopTestImpl(INDEX_TYPE(0)); - - LaunchNestedLoopTestImpl(INDEX_TYPE(3)); + LaunchNestedLoopTestImpl( + INDEX_TYPE(0)); + + LaunchNestedLoopTestImpl( + INDEX_TYPE(3)); } REGISTER_TYPED_TEST_SUITE_P(LaunchNestedLoopTest, RangeSegmentTeams); diff --git a/test/functional/launch/nested_tile_direct/tests/test-launch-nested-Tile-Direct.hpp b/test/functional/launch/nested_tile_direct/tests/test-launch-nested-Tile-Direct.hpp index e82dfb3a4a..e4e36a33c5 100644 --- a/test/functional/launch/nested_tile_direct/tests/test-launch-nested-Tile-Direct.hpp +++ b/test/functional/launch/nested_tile_direct/tests/test-launch-nested-Tile-Direct.hpp @@ -55,8 +55,8 @@ void LaunchNestedTileDirectTestImpl(INDEX_TYPE M) data_len = 1; } - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, + &check_array, &test_array); if (RAJA::stripIndexType(N) > 0) { @@ -73,36 +73,27 @@ void LaunchNestedTileDirectTestImpl(INDEX_TYPE M) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::tile( - ctx, - tile_size_z, - r3, + ctx, tile_size_z, r3, [&](RAJA::TypedRangeSegment const& z_tile) { RAJA::tile( - ctx, - tile_size_y, - r2, + ctx, tile_size_y, r2, [&](RAJA::TypedRangeSegment const& y_tile) { RAJA::tile( - ctx, - tile_size_x, - r1, + ctx, tile_size_x, r1, [&](RAJA::TypedRangeSegment const& x_tile) { RAJA::loop( - ctx, - z_tile, + ctx, z_tile, [&](INDEX_TYPE tz) { RAJA::loop( - ctx, - y_tile, + ctx, y_tile, [&](INDEX_TYPE ty) { RAJA::loop( - ctx, - x_tile, + ctx, x_tile, [&](INDEX_TYPE tx) { auto idx = @@ -123,8 +114,8 @@ void LaunchNestedTileDirectTestImpl(INDEX_TYPE M) memset(static_cast(test_array), 0, sizeof(INDEX_TYPE) * data_len); - working_res.memcpy( - working_array, test_array, sizeof(INDEX_TYPE) * data_len); + working_res.memcpy(working_array, test_array, + sizeof(INDEX_TYPE) * data_len); RAJA::launch( RAJA::LaunchParams(RAJA::Teams(blocks_x, blocks_y, blocks_z), @@ -132,36 +123,27 @@ void LaunchNestedTileDirectTestImpl(INDEX_TYPE M) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::tile( - ctx, - threads_z, - r3, + ctx, threads_z, r3, [&](RAJA::TypedRangeSegment const& z_tile) { RAJA::tile( - ctx, - threads_y, - r2, + ctx, threads_y, r2, [&](RAJA::TypedRangeSegment const& y_tile) { RAJA::tile( - ctx, - threads_x, - r1, + ctx, threads_x, r1, [&](RAJA::TypedRangeSegment const& x_tile) { RAJA::loop( - ctx, - z_tile, + ctx, z_tile, [&](INDEX_TYPE RAJA_UNUSED_ARG(tz)) { RAJA::loop( - ctx, - y_tile, + ctx, y_tile, [&](INDEX_TYPE RAJA_UNUSED_ARG(ty)) { RAJA::loop( - ctx, - x_tile, + ctx, x_tile, [&](INDEX_TYPE RAJA_UNUSED_ARG(tx)) { working_array[0]++; }); }); @@ -189,8 +171,8 @@ void LaunchNestedTileDirectTestImpl(INDEX_TYPE M) ASSERT_EQ(test_array[0], check_array[0]); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } @@ -231,26 +213,16 @@ TYPED_TEST_P(LaunchNestedTileDirectTest, RangeSegmentTeams) // test zero-length range segment - LaunchNestedTileDirectTestImpl(INDEX_TYPE(0)); + LaunchNestedTileDirectTestImpl< + INDEX_TYPE, WORKING_RES, LAUNCH_POLICY, THREAD_X_POLICY, THREAD_Y_POLICY, + THREAD_Z_POLICY, TEAM_X_POLICY, TEAM_Y_POLICY, TEAM_Z_POLICY>( + INDEX_TYPE(0)); // Keep at one since we are doing a direct thread test - LaunchNestedTileDirectTestImpl(INDEX_TYPE(1)); + LaunchNestedTileDirectTestImpl< + INDEX_TYPE, WORKING_RES, LAUNCH_POLICY, THREAD_X_POLICY, THREAD_Y_POLICY, + THREAD_Z_POLICY, TEAM_X_POLICY, TEAM_Y_POLICY, TEAM_Z_POLICY>( + INDEX_TYPE(1)); } REGISTER_TYPED_TEST_SUITE_P(LaunchNestedTileDirectTest, RangeSegmentTeams); diff --git a/test/functional/launch/nested_tile_loop/tests/test-launch-nested-Tile-Loop.hpp b/test/functional/launch/nested_tile_loop/tests/test-launch-nested-Tile-Loop.hpp index 83535095a6..6b6619a6bd 100644 --- a/test/functional/launch/nested_tile_loop/tests/test-launch-nested-Tile-Loop.hpp +++ b/test/functional/launch/nested_tile_loop/tests/test-launch-nested-Tile-Loop.hpp @@ -52,8 +52,8 @@ void LaunchNestedTileLoopTestImpl(INDEX_TYPE M) data_len = 1; } - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, + &check_array, &test_array); if (RAJA::stripIndexType(N) > 0) { @@ -70,36 +70,27 @@ void LaunchNestedTileLoopTestImpl(INDEX_TYPE M) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::tile( - ctx, - threads_z, - r3, + ctx, threads_z, r3, [&](RAJA::TypedRangeSegment const& z_tile) { RAJA::tile( - ctx, - threads_y, - r2, + ctx, threads_y, r2, [&](RAJA::TypedRangeSegment const& y_tile) { RAJA::tile( - ctx, - threads_x, - r1, + ctx, threads_x, r1, [&](RAJA::TypedRangeSegment const& x_tile) { RAJA::loop( - ctx, - z_tile, + ctx, z_tile, [&](INDEX_TYPE tz) { RAJA::loop( - ctx, - y_tile, + ctx, y_tile, [&](INDEX_TYPE ty) { RAJA::loop( - ctx, - x_tile, + ctx, x_tile, [&](INDEX_TYPE tx) { auto idx = @@ -120,8 +111,8 @@ void LaunchNestedTileLoopTestImpl(INDEX_TYPE M) memset(static_cast(test_array), 0, sizeof(INDEX_TYPE) * data_len); - working_res.memcpy( - working_array, test_array, sizeof(INDEX_TYPE) * data_len); + working_res.memcpy(working_array, test_array, + sizeof(INDEX_TYPE) * data_len); RAJA::launch( RAJA::LaunchParams(RAJA::Teams(blocks_x, blocks_y, blocks_z), @@ -129,36 +120,27 @@ void LaunchNestedTileLoopTestImpl(INDEX_TYPE M) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::tile( - ctx, - threads_z, - r3, + ctx, threads_z, r3, [&](RAJA::TypedRangeSegment const& z_tile) { RAJA::tile( - ctx, - threads_y, - r2, + ctx, threads_y, r2, [&](RAJA::TypedRangeSegment const& y_tile) { RAJA::tile( - ctx, - threads_x, - r1, + ctx, threads_x, r1, [&](RAJA::TypedRangeSegment const& x_tile) { RAJA::loop( - ctx, - z_tile, + ctx, z_tile, [&](INDEX_TYPE tz) { RAJA::loop( - ctx, - y_tile, + ctx, y_tile, [&](INDEX_TYPE ty) { RAJA::loop( - ctx, - x_tile, + ctx, x_tile, [&](INDEX_TYPE tx) { (void)tx; @@ -192,8 +174,8 @@ void LaunchNestedTileLoopTestImpl(INDEX_TYPE M) ASSERT_EQ(test_array[0], check_array[0]); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } @@ -234,26 +216,16 @@ TYPED_TEST_P(LaunchNestedTileLoopTest, RangeSegmentTeams) // test zero-length range segment - LaunchNestedTileLoopTestImpl(INDEX_TYPE(0)); + LaunchNestedTileLoopTestImpl< + INDEX_TYPE, WORKING_RES, LAUNCH_POLICY, THREAD_X_POLICY, THREAD_Y_POLICY, + THREAD_Z_POLICY, TEAM_X_POLICY, TEAM_Y_POLICY, TEAM_Z_POLICY>( + INDEX_TYPE(0)); // Keep at one since we are doing a direct thread test - LaunchNestedTileLoopTestImpl(INDEX_TYPE(1)); + LaunchNestedTileLoopTestImpl< + INDEX_TYPE, WORKING_RES, LAUNCH_POLICY, THREAD_X_POLICY, THREAD_Y_POLICY, + THREAD_Z_POLICY, TEAM_X_POLICY, TEAM_Y_POLICY, TEAM_Z_POLICY>( + INDEX_TYPE(1)); } REGISTER_TYPED_TEST_SUITE_P(LaunchNestedTileLoopTest, RangeSegmentTeams); diff --git a/test/functional/launch/reduce-basic/tests/test-launch-basic-ReduceBitAnd.hpp b/test/functional/launch/reduce-basic/tests/test-launch-basic-ReduceBitAnd.hpp index 4fd6703c2c..58654af529 100644 --- a/test/functional/launch/reduce-basic/tests/test-launch-basic-ReduceBitAnd.hpp +++ b/test/functional/launch/reduce-basic/tests/test-launch-basic-ReduceBitAnd.hpp @@ -33,8 +33,8 @@ void LaunchReduceBitAndBasicTestImpl(const SEG_TYPE& seg, constexpr int threads = 256; int blocks = (seg.size() - 1) / threads + 1; - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, + &check_array, &test_array); // // First a simple non-trivial test that is mildly interesting @@ -83,8 +83,7 @@ void LaunchReduceBitAndBasicTestImpl(const SEG_TYPE& seg, RAJA::LaunchParams(RAJA::Teams(blocks), RAJA::Threads(threads)), [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { - RAJA::loop(ctx, - seg, + RAJA::loop(ctx, seg, [&](IDX_TYPE idx) { redand &= working_array[idx]; @@ -112,8 +111,8 @@ void LaunchReduceBitAndBasicTestImpl(const SEG_TYPE& seg, ASSERT_EQ(static_cast(redand.get()), ref_and); - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } @@ -142,53 +141,40 @@ TYPED_TEST_P(LaunchReduceBitAndBasicTest, ReduceBitAndBasicForall) // Range segment tests RAJA::TypedRangeSegment r1(0, 28); RAJA::getIndices(seg_idx, r1); - LaunchReduceBitAndBasicTestImpl, - LAUNCH_POLICY, - GLOBAL_THREAD_POLICY, - REDUCE_POLICY>(r1, seg_idx, working_res); + LaunchReduceBitAndBasicTestImpl< + IDX_TYPE, DATA_TYPE, RAJA::TypedRangeSegment, LAUNCH_POLICY, + GLOBAL_THREAD_POLICY, REDUCE_POLICY>(r1, seg_idx, working_res); seg_idx.clear(); RAJA::TypedRangeSegment r2(3, 642); RAJA::getIndices(seg_idx, r2); - LaunchReduceBitAndBasicTestImpl, - LAUNCH_POLICY, - GLOBAL_THREAD_POLICY, - REDUCE_POLICY>(r2, seg_idx, working_res); + LaunchReduceBitAndBasicTestImpl< + IDX_TYPE, DATA_TYPE, RAJA::TypedRangeSegment, LAUNCH_POLICY, + GLOBAL_THREAD_POLICY, REDUCE_POLICY>(r2, seg_idx, working_res); seg_idx.clear(); RAJA::TypedRangeSegment r3(0, 2057); RAJA::getIndices(seg_idx, r3); - LaunchReduceBitAndBasicTestImpl, - LAUNCH_POLICY, - GLOBAL_THREAD_POLICY, - REDUCE_POLICY>(r3, seg_idx, working_res); + LaunchReduceBitAndBasicTestImpl< + IDX_TYPE, DATA_TYPE, RAJA::TypedRangeSegment, LAUNCH_POLICY, + GLOBAL_THREAD_POLICY, REDUCE_POLICY>(r3, seg_idx, working_res); // Range-stride segment tests seg_idx.clear(); RAJA::TypedRangeStrideSegment r4(0, 188, 2); RAJA::getIndices(seg_idx, r4); - LaunchReduceBitAndBasicTestImpl, - LAUNCH_POLICY, - GLOBAL_THREAD_POLICY, - REDUCE_POLICY>(r4, seg_idx, working_res); + LaunchReduceBitAndBasicTestImpl< + IDX_TYPE, DATA_TYPE, RAJA::TypedRangeStrideSegment, + LAUNCH_POLICY, GLOBAL_THREAD_POLICY, REDUCE_POLICY>(r4, seg_idx, + working_res); seg_idx.clear(); RAJA::TypedRangeStrideSegment r5(3, 1029, 3); RAJA::getIndices(seg_idx, r5); - LaunchReduceBitAndBasicTestImpl, - LAUNCH_POLICY, - GLOBAL_THREAD_POLICY, - REDUCE_POLICY>(r5, seg_idx, working_res); + LaunchReduceBitAndBasicTestImpl< + IDX_TYPE, DATA_TYPE, RAJA::TypedRangeStrideSegment, + LAUNCH_POLICY, GLOBAL_THREAD_POLICY, REDUCE_POLICY>(r5, seg_idx, + working_res); // List segment tests seg_idx.clear(); @@ -203,12 +189,9 @@ TYPED_TEST_P(LaunchReduceBitAndBasicTest, ReduceBitAndBasicForall) } } RAJA::TypedListSegment l1(&seg_idx[0], seg_idx.size(), working_res); - LaunchReduceBitAndBasicTestImpl, - LAUNCH_POLICY, - GLOBAL_THREAD_POLICY, - REDUCE_POLICY>(l1, seg_idx, working_res); + LaunchReduceBitAndBasicTestImpl< + IDX_TYPE, DATA_TYPE, RAJA::TypedListSegment, LAUNCH_POLICY, + GLOBAL_THREAD_POLICY, REDUCE_POLICY>(l1, seg_idx, working_res); } REGISTER_TYPED_TEST_SUITE_P(LaunchReduceBitAndBasicTest, diff --git a/test/functional/launch/reduce-basic/tests/test-launch-basic-ReduceMin.hpp b/test/functional/launch/reduce-basic/tests/test-launch-basic-ReduceMin.hpp index 88a1ca2ba4..7cba0f0569 100644 --- a/test/functional/launch/reduce-basic/tests/test-launch-basic-ReduceMin.hpp +++ b/test/functional/launch/reduce-basic/tests/test-launch-basic-ReduceMin.hpp @@ -33,8 +33,8 @@ void LaunchReduceMinBasicTestImpl(const SEG_TYPE& seg, constexpr int threads = 256; int blocks = (seg.size() - 1) / threads + 1; - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, + &check_array, &test_array); const int modval = 100; const DATA_TYPE min_init = modval + 1; @@ -61,8 +61,7 @@ void LaunchReduceMinBasicTestImpl(const SEG_TYPE& seg, RAJA::LaunchParams(RAJA::Teams(blocks), RAJA::Threads(threads)), [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { - RAJA::loop(ctx, - seg, + RAJA::loop(ctx, seg, [&](IDX_TYPE idx) { mininit.min(working_array[idx]); @@ -82,8 +81,7 @@ void LaunchReduceMinBasicTestImpl(const SEG_TYPE& seg, [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::loop( - ctx, - seg, + ctx, seg, [&](IDX_TYPE idx) { min.min(working_array[idx] * factor); }); }); @@ -95,16 +93,15 @@ void LaunchReduceMinBasicTestImpl(const SEG_TYPE& seg, [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::loop( - ctx, - seg, + ctx, seg, [&](IDX_TYPE idx) { min.min(working_array[idx] * factor); }); }); ASSERT_EQ(static_cast(min.get()), ref_min * factor); - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } @@ -133,53 +130,43 @@ TYPED_TEST_P(LaunchReduceMinBasicTest, ReduceMinBasicForall) // Range segment tests RAJA::TypedRangeSegment r1(0, 28); RAJA::getIndices(seg_idx, r1); - LaunchReduceMinBasicTestImpl, - LAUNCH_POLICY, - GLOBAL_THREAD_POLICY, - REDUCE_POLICY>(r1, seg_idx, working_res); + LaunchReduceMinBasicTestImpl, LAUNCH_POLICY, + GLOBAL_THREAD_POLICY, REDUCE_POLICY>( + r1, seg_idx, working_res); seg_idx.clear(); RAJA::TypedRangeSegment r2(3, 642); RAJA::getIndices(seg_idx, r2); - LaunchReduceMinBasicTestImpl, - LAUNCH_POLICY, - GLOBAL_THREAD_POLICY, - REDUCE_POLICY>(r2, seg_idx, working_res); + LaunchReduceMinBasicTestImpl, LAUNCH_POLICY, + GLOBAL_THREAD_POLICY, REDUCE_POLICY>( + r2, seg_idx, working_res); seg_idx.clear(); RAJA::TypedRangeSegment r3(0, 2057); RAJA::getIndices(seg_idx, r3); - LaunchReduceMinBasicTestImpl, - LAUNCH_POLICY, - GLOBAL_THREAD_POLICY, - REDUCE_POLICY>(r3, seg_idx, working_res); + LaunchReduceMinBasicTestImpl, LAUNCH_POLICY, + GLOBAL_THREAD_POLICY, REDUCE_POLICY>( + r3, seg_idx, working_res); // Range-stride segment tests seg_idx.clear(); RAJA::TypedRangeStrideSegment r4(0, 188, 2); RAJA::getIndices(seg_idx, r4); - LaunchReduceMinBasicTestImpl, - LAUNCH_POLICY, - GLOBAL_THREAD_POLICY, - REDUCE_POLICY>(r4, seg_idx, working_res); + LaunchReduceMinBasicTestImpl< + IDX_TYPE, DATA_TYPE, RAJA::TypedRangeStrideSegment, + LAUNCH_POLICY, GLOBAL_THREAD_POLICY, REDUCE_POLICY>(r4, seg_idx, + working_res); seg_idx.clear(); RAJA::TypedRangeStrideSegment r5(3, 1029, 3); RAJA::getIndices(seg_idx, r5); - LaunchReduceMinBasicTestImpl, - LAUNCH_POLICY, - GLOBAL_THREAD_POLICY, - REDUCE_POLICY>(r5, seg_idx, working_res); + LaunchReduceMinBasicTestImpl< + IDX_TYPE, DATA_TYPE, RAJA::TypedRangeStrideSegment, + LAUNCH_POLICY, GLOBAL_THREAD_POLICY, REDUCE_POLICY>(r5, seg_idx, + working_res); // List segment tests seg_idx.clear(); @@ -194,12 +181,10 @@ TYPED_TEST_P(LaunchReduceMinBasicTest, ReduceMinBasicForall) } } RAJA::TypedListSegment l1(&seg_idx[0], seg_idx.size(), working_res); - LaunchReduceMinBasicTestImpl, - LAUNCH_POLICY, - GLOBAL_THREAD_POLICY, - REDUCE_POLICY>(l1, seg_idx, working_res); + LaunchReduceMinBasicTestImpl, LAUNCH_POLICY, + GLOBAL_THREAD_POLICY, REDUCE_POLICY>( + l1, seg_idx, working_res); } REGISTER_TYPED_TEST_SUITE_P(LaunchReduceMinBasicTest, ReduceMinBasicForall); diff --git a/test/functional/launch/reduce-basic/tests/test-launch-basic-ReduceSum.hpp b/test/functional/launch/reduce-basic/tests/test-launch-basic-ReduceSum.hpp index c60239013f..a48897d2fd 100644 --- a/test/functional/launch/reduce-basic/tests/test-launch-basic-ReduceSum.hpp +++ b/test/functional/launch/reduce-basic/tests/test-launch-basic-ReduceSum.hpp @@ -34,8 +34,8 @@ void LaunchReduceSumBasicTestImpl(const SEG_TYPE& seg, constexpr int threads = 256; int blocks = (seg.size() - 1) / threads + 1; - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, + &check_array, &test_array); const int modval = 100; @@ -60,8 +60,7 @@ void LaunchReduceSumBasicTestImpl(const SEG_TYPE& seg, RAJA::LaunchParams(RAJA::Teams(blocks), RAJA::Threads(threads)), [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { - RAJA::loop(ctx, - seg, + RAJA::loop(ctx, seg, [&](IDX_TYPE idx) { sum += working_array[idx]; @@ -90,8 +89,8 @@ void LaunchReduceSumBasicTestImpl(const SEG_TYPE& seg, ASSERT_EQ(static_cast(sum.get()), nloops * ref_sum); - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } @@ -120,52 +119,42 @@ TYPED_TEST_P(LaunchReduceSumBasicTest, ReduceSumBasicForall) // Range segment tests RAJA::TypedRangeSegment r1(0, 28); RAJA::getIndices(seg_idx, r1); - LaunchReduceSumBasicTestImpl, - LAUNCH_POLICY, - GLOBAL_THREAD_POLICY, - REDUCE_POLICY>(r1, seg_idx, working_res); + LaunchReduceSumBasicTestImpl, LAUNCH_POLICY, + GLOBAL_THREAD_POLICY, REDUCE_POLICY>( + r1, seg_idx, working_res); seg_idx.clear(); RAJA::TypedRangeSegment r2(3, 642); RAJA::getIndices(seg_idx, r2); - LaunchReduceSumBasicTestImpl, - LAUNCH_POLICY, - GLOBAL_THREAD_POLICY, - REDUCE_POLICY>(r2, seg_idx, working_res); + LaunchReduceSumBasicTestImpl, LAUNCH_POLICY, + GLOBAL_THREAD_POLICY, REDUCE_POLICY>( + r2, seg_idx, working_res); seg_idx.clear(); RAJA::TypedRangeSegment r3(0, 2057); RAJA::getIndices(seg_idx, r3); - LaunchReduceSumBasicTestImpl, - LAUNCH_POLICY, - GLOBAL_THREAD_POLICY, - REDUCE_POLICY>(r3, seg_idx, working_res); + LaunchReduceSumBasicTestImpl, LAUNCH_POLICY, + GLOBAL_THREAD_POLICY, REDUCE_POLICY>( + r3, seg_idx, working_res); // Range-stride segment tests seg_idx.clear(); RAJA::TypedRangeStrideSegment r4(0, 188, 2); RAJA::getIndices(seg_idx, r4); - LaunchReduceSumBasicTestImpl, - LAUNCH_POLICY, - GLOBAL_THREAD_POLICY, - REDUCE_POLICY>(r4, seg_idx, working_res); + LaunchReduceSumBasicTestImpl< + IDX_TYPE, DATA_TYPE, RAJA::TypedRangeStrideSegment, + LAUNCH_POLICY, GLOBAL_THREAD_POLICY, REDUCE_POLICY>(r4, seg_idx, + working_res); seg_idx.clear(); RAJA::TypedRangeStrideSegment r5(3, 1029, 3); RAJA::getIndices(seg_idx, r5); - LaunchReduceSumBasicTestImpl, - LAUNCH_POLICY, - GLOBAL_THREAD_POLICY, - REDUCE_POLICY>(r5, seg_idx, working_res); + LaunchReduceSumBasicTestImpl< + IDX_TYPE, DATA_TYPE, RAJA::TypedRangeStrideSegment, + LAUNCH_POLICY, GLOBAL_THREAD_POLICY, REDUCE_POLICY>(r5, seg_idx, + working_res); // List segment tests seg_idx.clear(); @@ -180,12 +169,10 @@ TYPED_TEST_P(LaunchReduceSumBasicTest, ReduceSumBasicForall) } } RAJA::TypedListSegment l1(&seg_idx[0], seg_idx.size(), working_res); - LaunchReduceSumBasicTestImpl, - LAUNCH_POLICY, - GLOBAL_THREAD_POLICY, - REDUCE_POLICY>(l1, seg_idx, working_res); + LaunchReduceSumBasicTestImpl, LAUNCH_POLICY, + GLOBAL_THREAD_POLICY, REDUCE_POLICY>( + l1, seg_idx, working_res); } REGISTER_TYPED_TEST_SUITE_P(LaunchReduceSumBasicTest, ReduceSumBasicForall); diff --git a/test/functional/launch/reduce-params/tests/test-launch-basic-param-expt-ReduceBitAnd.hpp b/test/functional/launch/reduce-params/tests/test-launch-basic-param-expt-ReduceBitAnd.hpp index 50658f5023..b876291685 100644 --- a/test/functional/launch/reduce-params/tests/test-launch-basic-param-expt-ReduceBitAnd.hpp +++ b/test/functional/launch/reduce-params/tests/test-launch-basic-param-expt-ReduceBitAnd.hpp @@ -34,8 +34,8 @@ void LaunchParamExptReduceBitAndBasicTestImpl( constexpr int threads = 256; int blocks = (seg.size() - 1) / threads + 1; - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, + &check_array, &test_array); // // First a simple non-trivial test that is mildly interesting @@ -85,11 +85,10 @@ void LaunchParamExptReduceBitAndBasicTestImpl( RAJA::LaunchParams(RAJA::Teams(blocks), RAJA::Threads(threads)), RAJA::expt::Reduce(&redand), RAJA::expt::Reduce(&redand2), - [=] RAJA_HOST_DEVICE( - RAJA::LaunchContext ctx, DATA_TYPE & _redand, DATA_TYPE & _redand2) + [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx, DATA_TYPE & _redand, + DATA_TYPE & _redand2) { - RAJA::loop(ctx, - seg, + RAJA::loop(ctx, seg, [&](IDX_TYPE idx) { _redand &= working_array[idx]; @@ -118,8 +117,8 @@ void LaunchParamExptReduceBitAndBasicTestImpl( ASSERT_EQ(static_cast(redand), ref_and); - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } @@ -147,31 +146,25 @@ TYPED_TEST_P(LaunchParamExptReduceBitAndBasicTest, ReduceBitAndBasicForall) // Range segment tests RAJA::TypedRangeSegment r1(0, 28); RAJA::getIndices(seg_idx, r1); - LaunchParamExptReduceBitAndBasicTestImpl, - LAUNCH_POLICY, - GLOBAL_THREAD_POLICY>( + LAUNCH_POLICY, GLOBAL_THREAD_POLICY>( r1, seg_idx, working_res); seg_idx.clear(); RAJA::TypedRangeSegment r2(3, 642); RAJA::getIndices(seg_idx, r2); - LaunchParamExptReduceBitAndBasicTestImpl, - LAUNCH_POLICY, - GLOBAL_THREAD_POLICY>( + LAUNCH_POLICY, GLOBAL_THREAD_POLICY>( r2, seg_idx, working_res); seg_idx.clear(); RAJA::TypedRangeSegment r3(0, 2057); RAJA::getIndices(seg_idx, r3); - LaunchParamExptReduceBitAndBasicTestImpl, - LAUNCH_POLICY, - GLOBAL_THREAD_POLICY>( + LAUNCH_POLICY, GLOBAL_THREAD_POLICY>( r3, seg_idx, working_res); // Range-stride segment tests @@ -179,21 +172,15 @@ TYPED_TEST_P(LaunchParamExptReduceBitAndBasicTest, ReduceBitAndBasicForall) RAJA::TypedRangeStrideSegment r4(0, 188, 2); RAJA::getIndices(seg_idx, r4); LaunchParamExptReduceBitAndBasicTestImpl< - IDX_TYPE, - DATA_TYPE, - RAJA::TypedRangeStrideSegment, - LAUNCH_POLICY, - GLOBAL_THREAD_POLICY>(r4, seg_idx, working_res); + IDX_TYPE, DATA_TYPE, RAJA::TypedRangeStrideSegment, + LAUNCH_POLICY, GLOBAL_THREAD_POLICY>(r4, seg_idx, working_res); seg_idx.clear(); RAJA::TypedRangeStrideSegment r5(3, 1029, 3); RAJA::getIndices(seg_idx, r5); LaunchParamExptReduceBitAndBasicTestImpl< - IDX_TYPE, - DATA_TYPE, - RAJA::TypedRangeStrideSegment, - LAUNCH_POLICY, - GLOBAL_THREAD_POLICY>(r5, seg_idx, working_res); + IDX_TYPE, DATA_TYPE, RAJA::TypedRangeStrideSegment, + LAUNCH_POLICY, GLOBAL_THREAD_POLICY>(r5, seg_idx, working_res); // List segment tests seg_idx.clear(); @@ -208,11 +195,9 @@ TYPED_TEST_P(LaunchParamExptReduceBitAndBasicTest, ReduceBitAndBasicForall) } } RAJA::TypedListSegment l1(&seg_idx[0], seg_idx.size(), working_res); - LaunchParamExptReduceBitAndBasicTestImpl, - LAUNCH_POLICY, - GLOBAL_THREAD_POLICY>( + LAUNCH_POLICY, GLOBAL_THREAD_POLICY>( l1, seg_idx, working_res); } diff --git a/test/functional/launch/reduce-params/tests/test-launch-basic-param-expt-ReduceMin.hpp b/test/functional/launch/reduce-params/tests/test-launch-basic-param-expt-ReduceMin.hpp index c646edc950..2eb1fce3da 100644 --- a/test/functional/launch/reduce-params/tests/test-launch-basic-param-expt-ReduceMin.hpp +++ b/test/functional/launch/reduce-params/tests/test-launch-basic-param-expt-ReduceMin.hpp @@ -33,8 +33,8 @@ void LaunchParamExptReduceMinBasicTestImpl( constexpr int threads = 256; int blocks = (seg.size() - 1) / threads + 1; - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, + &check_array, &test_array); const int modval = 100; const DATA_TYPE min_init = modval + 1; @@ -61,12 +61,11 @@ void LaunchParamExptReduceMinBasicTestImpl( "LaunchMinBasicTest", RAJA::expt::Reduce(&mininit), RAJA::expt::Reduce(&min), - [=] RAJA_HOST_DEVICE( - RAJA::LaunchContext ctx, DATA_TYPE & _mininit, DATA_TYPE & _min) + [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx, DATA_TYPE & _mininit, + DATA_TYPE & _min) { RAJA::loop( - ctx, - seg, + ctx, seg, [&](IDX_TYPE idx) { _mininit = RAJA_MIN(working_array[idx], _mininit); @@ -88,8 +87,7 @@ void LaunchParamExptReduceMinBasicTestImpl( [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx, DATA_TYPE & _min) { RAJA::loop( - ctx, - seg, + ctx, seg, [&](IDX_TYPE idx) { _min = RAJA_MIN(working_array[idx] * factor, _min); }); }); @@ -104,8 +102,7 @@ void LaunchParamExptReduceMinBasicTestImpl( [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx, DATA_TYPE & _min) { RAJA::loop( - ctx, - seg, + ctx, seg, [&](IDX_TYPE idx) { _min = RAJA_MIN(working_array[idx] * factor, _min); }); }); @@ -113,8 +110,8 @@ void LaunchParamExptReduceMinBasicTestImpl( ASSERT_EQ(static_cast(min), ref_min * factor); - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } @@ -142,52 +139,42 @@ TYPED_TEST_P(LaunchParamExptReduceMinBasicTest, ReduceMinBasicForall) // Range segment tests RAJA::TypedRangeSegment r1(0, 28); RAJA::getIndices(seg_idx, r1); - LaunchParamExptReduceMinBasicTestImpl, - LAUNCH_POLICY, - GLOBAL_THREAD_POLICY>( + LAUNCH_POLICY, GLOBAL_THREAD_POLICY>( r1, seg_idx, working_res); seg_idx.clear(); RAJA::TypedRangeSegment r2(3, 642); RAJA::getIndices(seg_idx, r2); - LaunchParamExptReduceMinBasicTestImpl, - LAUNCH_POLICY, - GLOBAL_THREAD_POLICY>( + LAUNCH_POLICY, GLOBAL_THREAD_POLICY>( r2, seg_idx, working_res); seg_idx.clear(); RAJA::TypedRangeSegment r3(0, 2057); RAJA::getIndices(seg_idx, r3); - LaunchParamExptReduceMinBasicTestImpl, - LAUNCH_POLICY, - GLOBAL_THREAD_POLICY>( + LAUNCH_POLICY, GLOBAL_THREAD_POLICY>( r3, seg_idx, working_res); // Range-stride segment tests seg_idx.clear(); RAJA::TypedRangeStrideSegment r4(0, 188, 2); RAJA::getIndices(seg_idx, r4); - LaunchParamExptReduceMinBasicTestImpl, - LAUNCH_POLICY, - GLOBAL_THREAD_POLICY>( + LAUNCH_POLICY, GLOBAL_THREAD_POLICY>( r4, seg_idx, working_res); seg_idx.clear(); RAJA::TypedRangeStrideSegment r5(3, 1029, 3); RAJA::getIndices(seg_idx, r5); - LaunchParamExptReduceMinBasicTestImpl, - LAUNCH_POLICY, - GLOBAL_THREAD_POLICY>( + LAUNCH_POLICY, GLOBAL_THREAD_POLICY>( r5, seg_idx, working_res); // List segment tests @@ -203,11 +190,9 @@ TYPED_TEST_P(LaunchParamExptReduceMinBasicTest, ReduceMinBasicForall) } } RAJA::TypedListSegment l1(&seg_idx[0], seg_idx.size(), working_res); - LaunchParamExptReduceMinBasicTestImpl, - LAUNCH_POLICY, - GLOBAL_THREAD_POLICY>( + LAUNCH_POLICY, GLOBAL_THREAD_POLICY>( l1, seg_idx, working_res); } diff --git a/test/functional/launch/reduce-params/tests/test-launch-basic-param-expt-ReduceSum.hpp b/test/functional/launch/reduce-params/tests/test-launch-basic-param-expt-ReduceSum.hpp index 2bd97982e4..0ce6f6483d 100644 --- a/test/functional/launch/reduce-params/tests/test-launch-basic-param-expt-ReduceSum.hpp +++ b/test/functional/launch/reduce-params/tests/test-launch-basic-param-expt-ReduceSum.hpp @@ -34,8 +34,8 @@ void LaunchParamExptReduceSumBasicTestImpl( constexpr int threads = 256; int blocks = (seg.size() - 1) / threads + 1; - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, + &check_array, &test_array); const int modval = 100; @@ -57,14 +57,12 @@ void LaunchParamExptReduceSumBasicTestImpl( RAJA::launch( RAJA::LaunchParams(RAJA::Teams(blocks), RAJA::Threads(threads)), - "LaunchSumBasicTest", - RAJA::expt::Reduce(&sum), + "LaunchSumBasicTest", RAJA::expt::Reduce(&sum), RAJA::expt::Reduce(&sum2), - [=] RAJA_HOST_DEVICE( - RAJA::LaunchContext ctx, DATA_TYPE & _sum, DATA_TYPE & _sum2) + [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx, DATA_TYPE & _sum, + DATA_TYPE & _sum2) { - RAJA::loop(ctx, - seg, + RAJA::loop(ctx, seg, [&](IDX_TYPE idx) { _sum += working_array[idx]; @@ -94,8 +92,8 @@ void LaunchParamExptReduceSumBasicTestImpl( ASSERT_EQ(static_cast(sum), nloops * ref_sum); - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } @@ -123,52 +121,42 @@ TYPED_TEST_P(LaunchParamExptReduceSumBasicTest, ReduceSumBasicForall) // Range segment tests RAJA::TypedRangeSegment r1(0, 28); RAJA::getIndices(seg_idx, r1); - LaunchParamExptReduceSumBasicTestImpl, - LAUNCH_POLICY, - GLOBAL_THREAD_POLICY>( + LAUNCH_POLICY, GLOBAL_THREAD_POLICY>( r1, seg_idx, working_res); seg_idx.clear(); RAJA::TypedRangeSegment r2(3, 642); RAJA::getIndices(seg_idx, r2); - LaunchParamExptReduceSumBasicTestImpl, - LAUNCH_POLICY, - GLOBAL_THREAD_POLICY>( + LAUNCH_POLICY, GLOBAL_THREAD_POLICY>( r2, seg_idx, working_res); seg_idx.clear(); RAJA::TypedRangeSegment r3(0, 2057); RAJA::getIndices(seg_idx, r3); - LaunchParamExptReduceSumBasicTestImpl, - LAUNCH_POLICY, - GLOBAL_THREAD_POLICY>( + LAUNCH_POLICY, GLOBAL_THREAD_POLICY>( r3, seg_idx, working_res); // Range-stride segment tests seg_idx.clear(); RAJA::TypedRangeStrideSegment r4(0, 188, 2); RAJA::getIndices(seg_idx, r4); - LaunchParamExptReduceSumBasicTestImpl, - LAUNCH_POLICY, - GLOBAL_THREAD_POLICY>( + LAUNCH_POLICY, GLOBAL_THREAD_POLICY>( r4, seg_idx, working_res); seg_idx.clear(); RAJA::TypedRangeStrideSegment r5(3, 1029, 3); RAJA::getIndices(seg_idx, r5); - LaunchParamExptReduceSumBasicTestImpl, - LAUNCH_POLICY, - GLOBAL_THREAD_POLICY>( + LAUNCH_POLICY, GLOBAL_THREAD_POLICY>( r5, seg_idx, working_res); // List segment tests @@ -184,11 +172,9 @@ TYPED_TEST_P(LaunchParamExptReduceSumBasicTest, ReduceSumBasicForall) } } RAJA::TypedListSegment l1(&seg_idx[0], seg_idx.size(), working_res); - LaunchParamExptReduceSumBasicTestImpl, - LAUNCH_POLICY, - GLOBAL_THREAD_POLICY>( + LAUNCH_POLICY, GLOBAL_THREAD_POLICY>( l1, seg_idx, working_res); } diff --git a/test/functional/launch/run-time-switch/tests/test-launch-BasicShared.hpp b/test/functional/launch/run-time-switch/tests/test-launch-BasicShared.hpp index 7f478705c8..e9d5e0b503 100644 --- a/test/functional/launch/run-time-switch/tests/test-launch-BasicShared.hpp +++ b/test/functional/launch/run-time-switch/tests/test-launch-BasicShared.hpp @@ -24,8 +24,8 @@ void LaunchBasicSharedTestImpl() int* check_array; int* test_array; - allocateForallTestData( - N * N, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(N * N, working_res, &working_array, &check_array, + &test_array); // Select platform @@ -47,21 +47,19 @@ void LaunchBasicSharedTestImpl() [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::loop( - ctx, - RAJA::RangeSegment(0, N), + ctx, RAJA::RangeSegment(0, N), [&](int r) { // Array shared within threads of the same team int* s_A = ctx.getSharedMemory(1); - RAJA::loop( - ctx, RAJA::RangeSegment(0, 1), [&](int c) { s_A[c] = r; }); + RAJA::loop(ctx, RAJA::RangeSegment(0, 1), + [&](int c) { s_A[c] = r; }); ctx.teamSync(); // broadcast shared value to all threads and write to array - RAJA::loop(ctx, - RAJA::RangeSegment(0, N), + RAJA::loop(ctx, RAJA::RangeSegment(0, N), [&](int c) { const int idx = c + N * r; @@ -83,8 +81,8 @@ void LaunchBasicSharedTestImpl() } } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } @@ -107,9 +105,7 @@ TYPED_TEST_P(LaunchBasicSharedTest, BasicSharedTeams) typename camp::at>::type, camp::num<2>>::type; - LaunchBasicSharedTestImpl(); } diff --git a/test/functional/launch/segment/tests/test-launch-ListSegment.hpp b/test/functional/launch/segment/tests/test-launch-ListSegment.hpp index 9f14fe6b2e..f100d3dac3 100644 --- a/test/functional/launch/segment/tests/test-launch-ListSegment.hpp +++ b/test/functional/launch/segment/tests/test-launch-ListSegment.hpp @@ -58,8 +58,8 @@ void LaunchListSegmentTestImpl(INDEX_TYPE N) data_len = 1; } - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, + &check_array, &test_array); constexpr int threads = 256; int blocks = (data_len - 1) / threads + 1; @@ -72,16 +72,15 @@ void LaunchListSegmentTestImpl(INDEX_TYPE N) test_array[RAJA::stripIndexType(idx_vals[i])] = idx_vals[i]; } - working_res.memcpy( - working_array, test_array, sizeof(INDEX_TYPE) * data_len); + working_res.memcpy(working_array, test_array, + sizeof(INDEX_TYPE) * data_len); RAJA::launch( RAJA::LaunchParams(RAJA::Teams(blocks), RAJA::Threads(threads)), [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::loop( - ctx, - lseg, + ctx, lseg, [&](INDEX_TYPE idx) { working_array[RAJA::stripIndexType(idx)] = idx; }); }); @@ -91,15 +90,14 @@ void LaunchListSegmentTestImpl(INDEX_TYPE N) memset(static_cast(test_array), 0, sizeof(INDEX_TYPE) * data_len); - working_res.memcpy( - working_array, test_array, sizeof(INDEX_TYPE) * data_len); + working_res.memcpy(working_array, test_array, + sizeof(INDEX_TYPE) * data_len); RAJA::launch( RAJA::LaunchParams(RAJA::Teams(blocks), RAJA::Threads(threads)), [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { - RAJA::loop(ctx, - lseg, + RAJA::loop(ctx, lseg, [&](INDEX_TYPE idx) { (void)idx; @@ -124,8 +122,8 @@ void LaunchListSegmentTestImpl(INDEX_TYPE N) } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } @@ -146,24 +144,16 @@ TYPED_TEST_P(LaunchListSegmentTest, ListSegmentTeams) camp::num<1>>::type; // test zero-length list segment - LaunchListSegmentTestImpl(INDEX_TYPE(0)); - LaunchListSegmentTestImpl(INDEX_TYPE(13)); - LaunchListSegmentTestImpl(INDEX_TYPE(2047)); - LaunchListSegmentTestImpl(INDEX_TYPE(32000)); } diff --git a/test/functional/launch/segment/tests/test-launch-RangeSegment.hpp b/test/functional/launch/segment/tests/test-launch-RangeSegment.hpp index 6691541038..cdb28dc6f3 100644 --- a/test/functional/launch/segment/tests/test-launch-RangeSegment.hpp +++ b/test/functional/launch/segment/tests/test-launch-RangeSegment.hpp @@ -32,8 +32,8 @@ void LaunchRangeSegmentTestImpl(INDEX_TYPE first, INDEX_TYPE last) data_len = 1; } - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, + &check_array, &test_array); constexpr int threads = 256; int blocks = (data_len - 1) / threads + 1; @@ -50,8 +50,7 @@ void LaunchRangeSegmentTestImpl(INDEX_TYPE first, INDEX_TYPE last) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::loop( - ctx, - r1, + ctx, r1, [&](INDEX_TYPE idx) { working_array[RAJA::stripIndexType(idx - rbegin)] = idx; }); }); @@ -61,15 +60,14 @@ void LaunchRangeSegmentTestImpl(INDEX_TYPE first, INDEX_TYPE last) memset(static_cast(test_array), 0, sizeof(INDEX_TYPE) * data_len); - working_res.memcpy( - working_array, test_array, sizeof(INDEX_TYPE) * data_len); + working_res.memcpy(working_array, test_array, + sizeof(INDEX_TYPE) * data_len); RAJA::launch( RAJA::LaunchParams(RAJA::Teams(blocks), RAJA::Threads(threads)), [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { - RAJA::loop(ctx, - r1, + RAJA::loop(ctx, r1, [&](INDEX_TYPE RAJA_UNUSED_ARG(idx)) { working_array[0]++; }); }); @@ -92,8 +90,8 @@ void LaunchRangeSegmentTestImpl(INDEX_TYPE first, INDEX_TYPE last) ASSERT_EQ(test_array[0], check_array[0]); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } @@ -120,20 +118,14 @@ template (INDEX_TYPE(-5), INDEX_TYPE(-5)); - LaunchRangeSegmentTestImpl(INDEX_TYPE(-5), INDEX_TYPE(0)); - LaunchRangeSegmentTestImpl(INDEX_TYPE(-5), INDEX_TYPE(5)); } @@ -151,31 +143,21 @@ TYPED_TEST_P(LaunchRangeSegmentTest, RangeSegmentTeams) camp::num<1>>::type; // test zero-length range segment - LaunchRangeSegmentTestImpl(INDEX_TYPE(3), INDEX_TYPE(3)); - LaunchRangeSegmentTestImpl(INDEX_TYPE(0), INDEX_TYPE(27)); - LaunchRangeSegmentTestImpl(INDEX_TYPE(1), INDEX_TYPE(2047)); - LaunchRangeSegmentTestImpl(INDEX_TYPE(1), INDEX_TYPE(32000)); - runNegativeTests(); } diff --git a/test/functional/launch/segment/tests/test-launch-RangeStrideSegment.hpp b/test/functional/launch/segment/tests/test-launch-RangeStrideSegment.hpp index d17cd31280..c8095d1f15 100644 --- a/test/functional/launch/segment/tests/test-launch-RangeStrideSegment.hpp +++ b/test/functional/launch/segment/tests/test-launch-RangeStrideSegment.hpp @@ -35,8 +35,8 @@ void LaunchRangeStrideSegmentTestImpl(INDEX_TYPE first, data_len = 1; } - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, + &check_array, &test_array); memset(static_cast(test_array), 0, sizeof(INDEX_TYPE) * data_len); @@ -60,8 +60,7 @@ void LaunchRangeStrideSegmentTestImpl(INDEX_TYPE first, [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::loop( - ctx, - r1, + ctx, r1, [&](INDEX_TYPE idx) { working_array[RAJA::stripIndexType((idx - first) / stride)] = idx; @@ -75,8 +74,7 @@ void LaunchRangeStrideSegmentTestImpl(INDEX_TYPE first, RAJA::LaunchParams(RAJA::Teams(blocks), RAJA::Threads(threads)), [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { - RAJA::loop(ctx, - r1, + RAJA::loop(ctx, r1, [&](INDEX_TYPE RAJA_UNUSED_ARG(idx)) { working_array[0]++; }); }); @@ -99,8 +97,8 @@ void LaunchRangeStrideSegmentTestImpl(INDEX_TYPE first, ASSERT_EQ(test_array[0], check_array[0]); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } @@ -128,37 +126,22 @@ template >::value>::type* = nullptr> void runNegativeStrideTests() { - LaunchRangeStrideSegmentTestImpl( + LaunchRangeStrideSegmentTestImpl( INDEX_TYPE(-10), INDEX_TYPE(-1), DIFF_TYPE(2)); - LaunchRangeStrideSegmentTestImpl( + LaunchRangeStrideSegmentTestImpl( INDEX_TYPE(-5), INDEX_TYPE(0), DIFF_TYPE(2)); - LaunchRangeStrideSegmentTestImpl( + LaunchRangeStrideSegmentTestImpl( INDEX_TYPE(-5), INDEX_TYPE(5), DIFF_TYPE(3)); // Test negative strides - LaunchRangeStrideSegmentTestImpl( + LaunchRangeStrideSegmentTestImpl( INDEX_TYPE(10), INDEX_TYPE(-1), DIFF_TYPE(-1)); - LaunchRangeStrideSegmentTestImpl( + LaunchRangeStrideSegmentTestImpl( INDEX_TYPE(10), INDEX_TYPE(0), DIFF_TYPE(-2)); } @@ -176,67 +159,37 @@ TYPED_TEST_P(LaunchRangeStrideSegmentTest, RangeStrideSegmentTeams) using DIFF_TYPE = typename std::make_signed>::type; - LaunchRangeStrideSegmentTestImpl( + LaunchRangeStrideSegmentTestImpl( INDEX_TYPE(0), INDEX_TYPE(20), DIFF_TYPE(1)); - LaunchRangeStrideSegmentTestImpl( + LaunchRangeStrideSegmentTestImpl( INDEX_TYPE(1), INDEX_TYPE(20), DIFF_TYPE(1)); - LaunchRangeStrideSegmentTestImpl( + LaunchRangeStrideSegmentTestImpl( INDEX_TYPE(0), INDEX_TYPE(20), DIFF_TYPE(2)); - LaunchRangeStrideSegmentTestImpl( + LaunchRangeStrideSegmentTestImpl( INDEX_TYPE(1), INDEX_TYPE(20), DIFF_TYPE(2)); - LaunchRangeStrideSegmentTestImpl( + LaunchRangeStrideSegmentTestImpl( INDEX_TYPE(0), INDEX_TYPE(21), DIFF_TYPE(2)); - LaunchRangeStrideSegmentTestImpl( + LaunchRangeStrideSegmentTestImpl( INDEX_TYPE(1), INDEX_TYPE(21), DIFF_TYPE(2)); - LaunchRangeStrideSegmentTestImpl( + LaunchRangeStrideSegmentTestImpl( INDEX_TYPE(1), INDEX_TYPE(255), DIFF_TYPE(2)); // Test size zero segments - LaunchRangeStrideSegmentTestImpl( + LaunchRangeStrideSegmentTestImpl( INDEX_TYPE(0), INDEX_TYPE(20), DIFF_TYPE(-2)); - LaunchRangeStrideSegmentTestImpl( + LaunchRangeStrideSegmentTestImpl( INDEX_TYPE(1), INDEX_TYPE(20), DIFF_TYPE(-2)); - runNegativeStrideTests(); } diff --git a/test/functional/launch/shared_mem/tests/test-launch-DynamicMem.hpp b/test/functional/launch/shared_mem/tests/test-launch-DynamicMem.hpp index 9f944fb805..0c59d7f5ed 100644 --- a/test/functional/launch/shared_mem/tests/test-launch-DynamicMem.hpp +++ b/test/functional/launch/shared_mem/tests/test-launch-DynamicMem.hpp @@ -31,8 +31,8 @@ void LaunchDynamicMemTestImpl(INDEX_TYPE block_range, INDEX_TYPE thread_range) size_t data_len = RAJA::stripIndexType(block_range) * RAJA::stripIndexType(thread_range); - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, + &check_array, &test_array); // determine the underlying type of block_range using s_type = decltype(RAJA::stripIndexType(block_range)); @@ -60,8 +60,7 @@ void LaunchDynamicMemTestImpl(INDEX_TYPE block_range, INDEX_TYPE thread_range) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::loop( - ctx, - outer_range, + ctx, outer_range, [&](INDEX_TYPE bid) { INDEX_TYPE* tile_ptr = ctx.getSharedMemory( @@ -75,8 +74,7 @@ void LaunchDynamicMemTestImpl(INDEX_TYPE block_range, INDEX_TYPE thread_range) int_tile_ptr, RAJA::stripIndexType(thread_range)); RAJA::loop( - ctx, - inner_range, + ctx, inner_range, [&](INDEX_TYPE tid) { Int_Tile(RAJA::stripIndexType(tid)) = @@ -89,8 +87,7 @@ void LaunchDynamicMemTestImpl(INDEX_TYPE block_range, INDEX_TYPE thread_range) ctx.teamSync(); RAJA::loop( - ctx, - inner_range, + ctx, inner_range, [&](INDEX_TYPE tid) { INDEX_TYPE idx = tid + thread_range * bid; @@ -111,8 +108,8 @@ void LaunchDynamicMemTestImpl(INDEX_TYPE block_range, INDEX_TYPE thread_range) check_array[RAJA::stripIndexType(i)]); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } @@ -137,16 +134,10 @@ TYPED_TEST_P(LaunchDynamicMemTest, DynamicMemLaunch) camp::num<2>>::type; - LaunchDynamicMemTestImpl(INDEX_TYPE(4), INDEX_TYPE(2)); - LaunchDynamicMemTestImpl(INDEX_TYPE(5), INDEX_TYPE(32)); } diff --git a/test/functional/launch/shared_mem/tests/test-launch-StaticMem.hpp b/test/functional/launch/shared_mem/tests/test-launch-StaticMem.hpp index 623075af98..f80f70f752 100644 --- a/test/functional/launch/shared_mem/tests/test-launch-StaticMem.hpp +++ b/test/functional/launch/shared_mem/tests/test-launch-StaticMem.hpp @@ -34,8 +34,8 @@ void LaunchStaticMemTestImpl(INDEX_TYPE block_range) size_t data_len = RAJA::stripIndexType(block_range) * RAJA::stripIndexType(thread_range); - allocateForallTestData( - data_len, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(data_len, working_res, &working_array, + &check_array, &test_array); // determine the underlying type of block_range using s_type = decltype(RAJA::stripIndexType(block_range)); @@ -55,8 +55,7 @@ void LaunchStaticMemTestImpl(INDEX_TYPE block_range) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::loop( - ctx, - outer_range, + ctx, outer_range, [&](INDEX_TYPE bid) { // Since we are using custom index type we have to first use a @@ -71,8 +70,7 @@ void LaunchStaticMemTestImpl(INDEX_TYPE block_range) INDEX_TYPE* Tile = (INDEX_TYPE*)char_Tile; RAJA::loop( - ctx, - inner_range, + ctx, inner_range, [&](INDEX_TYPE tid) { Tile[RAJA::stripIndexType(thread_range) - @@ -83,8 +81,7 @@ void LaunchStaticMemTestImpl(INDEX_TYPE block_range) ctx.teamSync(); RAJA::loop( - ctx, - inner_range, + ctx, inner_range, [&](INDEX_TYPE tid) { INDEX_TYPE idx = tid + thread_range * bid; @@ -104,8 +101,8 @@ void LaunchStaticMemTestImpl(INDEX_TYPE block_range) check_array[RAJA::stripIndexType(i)]); } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } @@ -130,19 +127,11 @@ TYPED_TEST_P(LaunchStaticMemTest, StaticMemLaunch) camp::num<2>>::type; - LaunchStaticMemTestImpl(INDEX_TYPE(4)); + LaunchStaticMemTestImpl(INDEX_TYPE(4)); - LaunchStaticMemTestImpl(INDEX_TYPE(5)); + LaunchStaticMemTestImpl(INDEX_TYPE(5)); } REGISTER_TYPED_TEST_SUITE_P(LaunchStaticMemTest, StaticMemLaunch); diff --git a/test/functional/launch/tile_icount_direct/tests/test-launch-nested-Tile-iCount-Direct.hpp b/test/functional/launch/tile_icount_direct/tests/test-launch-nested-Tile-iCount-Direct.hpp index 1a5e95d5bf..db8c4541a6 100644 --- a/test/functional/launch/tile_icount_direct/tests/test-launch-nested-Tile-iCount-Direct.hpp +++ b/test/functional/launch/tile_icount_direct/tests/test-launch-nested-Tile-iCount-Direct.hpp @@ -44,16 +44,12 @@ void LaunchNestedTileDirectTestImpl(INDEX_TYPE M) data_len = 1; } - allocateForallTestData(data_len, - working_res, - &working_ttile_array, - &check_ttile_array, + allocateForallTestData(data_len, working_res, + &working_ttile_array, &check_ttile_array, &test_ttile_array); - allocateForallTestData(data_len, - working_res, - &working_iloop_array, - &check_iloop_array, + allocateForallTestData(data_len, working_res, + &working_iloop_array, &check_iloop_array, &test_iloop_array); if (RAJA::stripIndexType(N) > 0) @@ -67,15 +63,12 @@ void LaunchNestedTileDirectTestImpl(INDEX_TYPE M) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::tile_tcount( - ctx, - threads_x, - r1, + ctx, threads_x, r1, [&](RAJA::TypedRangeSegment const& x_tile, INDEX_TYPE bx) { RAJA::loop_icount( - ctx, - x_tile, + ctx, x_tile, [&](INDEX_TYPE tx, INDEX_TYPE ix) { working_ttile_array[tx] = bx; @@ -87,26 +80,23 @@ void LaunchNestedTileDirectTestImpl(INDEX_TYPE M) else { // zero-length segment - memset( - static_cast(test_ttile_array), 0, sizeof(INDEX_TYPE) * data_len); + memset(static_cast(test_ttile_array), 0, + sizeof(INDEX_TYPE) * data_len); - working_res.memcpy( - working_ttile_array, test_ttile_array, sizeof(INDEX_TYPE) * data_len); + working_res.memcpy(working_ttile_array, test_ttile_array, + sizeof(INDEX_TYPE) * data_len); RAJA::launch( RAJA::LaunchParams(RAJA::Teams(blocks_x), RAJA::Threads(blocks_x)), [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::tile_tcount( - ctx, - threads_x, - r1, + ctx, threads_x, r1, [&](RAJA::TypedRangeSegment const& x_tile, INDEX_TYPE RAJA_UNUSED_ARG(bx)) { RAJA::loop_icount( - ctx, - x_tile, + ctx, x_tile, [&](INDEX_TYPE RAJA_UNUSED_ARG(tx), INDEX_TYPE RAJA_UNUSED_ARG(ix)) { @@ -117,10 +107,10 @@ void LaunchNestedTileDirectTestImpl(INDEX_TYPE M) }); } - working_res.memcpy( - check_ttile_array, working_ttile_array, sizeof(INDEX_TYPE) * data_len); - working_res.memcpy( - check_iloop_array, working_iloop_array, sizeof(INDEX_TYPE) * data_len); + working_res.memcpy(check_ttile_array, working_ttile_array, + sizeof(INDEX_TYPE) * data_len); + working_res.memcpy(check_iloop_array, working_iloop_array, + sizeof(INDEX_TYPE) * data_len); if (RAJA::stripIndexType(N) > 0) { @@ -147,11 +137,11 @@ void LaunchNestedTileDirectTestImpl(INDEX_TYPE M) ASSERT_EQ(check_iloop_array[0], check_iloop_array[0]); } - deallocateForallTestData( - working_res, working_ttile_array, check_ttile_array, test_ttile_array); + deallocateForallTestData(working_res, working_ttile_array, + check_ttile_array, test_ttile_array); - deallocateForallTestData( - working_res, working_iloop_array, check_iloop_array, test_iloop_array); + deallocateForallTestData(working_res, working_iloop_array, + check_iloop_array, test_iloop_array); } @@ -179,24 +169,15 @@ TYPED_TEST_P(LaunchNestedTileDirectTest, RangeSegmentTeams) // test zero-length range segment - LaunchNestedTileDirectTestImpl(INDEX_TYPE(0)); + LaunchNestedTileDirectTestImpl(INDEX_TYPE(0)); // Keep at one since we are doing a direct thread test - LaunchNestedTileDirectTestImpl(INDEX_TYPE(1)); - - LaunchNestedTileDirectTestImpl(INDEX_TYPE(2)); + LaunchNestedTileDirectTestImpl(INDEX_TYPE(1)); + + LaunchNestedTileDirectTestImpl(INDEX_TYPE(2)); } REGISTER_TYPED_TEST_SUITE_P(LaunchNestedTileDirectTest, RangeSegmentTeams); diff --git a/test/functional/launch/tile_icount_loop/tests/test-launch-nested-Tile-iCount-Loop.hpp b/test/functional/launch/tile_icount_loop/tests/test-launch-nested-Tile-iCount-Loop.hpp index cd79c5259f..e9e7e1c6ca 100644 --- a/test/functional/launch/tile_icount_loop/tests/test-launch-nested-Tile-iCount-Loop.hpp +++ b/test/functional/launch/tile_icount_loop/tests/test-launch-nested-Tile-iCount-Loop.hpp @@ -47,16 +47,12 @@ void LaunchNestedTileLoopTestImpl(INDEX_TYPE M) data_len = 1; } - allocateForallTestData(data_len, - working_res, - &working_ttile_array, - &check_ttile_array, + allocateForallTestData(data_len, working_res, + &working_ttile_array, &check_ttile_array, &test_ttile_array); - allocateForallTestData(data_len, - working_res, - &working_iloop_array, - &check_iloop_array, + allocateForallTestData(data_len, working_res, + &working_iloop_array, &check_iloop_array, &test_iloop_array); if (RAJA::stripIndexType(N) > 0) @@ -70,15 +66,12 @@ void LaunchNestedTileLoopTestImpl(INDEX_TYPE M) [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::tile_tcount( - ctx, - tile_size, - r1, + ctx, tile_size, r1, [&](RAJA::TypedRangeSegment const& x_tile, INDEX_TYPE bx) { RAJA::loop_icount( - ctx, - x_tile, + ctx, x_tile, [&](INDEX_TYPE tx, INDEX_TYPE ix) { working_ttile_array[tx] = bx; @@ -90,26 +83,23 @@ void LaunchNestedTileLoopTestImpl(INDEX_TYPE M) else { // zero-length segment - memset( - static_cast(test_ttile_array), 0, sizeof(INDEX_TYPE) * data_len); + memset(static_cast(test_ttile_array), 0, + sizeof(INDEX_TYPE) * data_len); - working_res.memcpy( - working_ttile_array, test_ttile_array, sizeof(INDEX_TYPE) * data_len); + working_res.memcpy(working_ttile_array, test_ttile_array, + sizeof(INDEX_TYPE) * data_len); RAJA::launch( RAJA::LaunchParams(RAJA::Teams(blocks_x), RAJA::Threads(blocks_x)), [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { RAJA::tile_tcount( - ctx, - tile_size, - r1, + ctx, tile_size, r1, [&](RAJA::TypedRangeSegment const& x_tile, INDEX_TYPE RAJA_UNUSED_ARG(bx)) { RAJA::loop_icount( - ctx, - x_tile, + ctx, x_tile, [&](INDEX_TYPE RAJA_UNUSED_ARG(tx), INDEX_TYPE RAJA_UNUSED_ARG(ix)) { @@ -120,10 +110,10 @@ void LaunchNestedTileLoopTestImpl(INDEX_TYPE M) }); } - working_res.memcpy( - check_ttile_array, working_ttile_array, sizeof(INDEX_TYPE) * data_len); - working_res.memcpy( - check_iloop_array, working_iloop_array, sizeof(INDEX_TYPE) * data_len); + working_res.memcpy(check_ttile_array, working_ttile_array, + sizeof(INDEX_TYPE) * data_len); + working_res.memcpy(check_iloop_array, working_iloop_array, + sizeof(INDEX_TYPE) * data_len); if (RAJA::stripIndexType(N) > 0) { @@ -150,11 +140,11 @@ void LaunchNestedTileLoopTestImpl(INDEX_TYPE M) ASSERT_EQ(check_iloop_array[0], check_iloop_array[0]); } - deallocateForallTestData( - working_res, working_ttile_array, check_ttile_array, test_ttile_array); + deallocateForallTestData(working_res, working_ttile_array, + check_ttile_array, test_ttile_array); - deallocateForallTestData( - working_res, working_iloop_array, check_iloop_array, test_iloop_array); + deallocateForallTestData(working_res, working_iloop_array, + check_iloop_array, test_iloop_array); } @@ -182,24 +172,15 @@ TYPED_TEST_P(LaunchNestedTileLoopTest, RangeSegmentTeams) // test zero-length range segment - LaunchNestedTileLoopTestImpl(INDEX_TYPE(0)); + LaunchNestedTileLoopTestImpl(INDEX_TYPE(0)); // Keep at one since we are doing a direct thread test - LaunchNestedTileLoopTestImpl(INDEX_TYPE(1)); - - LaunchNestedTileLoopTestImpl(INDEX_TYPE(2)); + LaunchNestedTileLoopTestImpl(INDEX_TYPE(1)); + + LaunchNestedTileLoopTestImpl(INDEX_TYPE(2)); } REGISTER_TYPED_TEST_SUITE_P(LaunchNestedTileLoopTest, RangeSegmentTeams); diff --git a/test/functional/scan/tests/test-scan-Exclusive.hpp b/test/functional/scan/tests/test-scan-Exclusive.hpp index a0d2af93d9..8b904dace8 100644 --- a/test/functional/scan/tests/test-scan-Exclusive.hpp +++ b/test/functional/scan/tests/test-scan-Exclusive.hpp @@ -55,9 +55,7 @@ void ScanExclusiveTestImpl( RAJA::exclusive_scan( RAJA::make_span(static_cast(work_in), N), - RAJA::make_span(work_out, N), - OP_TYPE{}, - offset); + RAJA::make_span(work_out, N), OP_TYPE{}, offset); res.memcpy(host_out, work_out, sizeof(T) * N); res.wait(); @@ -68,11 +66,8 @@ void ScanExclusiveTestImpl( res.memcpy(work_in, host_in, sizeof(T) * N); RAJA::exclusive_scan( - res, - RAJA::make_span(static_cast(work_in), N), - RAJA::make_span(work_out, N), - OP_TYPE{}, - offset); + res, RAJA::make_span(static_cast(work_in), N), + RAJA::make_span(work_out, N), OP_TYPE{}, offset); res.memcpy(host_out, work_out, sizeof(T) * N); res.wait(); diff --git a/test/functional/scan/tests/test-scan-ExclusiveInplace.hpp b/test/functional/scan/tests/test-scan-ExclusiveInplace.hpp index f4d0b7dfc9..4c29013c80 100644 --- a/test/functional/scan/tests/test-scan-ExclusiveInplace.hpp +++ b/test/functional/scan/tests/test-scan-ExclusiveInplace.hpp @@ -53,8 +53,8 @@ void ScanExclusiveInplaceTestImpl( res.memcpy(work_in, host_in, sizeof(T) * N); res.wait(); - RAJA::exclusive_scan_inplace( - RAJA::make_span(work_in, N), OP_TYPE{}, offset); + RAJA::exclusive_scan_inplace(RAJA::make_span(work_in, N), + OP_TYPE{}, offset); res.memcpy(host_out, work_in, sizeof(T) * N); res.wait(); @@ -64,8 +64,8 @@ void ScanExclusiveInplaceTestImpl( // test interface with resource res.memcpy(work_in, host_in, sizeof(T) * N); - RAJA::exclusive_scan_inplace( - res, RAJA::make_span(work_in, N), OP_TYPE{}, offset); + RAJA::exclusive_scan_inplace(res, RAJA::make_span(work_in, N), + OP_TYPE{}, offset); res.memcpy(host_out, work_in, sizeof(T) * N); res.wait(); diff --git a/test/functional/scan/tests/test-scan-Inclusive.hpp b/test/functional/scan/tests/test-scan-Inclusive.hpp index 91b550e99f..1286785154 100644 --- a/test/functional/scan/tests/test-scan-Inclusive.hpp +++ b/test/functional/scan/tests/test-scan-Inclusive.hpp @@ -54,8 +54,7 @@ void ScanInclusiveTestImpl(int N) RAJA::inclusive_scan( RAJA::make_span(static_cast(work_in), N), - RAJA::make_span(work_out, N), - OP_TYPE{}); + RAJA::make_span(work_out, N), OP_TYPE{}); res.memcpy(host_out, work_out, sizeof(T) * N); res.wait(); @@ -66,10 +65,8 @@ void ScanInclusiveTestImpl(int N) res.memcpy(work_in, host_in, sizeof(T) * N); RAJA::inclusive_scan( - res, - RAJA::make_span(static_cast(work_in), N), - RAJA::make_span(work_out, N), - OP_TYPE{}); + res, RAJA::make_span(static_cast(work_in), N), + RAJA::make_span(work_out, N), OP_TYPE{}); res.memcpy(host_out, work_out, sizeof(T) * N); res.wait(); diff --git a/test/functional/scan/tests/test-scan-InclusiveInplace.hpp b/test/functional/scan/tests/test-scan-InclusiveInplace.hpp index ffdc9869aa..92b7e7447c 100644 --- a/test/functional/scan/tests/test-scan-InclusiveInplace.hpp +++ b/test/functional/scan/tests/test-scan-InclusiveInplace.hpp @@ -63,8 +63,8 @@ void ScanInclusiveInplaceTestImpl(int N) // test interface with resource res.memcpy(work_in, host_in, sizeof(T) * N); - RAJA::inclusive_scan_inplace( - res, RAJA::make_span(work_in, N), OP_TYPE{}); + RAJA::inclusive_scan_inplace(res, RAJA::make_span(work_in, N), + OP_TYPE{}); res.memcpy(host_out, work_in, sizeof(T) * N); res.wait(); diff --git a/test/functional/tensor/matrix/tests/test-tensor-matrix-CtorGetSet.hpp b/test/functional/tensor/matrix/tests/test-tensor-matrix-CtorGetSet.hpp index 261d3a6d75..e1390849f9 100644 --- a/test/functional/tensor/matrix/tests/test-tensor-matrix-CtorGetSet.hpp +++ b/test/functional/tensor/matrix/tests/test-tensor-matrix-CtorGetSet.hpp @@ -23,7 +23,7 @@ void CtorGetSetImpl() // Allocate Data // std::vector data1_vec(matrix_t::s_num_rows * - matrix_t::s_num_columns); + matrix_t::s_num_columns); RAJA::View> data1_h( data1_vec.data(), matrix_t::s_num_rows, matrix_t::s_num_columns); @@ -33,7 +33,7 @@ void CtorGetSetImpl() std::vector data2_vec(matrix_t::s_num_rows * - matrix_t::s_num_columns); + matrix_t::s_num_columns); RAJA::View> data2_h( data2_vec.data(), matrix_t::s_num_rows, matrix_t::s_num_columns); diff --git a/test/functional/tensor/matrix/tests/test-tensor-matrix-ET_Add.hpp b/test/functional/tensor/matrix/tests/test-tensor-matrix-ET_Add.hpp index a2d2a27b32..3f8d42417f 100644 --- a/test/functional/tensor/matrix/tests/test-tensor-matrix-ET_Add.hpp +++ b/test/functional/tensor/matrix/tests/test-tensor-matrix-ET_Add.hpp @@ -125,9 +125,8 @@ void ET_AddImpl() { for (camp::idx_t j = 0; j < N; ++j) { - ASSERT_SCALAR_EQ(data5_h(j, i), - data1_h(i, j) + data2_h(j, i) + data3_h(i, j) + - data4_h(j, i)); + ASSERT_SCALAR_EQ(data5_h(j, i), data1_h(i, j) + data2_h(j, i) + + data3_h(i, j) + data4_h(j, i)); // printf("%d,%d: %lf, %lf\n", (int)i, (int)j, data1(i,j), // data2(i,j)); } diff --git a/test/functional/tensor/matrix/tests/test-tensor-matrix-ET_Divide.hpp b/test/functional/tensor/matrix/tests/test-tensor-matrix-ET_Divide.hpp index f934c097a0..95b3b38304 100644 --- a/test/functional/tensor/matrix/tests/test-tensor-matrix-ET_Divide.hpp +++ b/test/functional/tensor/matrix/tests/test-tensor-matrix-ET_Divide.hpp @@ -126,9 +126,8 @@ void ET_DivideImpl() { for (camp::idx_t j = 0; j < N; ++j) { - ASSERT_SCALAR_EQ(data5_h(j, i), - data1_h(i, j) / data2_h(j, i) + - data3_h(i, j) / data4_h(j, i)); + ASSERT_SCALAR_EQ(data5_h(j, i), data1_h(i, j) / data2_h(j, i) + + data3_h(i, j) / data4_h(j, i)); // printf("%d,%d: %lf, %lf\n", (int)i, (int)j, data1(i,j), // data2(i,j)); } diff --git a/test/functional/tensor/matrix/tests/test-tensor-matrix-ET_LoadStore.hpp b/test/functional/tensor/matrix/tests/test-tensor-matrix-ET_LoadStore.hpp index f1183bcc2d..12e48fdc92 100644 --- a/test/functional/tensor/matrix/tests/test-tensor-matrix-ET_LoadStore.hpp +++ b/test/functional/tensor/matrix/tests/test-tensor-matrix-ET_LoadStore.hpp @@ -25,7 +25,7 @@ void ET_LoadStoreImpl() // alloc data1 std::vector data1_vec(matrix_t::s_num_rows * - matrix_t::s_num_columns); + matrix_t::s_num_columns); RAJA::View> data1_h( data1_vec.data(), matrix_t::s_num_rows, matrix_t::s_num_columns); @@ -36,7 +36,7 @@ void ET_LoadStoreImpl() // alloc data2 std::vector data2_vec(matrix_t::s_num_rows * - matrix_t::s_num_columns); + matrix_t::s_num_columns); RAJA::View> data2_h( data2_vec.data(), matrix_t::s_num_columns, matrix_t::s_num_rows); @@ -48,23 +48,19 @@ void ET_LoadStoreImpl() // alloc data3 with StaticLayout std::vector data3_vec(matrix_t::s_num_rows * matrix_t::s_num_columns); - RAJA::View> + RAJA::View> data3_h(data3_vec.data()); element_t* data3_ptr = tensor_malloc(data3_vec); - RAJA::View> + RAJA::View> data3_d(data3_ptr); // alloc data4 std::vector data4_vec(matrix_t::s_num_rows * - matrix_t::s_num_columns); + matrix_t::s_num_columns); RAJA::View> data4_h( data4_vec.data(), matrix_t::s_num_columns, matrix_t::s_num_rows); @@ -75,7 +71,7 @@ void ET_LoadStoreImpl() // alloc data5 std::vector data5_vec(matrix_t::s_num_rows * - matrix_t::s_num_columns); + matrix_t::s_num_columns); RAJA::View> data5_h( data5_vec.data(), matrix_t::s_num_columns, matrix_t::s_num_rows); @@ -86,7 +82,7 @@ void ET_LoadStoreImpl() // alloc data6 std::vector data6_vec(matrix_t::s_num_rows * - matrix_t::s_num_columns); + matrix_t::s_num_columns); RAJA::View> data6_h( data6_vec.data(), matrix_t::s_num_columns, matrix_t::s_num_rows); @@ -97,7 +93,7 @@ void ET_LoadStoreImpl() // alloc data7 std::vector data7_vec(matrix_t::s_num_rows * - matrix_t::s_num_columns); + matrix_t::s_num_columns); RAJA::View> data7_h( data7_vec.data(), matrix_t::s_num_columns, matrix_t::s_num_rows); @@ -132,10 +128,10 @@ void ET_LoadStoreImpl() auto SArows = RAJA::expt::RowIndex::static_all(); auto SAcols = RAJA::expt::ColIndex::static_all(); - auto SRrows = RAJA::expt::RowIndex:: - template static_range<0, matrix_t::s_num_rows>(); - auto SRcols = RAJA::expt::ColIndex:: - template static_range<0, matrix_t::s_num_columns>(); + auto SRrows = RAJA::expt::RowIndex< + int, matrix_t>::template static_range<0, matrix_t::s_num_rows>(); + auto SRcols = RAJA::expt::ColIndex< + int, matrix_t>::template static_range<0, matrix_t::s_num_columns>(); data2_d(cols, rows) = data1_d(rows, cols); diff --git a/test/functional/tensor/matrix/tests/test-tensor-matrix-ET_MatrixMatrixMultiplyAdd.hpp b/test/functional/tensor/matrix/tests/test-tensor-matrix-ET_MatrixMatrixMultiplyAdd.hpp index b7fda8523f..1e15d39bc9 100644 --- a/test/functional/tensor/matrix/tests/test-tensor-matrix-ET_MatrixMatrixMultiplyAdd.hpp +++ b/test/functional/tensor/matrix/tests/test-tensor-matrix-ET_MatrixMatrixMultiplyAdd.hpp @@ -59,8 +59,8 @@ void ET_MatrixMatrixMultiplyAddImpl() // alloc data3 - The result matrix std::vector data3_vec(N * N); - RAJA::TypedView, TX, TY> data3_h( - data3_vec.data(), N, N); + RAJA::TypedView, TX, TY> data3_h(data3_vec.data(), + N, N); element_t* data3_ptr = tensor_malloc(data3_vec); RAJA::TypedView, TX, TY> data3_d(data3_ptr, N, N); diff --git a/test/functional/tensor/matrix/tests/test-tensor-matrix-ET_Subtract.hpp b/test/functional/tensor/matrix/tests/test-tensor-matrix-ET_Subtract.hpp index de849d5e32..2d4abee459 100644 --- a/test/functional/tensor/matrix/tests/test-tensor-matrix-ET_Subtract.hpp +++ b/test/functional/tensor/matrix/tests/test-tensor-matrix-ET_Subtract.hpp @@ -125,9 +125,8 @@ void ET_SubtractImpl() { for (camp::idx_t j = 0; j < N; ++j) { - ASSERT_SCALAR_EQ(data5_h(j, i), - data1_h(i, j) - data2_h(j, i) + data3_h(i, j) - - data4_h(j, i)); + ASSERT_SCALAR_EQ(data5_h(j, i), data1_h(i, j) - data2_h(j, i) + + data3_h(i, j) - data4_h(j, i)); // printf("%d,%d: %lf, %lf\n", (int)i, (int)j, data1(i,j), // data2(i,j)); } diff --git a/test/functional/tensor/matrix/tests/test-tensor-matrix-Load_ColMajor.hpp b/test/functional/tensor/matrix/tests/test-tensor-matrix-Load_ColMajor.hpp index 767d5df0a5..37c749524c 100644 --- a/test/functional/tensor/matrix/tests/test-tensor-matrix-Load_ColMajor.hpp +++ b/test/functional/tensor/matrix/tests/test-tensor-matrix-Load_ColMajor.hpp @@ -26,7 +26,7 @@ void Load_ColMajorImpl() // alloc data1 std::vector data1_vec(4 * matrix_t::s_num_rows * - matrix_t::s_num_columns); + matrix_t::s_num_columns); RAJA::View> data1_h( data1_vec.data(), 2 * matrix_t::s_num_columns, 2 * matrix_t::s_num_rows); @@ -38,7 +38,7 @@ void Load_ColMajorImpl() // alloc data2 std::vector data2_vec(matrix_t::s_num_rows * - matrix_t::s_num_columns); + matrix_t::s_num_columns); RAJA::View> data2_h( data2_vec.data(), matrix_t::s_num_columns, matrix_t::s_num_rows); @@ -135,13 +135,13 @@ void Load_ColMajorImpl() matrix_t m; if (matrix_t::layout_type::is_column_major()) { - m.load_packed_nm( - data1_ptr, 1, 2 * matrix_t::s_num_rows, n_size, m_size); + m.load_packed_nm(data1_ptr, 1, 2 * matrix_t::s_num_rows, n_size, + m_size); } else { - m.load_strided_nm( - data1_ptr, 1, 2 * matrix_t::s_num_rows, n_size, m_size); + m.load_strided_nm(data1_ptr, 1, 2 * matrix_t::s_num_rows, n_size, + m_size); } // write out to a second view so we can check it on the host diff --git a/test/functional/tensor/matrix/tests/test-tensor-matrix-Load_RowMajor.hpp b/test/functional/tensor/matrix/tests/test-tensor-matrix-Load_RowMajor.hpp index 50943cee85..11eb276124 100644 --- a/test/functional/tensor/matrix/tests/test-tensor-matrix-Load_RowMajor.hpp +++ b/test/functional/tensor/matrix/tests/test-tensor-matrix-Load_RowMajor.hpp @@ -26,7 +26,7 @@ void Load_RowMajorImpl() // alloc data1 std::vector data1_vec(4 * matrix_t::s_num_rows * - matrix_t::s_num_columns); + matrix_t::s_num_columns); RAJA::View> data1_h( data1_vec.data(), 2 * matrix_t::s_num_rows, 2 * matrix_t::s_num_columns); @@ -38,7 +38,7 @@ void Load_RowMajorImpl() // alloc data2 std::vector data2_vec(matrix_t::s_num_rows * - matrix_t::s_num_columns); + matrix_t::s_num_columns); RAJA::View> data2_h( data2_vec.data(), matrix_t::s_num_rows, matrix_t::s_num_columns); @@ -133,13 +133,13 @@ void Load_RowMajorImpl() matrix_t m; if (matrix_t::layout_type::is_row_major()) { - m.load_packed_nm( - data1_ptr, 2 * matrix_t::s_num_columns, 1, n_size, m_size); + m.load_packed_nm(data1_ptr, 2 * matrix_t::s_num_columns, 1, + n_size, m_size); } else { - m.load_strided_nm( - data1_ptr, 2 * matrix_t::s_num_columns, 1, n_size, m_size); + m.load_strided_nm(data1_ptr, 2 * matrix_t::s_num_columns, 1, + n_size, m_size); } // write out to a second view so we can check it on the host diff --git a/test/functional/tensor/matrix/tests/test-tensor-matrix-Store_ColMajor.hpp b/test/functional/tensor/matrix/tests/test-tensor-matrix-Store_ColMajor.hpp index b991b37225..974994c7e1 100644 --- a/test/functional/tensor/matrix/tests/test-tensor-matrix-Store_ColMajor.hpp +++ b/test/functional/tensor/matrix/tests/test-tensor-matrix-Store_ColMajor.hpp @@ -26,7 +26,7 @@ void Store_ColMajorImpl() // alloc data1 - matrix data will be generated on device, stored into data1 std::vector data1_vec(4 * matrix_t::s_num_rows * - matrix_t::s_num_columns); + matrix_t::s_num_columns); RAJA::View> data1_h( data1_vec.data(), 2 * matrix_t::s_num_columns, 2 * matrix_t::s_num_rows); @@ -38,7 +38,7 @@ void Store_ColMajorImpl() // alloc data2 - reference data to compare with data1 on host std::vector data2_vec(matrix_t::s_num_rows * - matrix_t::s_num_columns); + matrix_t::s_num_columns); RAJA::View> data2_h( data2_vec.data(), matrix_t::s_num_columns, matrix_t::s_num_rows); @@ -161,13 +161,13 @@ void Store_ColMajorImpl() // Store matrix to memory if (matrix_t::layout_type::is_column_major()) { - m.store_packed_nm( - data1_ptr, 1, 2 * matrix_t::s_num_rows, n_size, m_size); + m.store_packed_nm(data1_ptr, 1, 2 * matrix_t::s_num_rows, n_size, + m_size); } else { - m.store_strided_nm( - data1_ptr, 1, 2 * matrix_t::s_num_rows, n_size, m_size); + m.store_strided_nm(data1_ptr, 1, 2 * matrix_t::s_num_rows, n_size, + m_size); } }); diff --git a/test/functional/tensor/matrix/tests/test-tensor-matrix-Store_RowMajor.hpp b/test/functional/tensor/matrix/tests/test-tensor-matrix-Store_RowMajor.hpp index c077afcb49..3958c2bf5d 100644 --- a/test/functional/tensor/matrix/tests/test-tensor-matrix-Store_RowMajor.hpp +++ b/test/functional/tensor/matrix/tests/test-tensor-matrix-Store_RowMajor.hpp @@ -25,7 +25,7 @@ void Store_RowMajorImpl() // alloc data1 - matrix data will be generated on device, stored into data1 std::vector data1_vec(4 * matrix_t::s_num_rows * - matrix_t::s_num_columns); + matrix_t::s_num_columns); RAJA::View> data1_h( data1_vec.data(), 2 * matrix_t::s_num_rows, 2 * matrix_t::s_num_columns); @@ -37,7 +37,7 @@ void Store_RowMajorImpl() // alloc data2 - reference data to compare with data1 on host std::vector data2_vec(matrix_t::s_num_rows * - matrix_t::s_num_columns); + matrix_t::s_num_columns); RAJA::View> data2_h( data2_vec.data(), matrix_t::s_num_rows, matrix_t::s_num_columns); @@ -160,13 +160,13 @@ void Store_RowMajorImpl() // Store matrix to memory if (matrix_t::layout_type::is_row_major()) { - m.store_packed_nm( - data1_ptr, 2 * matrix_t::s_num_columns, 1, n_size, m_size); + m.store_packed_nm(data1_ptr, 2 * matrix_t::s_num_columns, 1, + n_size, m_size); } else { - m.store_strided_nm( - data1_ptr, 2 * matrix_t::s_num_columns, 1, n_size, m_size); + m.store_strided_nm(data1_ptr, 2 * matrix_t::s_num_columns, 1, + n_size, m_size); } }); diff --git a/test/functional/tensor/vector/tests/test-tensor-vector-ForallVectorRef2d.hpp b/test/functional/tensor/vector/tests/test-tensor-vector-ForallVectorRef2d.hpp index d4ff9d90a8..3b1111b6ef 100644 --- a/test/functional/tensor/vector/tests/test-tensor-vector-ForallVectorRef2d.hpp +++ b/test/functional/tensor/vector/tests/test-tensor-vector-ForallVectorRef2d.hpp @@ -85,10 +85,8 @@ ForallVectorRef2dImpl() } using policy2_t = RAJA::KernelPolicy, + 0, RAJA::seq_exec, + RAJA::statement::For<1, RAJA::expt::vector_exec, RAJA::statement::Lambda<0>>>>; RAJA::kernel( diff --git a/test/functional/util/test-CombiningAdapter-2D.cpp b/test/functional/util/test-CombiningAdapter-2D.cpp index d72f1f5297..f4658f2cde 100644 --- a/test/functional/util/test-CombiningAdapter-2D.cpp +++ b/test/functional/util/test-CombiningAdapter-2D.cpp @@ -46,8 +46,7 @@ void test_CombiningAdapter_2D(Segment0 const& seg0, Segment1 const& seg1) counter0 += 1; } }, - seg0, - seg1); + seg0, seg1); ASSERT_EQ(adapter.size(), seg0.size() * seg1.size()); diff --git a/test/functional/util/test-CombiningAdapter-3D.cpp b/test/functional/util/test-CombiningAdapter-3D.cpp index 39231bb232..ffcce8a06f 100644 --- a/test/functional/util/test-CombiningAdapter-3D.cpp +++ b/test/functional/util/test-CombiningAdapter-3D.cpp @@ -59,9 +59,7 @@ void test_CombiningAdapter_3D(Segment0 const& seg0, } } }, - seg0, - seg1, - seg2); + seg0, seg1, seg2); ASSERT_EQ(adapter.size(), seg0.size() * seg1.size() * seg2.size()); diff --git a/test/functional/util/test-PermutedCombiningAdapter-2D.cpp b/test/functional/util/test-PermutedCombiningAdapter-2D.cpp index 104c213f96..882af87b07 100644 --- a/test/functional/util/test-PermutedCombiningAdapter-2D.cpp +++ b/test/functional/util/test-PermutedCombiningAdapter-2D.cpp @@ -44,8 +44,7 @@ void test_PermutedCombiningAdapter_2D(Segment const& seg0, Segment const& seg1) counters[camp::seq_at<0, Perm>::value] += 1; } }, - seg0, - seg1); + seg0, seg1); ASSERT_EQ(adapter.size(), seg0.size() * seg1.size()); diff --git a/test/functional/util/test-PermutedCombiningAdapter-3D.cpp b/test/functional/util/test-PermutedCombiningAdapter-3D.cpp index 3790882fa1..ab0ed6b4dd 100644 --- a/test/functional/util/test-PermutedCombiningAdapter-3D.cpp +++ b/test/functional/util/test-PermutedCombiningAdapter-3D.cpp @@ -55,9 +55,7 @@ void test_PermutedCombiningAdapter_3D(Segment const& seg0, } } }, - seg0, - seg1, - seg2); + seg0, seg1, seg2); ASSERT_EQ(adapter.size(), seg0.size() * seg1.size() * seg2.size()); @@ -95,10 +93,10 @@ TEST(PermutedCombiningAdapter, test3D) test_types_PermutedCombiningAdapter_3D(0, 0, 0, 0, 0, 5); test_types_PermutedCombiningAdapter_3D(0, 3, 0, 4, 0, 5); - test_types_PermutedCombiningAdapter_3D( - -3, 5, 0, 6, 2, 5); - test_types_PermutedCombiningAdapter_3D( - 4, 13, -2, 7, -3, 0); - test_types_PermutedCombiningAdapter_3D( - -8, -2, -5, 3, 1, 4); + test_types_PermutedCombiningAdapter_3D(-3, 5, 0, 6, 2, + 5); + test_types_PermutedCombiningAdapter_3D(4, 13, -2, 7, -3, + 0); + test_types_PermutedCombiningAdapter_3D(-8, -2, -5, 3, 1, + 4); } diff --git a/test/functional/workgroup/tests/test-workgroup-Ordered-MultipleReuse.hpp b/test/functional/workgroup/tests/test-workgroup-Ordered-MultipleReuse.hpp index 412529e661..bb3f2917bb 100644 --- a/test/functional/workgroup/tests/test-workgroup-Ordered-MultipleReuse.hpp +++ b/test/functional/workgroup/tests/test-workgroup-Ordered-MultipleReuse.hpp @@ -147,14 +147,14 @@ struct testWorkGroupOrderedMultiple type3* check_array3 = nullptr; type3* test_array3 = nullptr; - allocateForallTestData( - N * num1, working_res, &working_array1, &check_array1, &test_array1); + allocateForallTestData(N * num1, working_res, &working_array1, + &check_array1, &test_array1); - allocateForallTestData( - N * num2, working_res, &working_array2, &check_array2, &test_array2); + allocateForallTestData(N * num2, working_res, &working_array2, + &check_array2, &test_array2); - allocateForallTestData( - N * num3, working_res, &working_array3, &check_array3, &test_array3); + allocateForallTestData(N * num3, working_res, &working_array3, + &check_array3, &test_array3); type1 const test_val1(5); type2 const test_val2(7); @@ -171,30 +171,20 @@ struct testWorkGroupOrderedMultiple camp::list>, camp::list>>; - using WorkPool_type = RAJA::WorkPool, - IndexType, - RAJA::xargs<>, - Allocator>; + using WorkPool_type = + RAJA::WorkPool, + IndexType, RAJA::xargs<>, Allocator>; using WorkGroup_type = - RAJA::WorkGroup, - IndexType, - RAJA::xargs<>, - Allocator>; - - using WorkSite_type = RAJA::WorkSite, - IndexType, - RAJA::xargs<>, - Allocator>; + RAJA::WorkGroup, + IndexType, RAJA::xargs<>, Allocator>; + + using WorkSite_type = + RAJA::WorkSite, + IndexType, RAJA::xargs<>, Allocator>; using resource_type = typename WorkGroup_type::resource_type; @@ -384,14 +374,14 @@ struct testWorkGroupOrderedMultiple } - deallocateForallTestData( - working_res, working_array1, check_array1, test_array1); + deallocateForallTestData(working_res, working_array1, check_array1, + test_array1); - deallocateForallTestData( - working_res, working_array2, check_array2, test_array2); + deallocateForallTestData(working_res, working_array2, check_array2, + test_array2); - deallocateForallTestData( - working_res, working_array3, check_array3, test_array3); + deallocateForallTestData(working_res, working_array3, check_array3, + test_array3); } }; @@ -482,20 +472,11 @@ TYPED_TEST_P(WorkGroupBasicOrderedMultipleReuseFunctionalTest, IndexType pool_reuse = dist_type(IndexType(0), IndexType(8))(rng); IndexType group_reuse = dist_type(IndexType(0), IndexType(8))(rng); - testWorkGroupOrderedMultiple{}(rng, - IndexType(96), - IndexType(4000), - num1, - num2, - num3, - pool_reuse, - group_reuse); + testWorkGroupOrderedMultiple{}( + rng, IndexType(96), IndexType(4000), num1, num2, num3, pool_reuse, + group_reuse); } #endif //__TEST_WORKGROUP_ORDERED_MULTIPLEREUSE__ diff --git a/test/functional/workgroup/tests/test-workgroup-Ordered-Single.hpp b/test/functional/workgroup/tests/test-workgroup-Ordered-Single.hpp index 7437cb3059..d2989bd763 100644 --- a/test/functional/workgroup/tests/test-workgroup-Ordered-Single.hpp +++ b/test/functional/workgroup/tests/test-workgroup-Ordered-Single.hpp @@ -41,8 +41,8 @@ struct testWorkGroupOrderedSingle IndexType* check_array; IndexType* test_array; - allocateForallTestData( - N, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(N, working_res, &working_array, + &check_array, &test_array); IndexType const test_val(5); @@ -58,30 +58,20 @@ struct testWorkGroupOrderedSingle camp::list, camp::list>; - using WorkPool_type = RAJA::WorkPool, - IndexType, - RAJA::xargs<>, - Allocator>; + using WorkPool_type = + RAJA::WorkPool, + IndexType, RAJA::xargs<>, Allocator>; using WorkGroup_type = - RAJA::WorkGroup, - IndexType, - RAJA::xargs<>, - Allocator>; - - using WorkSite_type = RAJA::WorkSite, - IndexType, - RAJA::xargs<>, - Allocator>; + RAJA::WorkGroup, + IndexType, RAJA::xargs<>, Allocator>; + + using WorkSite_type = + RAJA::WorkSite, + IndexType, RAJA::xargs<>, Allocator>; { for (IndexType i = IndexType(0); i < N; i++) @@ -127,8 +117,8 @@ struct testWorkGroupOrderedSingle } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } }; @@ -205,26 +195,14 @@ TYPED_TEST_P(WorkGroupBasicOrderedSingleFunctionalTest, IndexType b3 = dist_type(e2, IndexType(1023))(rng); IndexType e3 = dist_type(b3, IndexType(1024))(rng); - testWorkGroupOrderedSingle{}(b1, e1); - testWorkGroupOrderedSingle{}(b2, e2); - testWorkGroupOrderedSingle{}(b3, e3); } diff --git a/test/functional/workgroup/tests/test-workgroup-Unordered-MultipleReuse.hpp b/test/functional/workgroup/tests/test-workgroup-Unordered-MultipleReuse.hpp index dba9f98844..57701c806f 100644 --- a/test/functional/workgroup/tests/test-workgroup-Unordered-MultipleReuse.hpp +++ b/test/functional/workgroup/tests/test-workgroup-Unordered-MultipleReuse.hpp @@ -119,14 +119,14 @@ struct testWorkGroupUnorderedMultiple type3* check_array3 = nullptr; type3* test_array3 = nullptr; - allocateForallTestData( - N * num1, working_res, &working_array1, &check_array1, &test_array1); + allocateForallTestData(N * num1, working_res, &working_array1, + &check_array1, &test_array1); - allocateForallTestData( - N * num2, working_res, &working_array2, &check_array2, &test_array2); + allocateForallTestData(N * num2, working_res, &working_array2, + &check_array2, &test_array2); - allocateForallTestData( - N * num3, working_res, &working_array3, &check_array3, &test_array3); + allocateForallTestData(N * num3, working_res, &working_array3, + &check_array3, &test_array3); type1 const test_val1(5); type2 const test_val2(7); @@ -139,30 +139,20 @@ struct testWorkGroupUnorderedMultiple camp::list>, camp::list>>; - using WorkPool_type = RAJA::WorkPool, - IndexType, - RAJA::xargs<>, - Allocator>; + using WorkPool_type = + RAJA::WorkPool, + IndexType, RAJA::xargs<>, Allocator>; using WorkGroup_type = - RAJA::WorkGroup, - IndexType, - RAJA::xargs<>, - Allocator>; - - using WorkSite_type = RAJA::WorkSite, - IndexType, - RAJA::xargs<>, - Allocator>; + RAJA::WorkGroup, + IndexType, RAJA::xargs<>, Allocator>; + + using WorkSite_type = + RAJA::WorkSite, + IndexType, RAJA::xargs<>, Allocator>; WorkPool_type pool(Allocator{}); @@ -336,14 +326,14 @@ struct testWorkGroupUnorderedMultiple } - deallocateForallTestData( - working_res, working_array1, check_array1, test_array1); + deallocateForallTestData(working_res, working_array1, check_array1, + test_array1); - deallocateForallTestData( - working_res, working_array2, check_array2, test_array2); + deallocateForallTestData(working_res, working_array2, check_array2, + test_array2); - deallocateForallTestData( - working_res, working_array3, check_array3, test_array3); + deallocateForallTestData(working_res, working_array3, check_array3, + test_array3); } }; @@ -435,20 +425,11 @@ TYPED_TEST_P(WorkGroupBasicUnorderedMultipleReuseFunctionalTest, IndexType pool_reuse = dist_type(IndexType(0), IndexType(8))(rng); IndexType group_reuse = dist_type(IndexType(0), IndexType(8))(rng); - testWorkGroupUnorderedMultiple{}(rng, - IndexType(96), - IndexType(4000), - num1, - num2, - num3, - pool_reuse, - group_reuse); + testWorkGroupUnorderedMultiple{}( + rng, IndexType(96), IndexType(4000), num1, num2, num3, pool_reuse, + group_reuse); } #endif //__TEST_WORKGROUP_UNORDERED_MULTIPLEREUSE__ diff --git a/test/functional/workgroup/tests/test-workgroup-Unordered-Single.hpp b/test/functional/workgroup/tests/test-workgroup-Unordered-Single.hpp index 2d782c6c72..bb1fefe83f 100644 --- a/test/functional/workgroup/tests/test-workgroup-Unordered-Single.hpp +++ b/test/functional/workgroup/tests/test-workgroup-Unordered-Single.hpp @@ -41,8 +41,8 @@ struct testWorkGroupUnorderedSingle IndexType* check_array; IndexType* test_array; - allocateForallTestData( - N, working_res, &working_array, &check_array, &test_array); + allocateForallTestData(N, working_res, &working_array, + &check_array, &test_array); IndexType const test_val(5); @@ -54,30 +54,20 @@ struct testWorkGroupUnorderedSingle using DispatchPolicy = typename DispatchTyper::template type< camp::list>; - using WorkPool_type = RAJA::WorkPool, - IndexType, - RAJA::xargs<>, - Allocator>; + using WorkPool_type = + RAJA::WorkPool, + IndexType, RAJA::xargs<>, Allocator>; using WorkGroup_type = - RAJA::WorkGroup, - IndexType, - RAJA::xargs<>, - Allocator>; - - using WorkSite_type = RAJA::WorkSite, - IndexType, - RAJA::xargs<>, - Allocator>; + RAJA::WorkGroup, + IndexType, RAJA::xargs<>, Allocator>; + + using WorkSite_type = + RAJA::WorkSite, + IndexType, RAJA::xargs<>, Allocator>; using resource_type = typename WorkSite_type::resource_type; static_assert(std::is_same::value, @@ -128,8 +118,8 @@ struct testWorkGroupUnorderedSingle } - deallocateForallTestData( - working_res, working_array, check_array, test_array); + deallocateForallTestData(working_res, working_array, check_array, + test_array); } }; @@ -206,26 +196,14 @@ TYPED_TEST_P(WorkGroupBasicUnorderedSingleFunctionalTest, IndexType b3 = dist_type(e2, IndexType(1023))(rng); IndexType e3 = dist_type(b3, IndexType(1024))(rng); - testWorkGroupUnorderedSingle{}(b1, e1); - testWorkGroupUnorderedSingle{}(b2, e2); - testWorkGroupUnorderedSingle{}(b3, e3); } diff --git a/test/include/RAJA_gtest.hpp b/test/include/RAJA_gtest.hpp index aedc570448..0ab921f7d9 100644 --- a/test/include/RAJA_gtest.hpp +++ b/test/include/RAJA_gtest.hpp @@ -34,9 +34,7 @@ #define GPU_TEST_F(test_fixture, test_name) \ static void gpu_test_f_##test_fixture##_##test_name(); \ - GTEST_TEST_(test_fixture, \ - test_name, \ - test_fixture, \ + GTEST_TEST_(test_fixture, test_name, test_fixture, \ ::testing::internal::GetTypeId()) \ { \ gpu_test_f_##test_fixture##_##test_name(); \ @@ -65,8 +63,7 @@ #test_case_name, \ ::testing::internal::CodeLocation(__FILE__, __LINE__)) \ ->AddTestPattern( \ - #test_case_name, \ - #test_name, \ + #test_case_name, #test_name, \ new ::testing::internal::TestMetaFactory()); \ return 0; \ @@ -96,9 +93,7 @@ }; \ static bool gtest_##TestName##_defined_ GTEST_ATTRIBUTE_UNUSED_ = \ GTEST_TYPED_TEST_SUITE_P_STATE_(SuiteName).AddTestName( \ - __FILE__, \ - __LINE__, \ - GTEST_STRINGIFY_(SuiteName), \ + __FILE__, __LINE__, GTEST_STRINGIFY_(SuiteName), \ GTEST_STRINGIFY_(TestName)); \ } \ template \ diff --git a/test/include/RAJA_test-tensor.hpp b/test/include/RAJA_test-tensor.hpp index 801222dd5a..ed426fab4e 100644 --- a/test/include/RAJA_test-tensor.hpp +++ b/test/include/RAJA_test-tensor.hpp @@ -134,8 +134,8 @@ void tensor_copy_to_device(T* d_ptr, std::vector const& h_vec) { if (TensorTestHelper::is_device) { - cudaErrchk(cudaMemcpy( - d_ptr, h_vec.data(), h_vec.size() * sizeof(T), cudaMemcpyHostToDevice)); + cudaErrchk(cudaMemcpy(d_ptr, h_vec.data(), h_vec.size() * sizeof(T), + cudaMemcpyHostToDevice)); } else { @@ -148,8 +148,8 @@ void tensor_copy_to_host(std::vector& h_vec, T const* d_ptr) { if (TensorTestHelper::is_device) { - cudaErrchk(cudaMemcpy( - h_vec.data(), d_ptr, h_vec.size() * sizeof(T), cudaMemcpyDeviceToHost)); + cudaErrchk(cudaMemcpy(h_vec.data(), d_ptr, h_vec.size() * sizeof(T), + cudaMemcpyDeviceToHost)); } else { @@ -196,8 +196,8 @@ void tensor_copy_to_device(T* d_ptr, std::vector const& h_vec) { if (TensorTestHelper::is_device) { - hipErrchk(hipMemcpy( - d_ptr, h_vec.data(), h_vec.size() * sizeof(T), hipMemcpyHostToDevice)); + hipErrchk(hipMemcpy(d_ptr, h_vec.data(), h_vec.size() * sizeof(T), + hipMemcpyHostToDevice)); } else { @@ -210,8 +210,8 @@ void tensor_copy_to_host(std::vector& h_vec, T const* d_ptr) { if (TensorTestHelper::is_device) { - hipErrchk(hipMemcpy( - h_vec.data(), d_ptr, h_vec.size() * sizeof(T), hipMemcpyDeviceToHost)); + hipErrchk(hipMemcpy(h_vec.data(), d_ptr, h_vec.size() * sizeof(T), + hipMemcpyDeviceToHost)); } else { diff --git a/test/include/RAJA_unit-test-for3d3d.hpp b/test/include/RAJA_unit-test-for3d3d.hpp index 878bf7b7ca..37eb5652ea 100644 --- a/test/include/RAJA_unit-test-for3d3d.hpp +++ b/test/include/RAJA_unit-test-for3d3d.hpp @@ -123,17 +123,13 @@ inline void for3d3d(test_openmp_target, dim3d3d dim, L&& run) template __global__ void for3d3d_cuda_global(L run) { - run(dim3d3d{{static_cast(threadIdx.x), - static_cast(threadIdx.y), + run(dim3d3d{{static_cast(threadIdx.x), static_cast(threadIdx.y), static_cast(threadIdx.z)}, - {static_cast(blockIdx.x), - static_cast(blockIdx.y), + {static_cast(blockIdx.x), static_cast(blockIdx.y), static_cast(blockIdx.z)}}, - dim3d3d{{static_cast(blockDim.x), - static_cast(blockDim.y), + dim3d3d{{static_cast(blockDim.x), static_cast(blockDim.y), static_cast(blockDim.z)}, - {static_cast(gridDim.x), - static_cast(gridDim.y), + {static_cast(gridDim.x), static_cast(gridDim.y), static_cast(gridDim.z)}}); } @@ -155,17 +151,13 @@ inline void for3d3d(test_cuda, dim3d3d dim, L&& run) template __global__ void for3d3d_hip_global(L run) { - run(dim3d3d{{static_cast(threadIdx.x), - static_cast(threadIdx.y), + run(dim3d3d{{static_cast(threadIdx.x), static_cast(threadIdx.y), static_cast(threadIdx.z)}, - {static_cast(blockIdx.x), - static_cast(blockIdx.y), + {static_cast(blockIdx.x), static_cast(blockIdx.y), static_cast(blockIdx.z)}}, - dim3d3d{{static_cast(blockDim.x), - static_cast(blockDim.y), + dim3d3d{{static_cast(blockDim.x), static_cast(blockDim.y), static_cast(blockDim.z)}, - {static_cast(gridDim.x), - static_cast(gridDim.y), + {static_cast(gridDim.x), static_cast(gridDim.y), static_cast(gridDim.z)}}); } @@ -175,9 +167,7 @@ inline void for3d3d(test_hip, dim3d3d dim, L&& run) { hipLaunchKernelGGL(for3d3d_hip_global>, dim3(dim.block[0], dim.block[1], dim.block[2]), - dim3(dim.thread[0], dim.thread[1], dim.thread[2]), - 0, - 0, + dim3(dim.thread[0], dim.thread[1], dim.thread[2]), 0, 0, std::forward(run)); hipErrchk(hipGetLastError()); hipErrchk(hipDeviceSynchronize()); diff --git a/test/include/RAJA_unit-test-forone.hpp b/test/include/RAJA_unit-test-forone.hpp index 2d9b5ba453..615f250e0e 100644 --- a/test/include/RAJA_unit-test-forone.hpp +++ b/test/include/RAJA_unit-test-forone.hpp @@ -71,11 +71,7 @@ __global__ void forone_hip_global(L run) template inline void forone(test_hip, L&& run) { - hipLaunchKernelGGL(forone_hip_global>, - dim3(1), - dim3(1), - 0, - 0, + hipLaunchKernelGGL(forone_hip_global>, dim3(1), dim3(1), 0, 0, std::forward(run)); hipErrchk(hipGetLastError()); hipErrchk(hipDeviceSynchronize()); diff --git a/test/integration/plugin/tests/test-plugin-forall.hpp b/test/integration/plugin/tests/test-plugin-forall.hpp index 8a5f030ac5..0ef04acdcb 100644 --- a/test/integration/plugin/tests/test-plugin-forall.hpp +++ b/test/integration/plugin/tests/test-plugin-forall.hpp @@ -45,8 +45,8 @@ void PluginForallTestImpl() } CounterData plugin_data; - plugin_test_resource->memcpy( - &plugin_data, plugin_test_data, sizeof(CounterData)); + plugin_test_resource->memcpy(&plugin_data, plugin_test_data, + sizeof(CounterData)); ASSERT_EQ(plugin_data.capture_platform_active, RAJA::Platform::undefined); ASSERT_EQ(plugin_data.capture_counter_pre, 10); ASSERT_EQ(plugin_data.capture_counter_post, 10); @@ -68,8 +68,8 @@ void PluginForAllICountTestImpl() for (int i = 0; i < 10; i++) { - RAJA::forall_Icount( - RAJA::RangeSegment(i, i + 1), i, PluginTestCallable{data}); + RAJA::forall_Icount(RAJA::RangeSegment(i, i + 1), i, + PluginTestCallable{data}); CounterData loop_data; plugin_test_resource->memcpy(&loop_data, &data[i], sizeof(CounterData)); @@ -82,8 +82,8 @@ void PluginForAllICountTestImpl() } CounterData plugin_data; - plugin_test_resource->memcpy( - &plugin_data, plugin_test_data, sizeof(CounterData)); + plugin_test_resource->memcpy(&plugin_data, plugin_test_data, + sizeof(CounterData)); ASSERT_EQ(plugin_data.capture_platform_active, RAJA::Platform::undefined); ASSERT_EQ(plugin_data.capture_counter_pre, 10); ASSERT_EQ(plugin_data.capture_counter_post, 10); @@ -129,8 +129,8 @@ void PluginForAllIdxSetTestImpl() } CounterData plugin_data; - plugin_test_resource->memcpy( - &plugin_data, plugin_test_data, sizeof(CounterData)); + plugin_test_resource->memcpy(&plugin_data, plugin_test_data, + sizeof(CounterData)); ASSERT_EQ(plugin_data.capture_platform_active, RAJA::Platform::undefined); ASSERT_EQ(plugin_data.capture_counter_pre, 10); ASSERT_EQ(plugin_data.capture_counter_post, 10); @@ -176,8 +176,8 @@ void PluginForAllIcountIdxSetTestImpl() } CounterData plugin_data; - plugin_test_resource->memcpy( - &plugin_data, plugin_test_data, sizeof(CounterData)); + plugin_test_resource->memcpy(&plugin_data, plugin_test_data, + sizeof(CounterData)); ASSERT_EQ(plugin_data.capture_platform_active, RAJA::Platform::undefined); ASSERT_EQ(plugin_data.capture_counter_pre, 10); ASSERT_EQ(plugin_data.capture_counter_post, 10); @@ -226,8 +226,7 @@ TYPED_TEST_P(PluginForallTest, PluginForAllIcountIdxSet) using ResType = typename camp::at>::type; using PlatformHolder = typename camp::at>::type; - PluginForAllIcountIdxSetTestImpl(); } diff --git a/test/integration/plugin/tests/test-plugin-kernel.hpp b/test/integration/plugin/tests/test-plugin-kernel.hpp index c9139c3d15..0f1a2f0a82 100644 --- a/test/integration/plugin/tests/test-plugin-kernel.hpp +++ b/test/integration/plugin/tests/test-plugin-kernel.hpp @@ -45,8 +45,8 @@ void PluginKernelTestImpl() } CounterData plugin_data; - plugin_test_resource->memcpy( - &plugin_data, plugin_test_data, sizeof(CounterData)); + plugin_test_resource->memcpy(&plugin_data, plugin_test_data, + sizeof(CounterData)); ASSERT_EQ(plugin_data.capture_platform_active, RAJA::Platform::undefined); ASSERT_EQ(plugin_data.capture_counter_pre, 10); ASSERT_EQ(plugin_data.capture_counter_post, 10); diff --git a/test/integration/plugin/tests/test-plugin-launch.hpp b/test/integration/plugin/tests/test-plugin-launch.hpp index 43cd98a866..46674df4cf 100644 --- a/test/integration/plugin/tests/test-plugin-launch.hpp +++ b/test/integration/plugin/tests/test-plugin-launch.hpp @@ -53,8 +53,8 @@ void PluginLaunchTestImpl() } CounterData plugin_data; - plugin_test_resource->memcpy( - &plugin_data, plugin_test_data, sizeof(CounterData)); + plugin_test_resource->memcpy(&plugin_data, plugin_test_data, + sizeof(CounterData)); ASSERT_EQ(plugin_data.capture_platform_active, RAJA::Platform::undefined); ASSERT_EQ(plugin_data.capture_counter_pre, 10); ASSERT_EQ(plugin_data.capture_counter_post, 10); diff --git a/test/integration/plugin/tests/test-plugin-resource-launch.hpp b/test/integration/plugin/tests/test-plugin-resource-launch.hpp index 792372531e..b71d4e707e 100644 --- a/test/integration/plugin/tests/test-plugin-resource-launch.hpp +++ b/test/integration/plugin/tests/test-plugin-resource-launch.hpp @@ -39,8 +39,7 @@ void PluginResourceLaunchTestImpl() PluginTestCallable p_callable{data}; RAJA::launch( - res, - RAJA::LaunchParams(RAJA::Teams(1), RAJA::Threads(1)), + res, RAJA::LaunchParams(RAJA::Teams(1), RAJA::Threads(1)), [=] RAJA_HOST_DEVICE(RAJA::LaunchContext RAJA_UNUSED_ARG(ctx)) { p_callable(i); }); } @@ -56,8 +55,8 @@ void PluginResourceLaunchTestImpl() } CounterData plugin_data; - plugin_test_resource->memcpy( - &plugin_data, plugin_test_data, sizeof(CounterData)); + plugin_test_resource->memcpy(&plugin_data, plugin_test_data, + sizeof(CounterData)); ASSERT_EQ(plugin_data.capture_platform_active, RAJA::Platform::undefined); ASSERT_EQ(plugin_data.capture_counter_pre, 10); ASSERT_EQ(plugin_data.capture_counter_post, 10); @@ -80,8 +79,7 @@ TYPED_TEST_P(PluginResourceLaunchTest, PluginResourceLaunch) using ResType = typename camp::at>::type; using PlatformHolder = typename camp::at>::type; - PluginResourceLaunchTestImpl(); } diff --git a/test/integration/plugin/tests/test-plugin-workgroup.hpp b/test/integration/plugin/tests/test-plugin-workgroup.hpp index 2c9fcbe1cf..040991f03f 100644 --- a/test/integration/plugin/tests/test-plugin-workgroup.hpp +++ b/test/integration/plugin/tests/test-plugin-workgroup.hpp @@ -38,30 +38,20 @@ struct PluginWorkGroupTestImpl using DispatchPolicy = typename DispatchTyper::template type< camp::list>; - using WorkPool_type = RAJA::WorkPool, - IndexType, - RAJA::xargs<>, - Allocator>; + using WorkPool_type = + RAJA::WorkPool, + IndexType, RAJA::xargs<>, Allocator>; using WorkGroup_type = - RAJA::WorkGroup, - IndexType, - RAJA::xargs<>, - Allocator>; - - using WorkSite_type = RAJA::WorkSite, - IndexType, - RAJA::xargs<>, - Allocator>; + RAJA::WorkGroup, + IndexType, RAJA::xargs<>, Allocator>; + + using WorkSite_type = + RAJA::WorkSite, + IndexType, RAJA::xargs<>, Allocator>; SetupPluginVars spv(WORKINGRES{}); @@ -78,8 +68,8 @@ struct PluginWorkGroupTestImpl loop_data[i].launch_counter_pre = -1; loop_data[i].launch_counter_post = -1; } - plugin_test_resource->memcpy( - data, &loop_data[0], 10 * sizeof(CounterData)); + plugin_test_resource->memcpy(data, &loop_data[0], + 10 * sizeof(CounterData)); } WorkPool_type pool(Allocator{}); @@ -91,8 +81,8 @@ struct PluginWorkGroupTestImpl { CounterData plugin_data; - plugin_test_resource->memcpy( - &plugin_data, plugin_test_data, sizeof(CounterData)); + plugin_test_resource->memcpy(&plugin_data, plugin_test_data, + sizeof(CounterData)); ASSERT_EQ(plugin_data.capture_platform_active, RAJA::Platform::undefined); ASSERT_EQ(plugin_data.capture_counter_pre, 10); ASSERT_EQ(plugin_data.capture_counter_post, 10); @@ -103,8 +93,8 @@ struct PluginWorkGroupTestImpl { CounterData loop_data[10]; - plugin_test_resource->memcpy( - &loop_data[0], data, 10 * sizeof(CounterData)); + plugin_test_resource->memcpy(&loop_data[0], data, + 10 * sizeof(CounterData)); for (int i = 0; i < 10; i++) { @@ -123,8 +113,8 @@ struct PluginWorkGroupTestImpl { CounterData plugin_data; - plugin_test_resource->memcpy( - &plugin_data, plugin_test_data, sizeof(CounterData)); + plugin_test_resource->memcpy(&plugin_data, plugin_test_data, + sizeof(CounterData)); ASSERT_EQ(plugin_data.capture_platform_active, RAJA::Platform::undefined); ASSERT_EQ(plugin_data.capture_counter_pre, 10); ASSERT_EQ(plugin_data.capture_counter_post, 10); @@ -135,8 +125,8 @@ struct PluginWorkGroupTestImpl { CounterData loop_data[10]; - plugin_test_resource->memcpy( - &loop_data[0], data, 10 * sizeof(CounterData)); + plugin_test_resource->memcpy(&loop_data[0], data, + 10 * sizeof(CounterData)); for (int i = 0; i < 10; i++) { @@ -155,8 +145,8 @@ struct PluginWorkGroupTestImpl { CounterData plugin_data; - plugin_test_resource->memcpy( - &plugin_data, plugin_test_data, sizeof(CounterData)); + plugin_test_resource->memcpy(&plugin_data, plugin_test_data, + sizeof(CounterData)); ASSERT_EQ(plugin_data.capture_platform_active, RAJA::Platform::undefined); ASSERT_EQ(plugin_data.capture_counter_pre, 10); ASSERT_EQ(plugin_data.capture_counter_post, 10); @@ -247,13 +237,8 @@ TYPED_TEST_P(PluginWorkGroupTest, PluginWorkGroup) using WORKING_RESOURCE = typename camp::at>::type; using PlatformHolder = typename camp::at>::type; - PluginWorkGroupTestImpl{}(); } diff --git a/test/old-tests/unit/test-sharedmem.cpp b/test/old-tests/unit/test-sharedmem.cpp index 55dd8924f0..1ac947ae8c 100644 --- a/test/old-tests/unit/test-sharedmem.cpp +++ b/test/old-tests/unit/test-sharedmem.cpp @@ -76,12 +76,9 @@ GPU_TYPED_TEST_P(TypedLocalMem, Basic) } } - using SharedTile = AtomicTypedLocalArray, - TY, - TX>; + using SharedTile = + AtomicTypedLocalArray, TY, TX>; SharedTile myTile, myTile2; const TX TX_TILE_DIM(16); @@ -95,8 +92,8 @@ GPU_TYPED_TEST_P(TypedLocalMem, Basic) RAJA::make_tuple(myTile, myTile2), // Load data into shared memory - [=] RAJA_HOST_DEVICE( - TX tx, TY ty, TX bx, TY by, SharedTile & myTile, SharedTile&) + [=] RAJA_HOST_DEVICE(TX tx, TY ty, TX bx, TY by, SharedTile & myTile, + SharedTile&) { TX col = bx * TX_TILE_DIM + tx; // Matrix column index TY row = by * TY_TILE_DIM + ty; // Matrix row index @@ -108,8 +105,8 @@ GPU_TYPED_TEST_P(TypedLocalMem, Basic) }, // read from shared mem - [=] RAJA_HOST_DEVICE( - TX tx, TY ty, TX bx, TY by, SharedTile & myTile, SharedTile&) + [=] RAJA_HOST_DEVICE(TX tx, TY ty, TX bx, TY by, SharedTile & myTile, + SharedTile&) { TX col = bx * TX_TILE_DIM + tx; // Matrix column index TY row = by * TY_TILE_DIM + ty; // Matrix row index @@ -175,10 +172,10 @@ GPU_TYPED_TEST_P(TypedLocalMem_gpu, Basic) RAJA::TypedView, TY, TX> Aview(A, N_rows, N_cols); RAJA::TypedView, TY, TX> Bview(B, N_rows, N_cols); - RAJA::TypedView, TY, TX> d_Aview( - d_A, N_rows, N_cols); - RAJA::TypedView, TY, TX> d_Bview( - d_B, N_rows, N_cols); + RAJA::TypedView, TY, TX> d_Aview(d_A, N_rows, + N_cols); + RAJA::TypedView, TY, TX> d_Bview(d_B, N_rows, + N_cols); for (int row = 0; row < N_rows; ++row) { @@ -190,11 +187,9 @@ GPU_TYPED_TEST_P(TypedLocalMem_gpu, Basic) hipMemcpy(d_A, A, Arr_sz * sizeof(double), hipMemcpyHostToDevice); - using SharedTile = TypedLocalArray, - TY, - TX>; + using SharedTile = + TypedLocalArray, + TY, TX>; SharedTile myTile, myTile2; const TX TX_TILE_DIM(16); @@ -208,8 +203,8 @@ GPU_TYPED_TEST_P(TypedLocalMem_gpu, Basic) RAJA::make_tuple(myTile, myTile2), // Load data into shared memory - [=] RAJA_HOST_DEVICE( - TX tx, TY ty, TX bx, TY by, SharedTile & myTile, SharedTile&) + [=] RAJA_HOST_DEVICE(TX tx, TY ty, TX bx, TY by, SharedTile & myTile, + SharedTile&) { TX col = bx * TX_TILE_DIM + tx; // Matrix column index TY row = by * TY_TILE_DIM + ty; // Matrix row index @@ -221,8 +216,8 @@ GPU_TYPED_TEST_P(TypedLocalMem_gpu, Basic) }, // read from shared mem - [=] RAJA_HOST_DEVICE( - TX tx, TY ty, TX bx, TY by, SharedTile & myTile, SharedTile&) + [=] RAJA_HOST_DEVICE(TX tx, TY ty, TX bx, TY by, SharedTile & myTile, + SharedTile&) { TX col = bx * TX_TILE_DIM + tx; // Matrix column index TY row = by * TY_TILE_DIM + ty; // Matrix row index @@ -317,18 +312,13 @@ GPU_TYPED_TEST_P(MatTranspose, Basic) SharedTile myTile, myTile2; RAJA::kernel_param( - RAJA::make_tuple(RAJA::RangeSegment(0, inner_Dim0), - RAJA::RangeSegment(0, inner_Dim1), - RAJA::RangeSegment(0, outer_Dim0), - RAJA::RangeSegment(0, outer_Dim1)), + RAJA::make_tuple( + RAJA::RangeSegment(0, inner_Dim0), RAJA::RangeSegment(0, inner_Dim1), + RAJA::RangeSegment(0, outer_Dim0), RAJA::RangeSegment(0, outer_Dim1)), RAJA::make_tuple(myTile, myTile2), // Load data into shared memory - [=] RAJA_HOST_DEVICE(int tx, - int ty, - int bx, - int by, - SharedTile& myTile, + [=] RAJA_HOST_DEVICE(int tx, int ty, int bx, int by, SharedTile& myTile, SharedTile& myTile2) { int col = bx * TILE_DIM + tx; // Matrix column index @@ -342,11 +332,7 @@ GPU_TYPED_TEST_P(MatTranspose, Basic) }, // read from shared mem - [=] RAJA_HOST_DEVICE(int tx, - int ty, - int bx, - int by, - SharedTile& myTile, + [=] RAJA_HOST_DEVICE(int tx, int ty, int bx, int by, SharedTile& myTile, SharedTile& myTile2) { int col = by * TILE_DIM + tx; // Transposed matrix column index @@ -454,18 +440,13 @@ GPU_TYPED_TEST_P(MatTranspose_gpu, Basic) SharedTile myTile, myTile2; RAJA::kernel_param( - RAJA::make_tuple(RAJA::RangeSegment(0, inner_Dim0), - RAJA::RangeSegment(0, inner_Dim1), - RAJA::RangeSegment(0, outer_Dim0), - RAJA::RangeSegment(0, outer_Dim1)), + RAJA::make_tuple( + RAJA::RangeSegment(0, inner_Dim0), RAJA::RangeSegment(0, inner_Dim1), + RAJA::RangeSegment(0, outer_Dim0), RAJA::RangeSegment(0, outer_Dim1)), RAJA::make_tuple(myTile, myTile2), // Load data into shared memory - [=] RAJA_HOST_DEVICE(int tx, - int ty, - int bx, - int by, - SharedTile& myTile, + [=] RAJA_HOST_DEVICE(int tx, int ty, int bx, int by, SharedTile& myTile, SharedTile& myTile2) { int col = bx * TILE_DIM + tx; // Matrix column index @@ -479,11 +460,7 @@ GPU_TYPED_TEST_P(MatTranspose_gpu, Basic) }, // read from shared mem - [=] RAJA_HOST_DEVICE(int tx, - int ty, - int bx, - int by, - SharedTile& myTile, + [=] RAJA_HOST_DEVICE(int tx, int ty, int bx, int by, SharedTile& myTile, SharedTile& myTile2) { int col = by * TILE_DIM + tx; // Transposed matrix column index @@ -582,7 +559,8 @@ using TestTypes = ::testing::Types< // Read data from shared memory RAJA::statement::Collapse, - RAJA::statement::Lambda<1>>>> // for 2 + RAJA::statement::Lambda<1>>>> // for + // 2 > // for 3 > // close policy > // close list @@ -676,7 +654,8 @@ using TestTypes = ::testing::Types< RAJA::statement::For<0, RAJA::seq_exec, RAJA::statement::Lambda<1>>>> // close - // shared mem + // shared + // mem // window > // outer collapsed > // close policy list @@ -939,8 +918,7 @@ GPU_TYPED_TEST_P(MatMultiply, shmem) ThreadPriv pVal; // iteration dependent data RAJA::kernel_param( - RAJA::make_tuple(RAJA::RangeSegment(0, N), - RAJA::RangeSegment(0, M), + RAJA::make_tuple(RAJA::RangeSegment(0, N), RAJA::RangeSegment(0, M), RAJA::RangeSegment(0, P)), RAJA::make_tuple(aShared, bShared, pVal), @@ -957,12 +935,8 @@ GPU_TYPED_TEST_P(MatMultiply, shmem) { bShared(tm, tp) = Bview(m, p); }, // Do partial update in shmem - [=] RAJA_HOST_DEVICE(int tn, - int tm, - int tp, - Shmem& aShared, - Shmem& bShared, - ThreadPriv& pVal) + [=] RAJA_HOST_DEVICE(int tn, int tm, int tp, Shmem& aShared, + Shmem& bShared, ThreadPriv& pVal) { pVal(tn, tp) += aShared(tn, tm) * bShared(tm, tp); }, // Write out complete result diff --git a/test/old-tests/unit/test-simd.cpp b/test/old-tests/unit/test-simd.cpp index 597666c4ba..1872aa6d7f 100644 --- a/test/old-tests/unit/test-simd.cpp +++ b/test/old-tests/unit/test-simd.cpp @@ -55,8 +55,7 @@ TEST(SIMD, OMPAndSimd) { using POL = RAJA::KernelPolicy>>>; const RAJA::Index_type N = 32; @@ -94,13 +93,10 @@ TEST(SIMD, OMPAndSimd) TEST(SIMD, OMPAndSimd_MultiLambda) { - using POL = RAJA::KernelPolicy< - RAJA::statement::For<1, - RAJA::omp_parallel_for_exec, - RAJA::statement::For<0, - RAJA::simd_exec, - RAJA::statement::Lambda<0>, - RAJA::statement::Lambda<1>>>>; + using POL = RAJA::KernelPolicy, + RAJA::statement::Lambda<1>>>>; const RAJA::Index_type N = 32; const RAJA::Index_type M = 32; diff --git a/test/unit/algorithm/tests/test-algorithm-reduce-utils.hpp b/test/unit/algorithm/tests/test-algorithm-reduce-utils.hpp index 690fd79d71..c262f6f39d 100644 --- a/test/unit/algorithm/tests/test-algorithm-reduce-utils.hpp +++ b/test/unit/algorithm/tests/test-algorithm-reduce-utils.hpp @@ -283,46 +283,19 @@ void testReducerInterfaces(unsigned seed, ReduceData data( N, res, [&]() { return dist(rng); }); - ASSERT_TRUE(testReduce("default", - seed, - data, - N, - RAJA::operators::plus::identity(), - RAJA::operators::plus{}, - reducer, - reduce_category{}, - interface_category{}, - no_init_operator{})); - ASSERT_TRUE(testReduce("init", - seed, - data, - N, - ValType(N), - RAJA::operators::plus{}, - reducer, - reduce_category{}, - interface_category{}, - init_no_operator{})); - ASSERT_TRUE(testReduce("minimum", - seed, - data, - N, - ValType(0), - RAJA::operators::minimum{}, - reducer, - reduce_category{}, - interface_category{}, - init_operator{})); - ASSERT_TRUE(testReduce("Maximum", - seed, - data, - N, - ValType(0), - RAJA::operators::maximum{}, - reducer, - reduce_category{}, - interface_category{}, - init_operator{})); + ASSERT_TRUE(testReduce( + "default", seed, data, N, RAJA::operators::plus::identity(), + RAJA::operators::plus{}, reducer, reduce_category{}, + interface_category{}, no_init_operator{})); + ASSERT_TRUE(testReduce( + "init", seed, data, N, ValType(N), RAJA::operators::plus{}, + reducer, reduce_category{}, interface_category{}, init_no_operator{})); + ASSERT_TRUE(testReduce( + "minimum", seed, data, N, ValType(0), RAJA::operators::minimum{}, + reducer, reduce_category{}, interface_category{}, init_operator{})); + ASSERT_TRUE(testReduce( + "Maximum", seed, data, N, ValType(0), RAJA::operators::maximum{}, + reducer, reduce_category{}, interface_category{}, init_operator{})); } template diff --git a/test/unit/algorithm/tests/test-algorithm-sort-utils.hpp b/test/unit/algorithm/tests/test-algorithm-sort-utils.hpp index 9e4862b442..d8fad70e6d 100644 --- a/test/unit/algorithm/tests/test-algorithm-sort-utils.hpp +++ b/test/unit/algorithm/tests/test-algorithm-sort-utils.hpp @@ -287,8 +287,7 @@ void doSort(SortData& data, data.copy_data(N); data.resource().wait(); sorter(RAJA::make_span(data.sorted_keys, N), - RAJA::make_span(data.sorted_vals, N), - comp); + RAJA::make_span(data.sorted_vals, N), comp); sorter.synchronize(); } @@ -305,8 +304,7 @@ void doSort(SortData& data, sort_res_default_interface_tag) { data.copy_data(N); - sorter(data.resource(), - RAJA::make_span(data.sorted_keys, N), + sorter(data.resource(), RAJA::make_span(data.sorted_keys, N), RAJA::make_span(data.sorted_vals, N)); data.resource().wait(); } @@ -324,10 +322,8 @@ void doSort(SortData& data, sort_res_comp_interface_tag) { data.copy_data(N); - sorter(data.resource(), - RAJA::make_span(data.sorted_keys, N), - RAJA::make_span(data.sorted_vals, N), - comp); + sorter(data.resource(), RAJA::make_span(data.sorted_keys, N), + RAJA::make_span(data.sorted_vals, N), comp); data.resource().wait(); } @@ -629,32 +625,15 @@ void testSorterResInterfaces( using resource_no_comparator = sort_res_default_interface_tag; using resource_use_comparator = sort_res_comp_interface_tag; - ASSERT_TRUE(testSort("resource+default", - seed, - data, - N, - RAJA::operators::less{}, - sorter, - stability_category{}, - pairs_category{}, - resource_no_comparator{})); - ASSERT_TRUE(testSort("resource+ascending", - seed, - data, - N, - RAJA::operators::less{}, - sorter, - stability_category{}, - pairs_category{}, - resource_use_comparator{})); - ASSERT_TRUE(testSort("resource+descending", - seed, - data, - N, - RAJA::operators::greater{}, - sorter, - stability_category{}, - pairs_category{}, + ASSERT_TRUE(testSort("resource+default", seed, data, N, + RAJA::operators::less{}, sorter, stability_category{}, + pairs_category{}, resource_no_comparator{})); + ASSERT_TRUE(testSort("resource+ascending", seed, data, N, + RAJA::operators::less{}, sorter, stability_category{}, + pairs_category{}, resource_use_comparator{})); + ASSERT_TRUE(testSort("resource+descending", seed, data, N, + RAJA::operators::greater{}, sorter, + stability_category{}, pairs_category{}, resource_use_comparator{})); } @@ -677,33 +656,15 @@ void testSorterInterfaces(unsigned seed, SortData data(N, res, [&]() { return dist(rng); }); - ASSERT_TRUE(testSort("default", - seed, - data, - N, - RAJA::operators::less{}, - sorter, - stability_category{}, - pairs_category{}, + ASSERT_TRUE(testSort("default", seed, data, N, RAJA::operators::less{}, + sorter, stability_category{}, pairs_category{}, no_comparator{})); - ASSERT_TRUE(testSort("ascending", - seed, - data, - N, - RAJA::operators::less{}, - sorter, - stability_category{}, - pairs_category{}, - use_comparator{})); - ASSERT_TRUE(testSort("descending", - seed, - data, - N, - RAJA::operators::greater{}, - sorter, - stability_category{}, - pairs_category{}, + ASSERT_TRUE(testSort("ascending", seed, data, N, RAJA::operators::less{}, + sorter, stability_category{}, pairs_category{}, use_comparator{})); + ASSERT_TRUE(testSort( + "descending", seed, data, N, RAJA::operators::greater{}, sorter, + stability_category{}, pairs_category{}, use_comparator{})); testSorterResInterfaces(supports_resource(), seed, data, N, sorter); } diff --git a/test/unit/hip/test-synchronize.cpp b/test/unit/hip/test-synchronize.cpp index f623a65680..0ddc91bb9a 100644 --- a/test/unit/hip/test-synchronize.cpp +++ b/test/unit/hip/test-synchronize.cpp @@ -19,13 +19,12 @@ GPU_TEST(SynchronizeUnitTest, HIP) hipMalloc(&d_managed_data, sizeof(double) * 50); RAJA::forall>( - RAJA::RangeSegment(0, 50), - [=] RAJA_HOST_DEVICE(RAJA::Index_type i) + RAJA::RangeSegment(0, 50), [=] RAJA_HOST_DEVICE(RAJA::Index_type i) { d_managed_data[i] = 1.0 * i; }); RAJA::synchronize(); - hipMemcpy( - managed_data, d_managed_data, sizeof(double) * 50, hipMemcpyDeviceToHost); + hipMemcpy(managed_data, d_managed_data, sizeof(double) * 50, + hipMemcpyDeviceToHost); RAJA::forall(RAJA::RangeSegment(0, 50), [=](RAJA::Index_type i) diff --git a/test/unit/index/test-indexset.cpp b/test/unit/index/test-indexset.cpp index 77ba153913..63b0448bab 100644 --- a/test/unit/index/test-indexset.cpp +++ b/test/unit/index/test-indexset.cpp @@ -214,8 +214,8 @@ TEST(IndexSetUnitTest, ConditionalLessThan100Indices) ref_lt100_indices.push_back(99); RAJA::RAJAVec lt100_indices; - getIndicesConditional( - lt100_indices, iset, [](int idx) { return (idx < 100); }); + getIndicesConditional(lt100_indices, iset, + [](int idx) { return (idx < 100); }); EXPECT_EQ(lt100_indices.size(), ref_lt100_indices.size()); for (size_t i = 0; i < ref_lt100_indices.size(); ++i) diff --git a/test/unit/index/test-rangestridesegment.cpp b/test/unit/index/test-rangestridesegment.cpp index d97021b0cf..dc14699663 100644 --- a/test/unit/index/test-rangestridesegment.cpp +++ b/test/unit/index/test-rangestridesegment.cpp @@ -32,8 +32,7 @@ TYPED_TEST(RangeStrideSegmentUnitTest, Constructors) TYPED_TEST(RangeStrideSegmentUnitTest, Assignments) { auto r = RAJA::make_strided_range( - static_cast(0), - static_cast(5), + static_cast(0), static_cast(5), static_cast::type>(3)); RAJA::TypedRangeStrideSegment seg1 = r; ASSERT_EQ(r, seg1); @@ -122,8 +121,7 @@ TYPED_TEST(RangeStrideSegmentUnitTest, Sizes) ASSERT_EQ(segment11.size(), difftype_t(2)); // PRIMES - RAJA::TypedRangeStrideSegment segment12(0, - 7, + RAJA::TypedRangeStrideSegment segment12(0, 7, 3); // should produce 0,3,6 ASSERT_EQ(segment12.size(), difftype_t(3)); diff --git a/test/unit/indexing/tests/test-indexing-global.hpp b/test/unit/indexing/tests/test-indexing-global.hpp index f9435c3544..004fe6da53 100644 --- a/test/unit/indexing/tests/test-indexing-global.hpp +++ b/test/unit/indexing/tests/test-indexing-global.hpp @@ -102,14 +102,11 @@ TYPED_TEST_P(IndexingUnitTest, BasicIndexing) using threads_type = typename camp::at>::type; using blocks_type = typename camp::at>::type; - using indexer_type = typename indexer_holder_type:: - template type; - - testBasicIndexing(); + using indexer_type = typename indexer_holder_type::template type< + dim_type::value, threads_type::value, blocks_type::value>; + + testBasicIndexing(); } REGISTER_TYPED_TEST_SUITE_P(IndexingUnitTest, BasicIndexing); diff --git a/test/unit/multi_reducer/tests/test-multi-reducer-reset.hpp b/test/unit/multi_reducer/tests/test-multi-reducer-reset.hpp index e307a08b46..f139edb834 100644 --- a/test/unit/multi_reducer/tests/test-multi-reducer-reset.hpp +++ b/test/unit/multi_reducer/tests/test-multi-reducer-reset.hpp @@ -431,11 +431,9 @@ template (false, init_bins, container); - testMultiReducerContainerResetBitwise(false, init_bins, container); // avoid using the reducer as forone does not handle reducers correctly // forone does not make_lambda_body or privatize the body @@ -453,8 +451,7 @@ template (false, init_bins, container); // avoid using the reducer as forone does not handle reducers correctly // forone does not make_lambda_body or privatize the body diff --git a/test/unit/reducer/tests/test-reducer-constructors.hpp b/test/unit/reducer/tests/test-reducer-constructors.hpp index cf3b6bf087..9098910db3 100644 --- a/test/unit/reducer/tests/test-reducer-constructors.hpp +++ b/test/unit/reducer/tests/test-reducer-constructors.hpp @@ -66,12 +66,10 @@ testReducerConstructor() RAJA::ReduceMinLoc reduce_minloc; RAJA::ReduceMaxLoc reduce_maxloc; - RAJA::ReduceMinLoc> reduce_minloctup; - RAJA::ReduceMaxLoc> reduce_maxloctup; @@ -166,12 +164,10 @@ void testInitReducerConstructor() RAJA::ReduceMaxLoc reduce_maxloc(initVal, 1); RAJA::tuple LocTup(1, 1); - RAJA::ReduceMinLoc> reduce_minloctup(initVal, LocTup); - RAJA::ReduceMaxLoc> reduce_maxloctup(initVal, LocTup); @@ -215,9 +211,7 @@ TYPED_TEST_P(ReducerInitConstructorUnitTest, InitReducerConstructor) using ResourceType = typename camp::at>::type; using ForOneType = typename camp::at>::type; - testInitReducerConstructor(); } diff --git a/test/unit/reducer/tests/test-reducer-reset.hpp b/test/unit/reducer/tests/test-reducer-reset.hpp index c4b906c27b..9fc0b996d1 100644 --- a/test/unit/reducer/tests/test-reducer-reset.hpp +++ b/test/unit/reducer/tests/test-reducer-reset.hpp @@ -116,28 +116,18 @@ void testReducerReset() RAJA::ReduceMaxLoc reduce_maxloc(initVal, 1); RAJA::tuple LocTup(1, 1); - RAJA::ReduceMinLoc> reduce_minloctup(initVal, LocTup); - RAJA::ReduceMaxLoc> reduce_maxloctup(initVal, LocTup); // initiate some device computation if using device policy - exec_dispatcher, - ForOnePol>(reduce_sum, - reduce_min, - reduce_max, - reduce_minloc, - reduce_maxloc, - reduce_minloctup, - reduce_maxloctup, - initVal); + exec_dispatcher, ForOnePol>( + reduce_sum, reduce_min, reduce_max, reduce_minloc, reduce_maxloc, + reduce_minloctup, reduce_maxloctup, initVal); // perform real host resets reduce_sum.reset(resetVal[0]); diff --git a/test/unit/resource/tests/test-resource-AsyncTime.hpp b/test/unit/resource/tests/test-resource-AsyncTime.hpp index 0349c8e8bf..fdadcafc6c 100644 --- a/test/unit/resource/tests/test-resource-AsyncTime.hpp +++ b/test/unit/resource/tests/test-resource-AsyncTime.hpp @@ -73,8 +73,7 @@ void ResourceAsyncTimeTestImpl(RAJA::cuda_exec&&) sync_timer.start(); for (std::size_t stream = 0; stream < NUM_STREAMS; ++stream) { - forall(dev[stream], - RangeSegment(0, ARRAY_SIZE), + forall(dev[stream], RangeSegment(0, ARRAY_SIZE), [=] RAJA_HOST_DEVICE(int i) { gpu_time_wait_for(100, clockrate); }); } @@ -85,8 +84,7 @@ void ResourceAsyncTimeTestImpl(RAJA::cuda_exec&&) async_timer.start(); for (std::size_t stream = 0; stream < NUM_STREAMS; ++stream) { - forall(dev[stream], - RangeSegment(0, ARRAY_SIZE), + forall(dev[stream], RangeSegment(0, ARRAY_SIZE), [=] RAJA_HOST_DEVICE(int i) { gpu_time_wait_for(100, clockrate); }); } diff --git a/test/unit/resource/tests/test-resource-BasicAsyncSemantics.hpp b/test/unit/resource/tests/test-resource-BasicAsyncSemantics.hpp index 18ee61720b..8f78509d16 100644 --- a/test/unit/resource/tests/test-resource-BasicAsyncSemantics.hpp +++ b/test/unit/resource/tests/test-resource-BasicAsyncSemantics.hpp @@ -22,23 +22,20 @@ void ResourceBasicAsyncSemanticsTestImpl() int* d_array = resources::Resource{dev}.allocate(ARRAY_SIZE); int* h_array = host.allocate(ARRAY_SIZE); - forall(host, - RangeSegment(0, ARRAY_SIZE), + forall(host, RangeSegment(0, ARRAY_SIZE), [=] RAJA_HOST_DEVICE(int i) { h_array[i] = i; }); dev.memcpy(d_array, h_array, sizeof(int) * ARRAY_SIZE); - forall(dev, - RangeSegment(0, ARRAY_SIZE), + forall(dev, RangeSegment(0, ARRAY_SIZE), [=] RAJA_HOST_DEVICE(int i) { d_array[i] = i + 2; }); dev.memcpy(h_array, d_array, sizeof(int) * ARRAY_SIZE); dev.wait(); - forall(host, - RangeSegment(0, ARRAY_SIZE), + forall(host, RangeSegment(0, ARRAY_SIZE), [=](int i) { ASSERT_EQ(h_array[i], i + 2); }); diff --git a/test/unit/resource/tests/test-resource-Depends.hpp b/test/unit/resource/tests/test-resource-Depends.hpp index 19875a1c90..daf75b3eb4 100644 --- a/test/unit/resource/tests/test-resource-Depends.hpp +++ b/test/unit/resource/tests/test-resource-Depends.hpp @@ -25,19 +25,16 @@ void ResourceDependsTestImpl() int* h_array = host.allocate(ARRAY_SIZE); - forall(dev1, - RangeSegment(0, ARRAY_SIZE), + forall(dev1, RangeSegment(0, ARRAY_SIZE), [=] RAJA_HOST_DEVICE(int i) { d_array1[i] = i; }); resources::Event e = - forall(dev2, - RangeSegment(0, ARRAY_SIZE), + forall(dev2, RangeSegment(0, ARRAY_SIZE), [=] RAJA_HOST_DEVICE(int i) { d_array2[i] = -1; }); dev1.wait_for(&e); - forall(dev1, - RangeSegment(0, ARRAY_SIZE), + forall(dev1, RangeSegment(0, ARRAY_SIZE), [=] RAJA_HOST_DEVICE(int i) { d_array1[i] *= d_array2[i]; }); @@ -45,8 +42,7 @@ void ResourceDependsTestImpl() dev1.wait(); - forall(host, - RangeSegment(0, ARRAY_SIZE), + forall(host, RangeSegment(0, ARRAY_SIZE), [=](int i) { ASSERT_EQ(h_array[i], -i); }); diff --git a/test/unit/resource/tests/test-resource-JoinAsyncSemantics.hpp b/test/unit/resource/tests/test-resource-JoinAsyncSemantics.hpp index 11317194f5..ee3ff324a2 100644 --- a/test/unit/resource/tests/test-resource-JoinAsyncSemantics.hpp +++ b/test/unit/resource/tests/test-resource-JoinAsyncSemantics.hpp @@ -23,8 +23,7 @@ void ResourceJoinAsyncSemanticsTestImpl() int* d_array = resources::Resource{dev1}.allocate(ARRAY_SIZE); int* h_array = host.allocate(ARRAY_SIZE); - forall(host, - RangeSegment(0, ARRAY_SIZE), + forall(host, RangeSegment(0, ARRAY_SIZE), [=] RAJA_HOST_DEVICE(int i) { h_array[i] = i; }); @@ -34,8 +33,7 @@ void ResourceJoinAsyncSemanticsTestImpl() dev1.wait_for(&e1); RAJA::resources::Event e2 = - forall(dev1, - RangeSegment(0, ARRAY_SIZE), + forall(dev1, RangeSegment(0, ARRAY_SIZE), [=] RAJA_HOST_DEVICE(int i) { d_array[i] = i + 2; }); dev2.wait_for(&e2); @@ -44,8 +42,7 @@ void ResourceJoinAsyncSemanticsTestImpl() dev2.wait(); - forall(host, - RangeSegment(0, ARRAY_SIZE), + forall(host, RangeSegment(0, ARRAY_SIZE), [=](int i) { ASSERT_EQ(h_array[i], i + 2); }); diff --git a/test/unit/resource/tests/test-resource-MultiStream.hpp b/test/unit/resource/tests/test-resource-MultiStream.hpp index 379188614a..befc0d5c0b 100644 --- a/test/unit/resource/tests/test-resource-MultiStream.hpp +++ b/test/unit/resource/tests/test-resource-MultiStream.hpp @@ -24,8 +24,7 @@ void ResourceMultiStreamTestImpl() int* d_array = resources::Resource{dev1}.allocate(ARRAY_SIZE); int* h_array = host.allocate(ARRAY_SIZE); - resources::Event e1 = forall(dev1, - RangeSegment(0, ARRAY_SIZE), + resources::Event e1 = forall(dev1, RangeSegment(0, ARRAY_SIZE), [=] RAJA_HOST_DEVICE(int i) { if (i % 3 == 0) @@ -34,8 +33,7 @@ void ResourceMultiStreamTestImpl() } }); - resources::Event e2 = forall(dev2, - RangeSegment(0, ARRAY_SIZE), + resources::Event e2 = forall(dev2, RangeSegment(0, ARRAY_SIZE), [=] RAJA_HOST_DEVICE(int i) { if (i % 3 == 1) @@ -44,8 +42,7 @@ void ResourceMultiStreamTestImpl() } }); - resources::Event e3 = forall(dev2, - RangeSegment(0, ARRAY_SIZE), + resources::Event e3 = forall(dev2, RangeSegment(0, ARRAY_SIZE), [=] RAJA_HOST_DEVICE(int i) { if (i % 3 == 2) @@ -61,8 +58,7 @@ void ResourceMultiStreamTestImpl() dev1.wait(); - forall(host, - RangeSegment(0, ARRAY_SIZE), + forall(host, RangeSegment(0, ARRAY_SIZE), [=](int i) { ASSERT_EQ(h_array[i], i); }); diff --git a/test/unit/view-layout/test-indexlayout.cpp b/test/unit/view-layout/test-indexlayout.cpp index ae46ef993c..14b54ddab0 100644 --- a/test/unit/view-layout/test-indexlayout.cpp +++ b/test/unit/view-layout/test-indexlayout.cpp @@ -279,9 +279,9 @@ TEST(IndexLayout, View3DLayout) Index_type index_list_j[2] = {1, 2}; Index_type index_list_k[2] = {2, 3}; - auto index_tuple = make_index_tuple(DirectIndex<>(), - IndexList<>{&index_list_j[0]}, - IndexList<>{&index_list_k[0]}); + auto index_tuple = + make_index_tuple(DirectIndex<>(), IndexList<>{&index_list_j[0]}, + IndexList<>{&index_list_k[0]}); auto index_layout = make_index_layout(index_tuple, 2, 3, 4); diff --git a/test/unit/view-layout/test-typedlayout.cpp b/test/unit/view-layout/test-typedlayout.cpp index 12b24c28da..4b1a3c4294 100644 --- a/test/unit/view-layout/test-typedlayout.cpp +++ b/test/unit/view-layout/test-typedlayout.cpp @@ -145,11 +145,8 @@ TYPED_TEST(TypedLayoutUnitTest, 2D_StaticLayout) { RAJA::Layout<2, TypeParam> dynamic_layout(7, 5); using static_layout = - RAJA::TypedStaticLayout, - 7, - 5>; + RAJA::TypedStaticLayout, 7, 5>; // Check that we get the same layout for (TypeParam i = 0; i < 7; ++i) @@ -167,11 +164,8 @@ TYPED_TEST(TypedLayoutUnitTest, 2D_PermutedStaticLayout) auto dynamic_layout = RAJA::make_permuted_layout( {{7, 5}}, RAJA::as_array::get()); using static_layout = - RAJA::TypedStaticLayout, - 7, - 5>; + RAJA::TypedStaticLayout, 7, 5>; // Check that we get the same layout for (TypeParam i = 0; i < 7; ++i) @@ -188,12 +182,9 @@ TYPED_TEST(TypedLayoutUnitTest, 3D_PermutedStaticLayout) auto dynamic_layout = RAJA::make_permuted_layout( {{7, 13, 5}}, RAJA::as_array::get()); using static_layout = - RAJA::TypedStaticLayout, - 7, - 13, - 5>; + RAJA::TypedStaticLayout, 7, + 13, 5>; // Check that we get the same layout for (TypeParam i = 0; i < 7; ++i) @@ -215,13 +206,8 @@ TYPED_TEST(TypedLayoutUnitTest, 4D_PermutedStaticLayout) auto dynamic_layout = RAJA::make_permuted_layout( {{7, 13, 5, 17}}, RAJA::as_array::get()); using static_layout = RAJA::TypedStaticLayout< - RAJA::PERM_LJKI, - TypeParam, - RAJA::list, - 7, - 13, - 5, - 17>; + RAJA::PERM_LJKI, TypeParam, + RAJA::list, 7, 13, 5, 17>; // Check that we get the same layout for (TypeParam i = 0; i < 7; ++i) diff --git a/test/unit/view-layout/test-typedview.cpp b/test/unit/view-layout/test-typedview.cpp index e1480e20ea..97aeb654b1 100644 --- a/test/unit/view-layout/test-typedview.cpp +++ b/test/unit/view-layout/test-typedview.cpp @@ -193,7 +193,7 @@ TYPED_TEST(OffsetLayoutViewUnitTest, View) std::array lower{{1}}; std::array upper{{11}}; RAJA::View view(data, - RAJA::make_offset_layout<1>(lower, upper)); + RAJA::make_offset_layout<1>(lower, upper)); for (int i = 0; i < 10; i++) { diff --git a/test/unit/workgroup/tests/test-workgroup-Constructor.hpp b/test/unit/workgroup/tests/test-workgroup-Constructor.hpp index 7942c552ab..1322453ed5 100644 --- a/test/unit/workgroup/tests/test-workgroup-Constructor.hpp +++ b/test/unit/workgroup/tests/test-workgroup-Constructor.hpp @@ -31,47 +31,31 @@ struct testWorkGroupConstructorSingle using DispatchPolicy = typename DispatchTyper::template type<>; { - RAJA::WorkPool, - IndexType, - RAJA::xargs, - Allocator> + RAJA::WorkPool, + IndexType, RAJA::xargs, Allocator> pool(Allocator{}); ASSERT_EQ(pool.num_loops(), (size_t)0); ASSERT_EQ(pool.storage_bytes(), (size_t)0); - RAJA::WorkGroup, - IndexType, - RAJA::xargs, - Allocator> + RAJA::WorkGroup, + IndexType, RAJA::xargs, Allocator> group = pool.instantiate(); ASSERT_EQ(pool.num_loops(), (size_t)0); ASSERT_EQ(pool.storage_bytes(), (size_t)0); - RAJA::WorkSite, - IndexType, - RAJA::xargs, - Allocator> + RAJA::WorkSite, + IndexType, RAJA::xargs, Allocator> site = group.run(Xargs{}...); - using resource_type = - typename RAJA::WorkPool, - IndexType, - RAJA::xargs, - Allocator>::resource_type; + using resource_type = typename RAJA::WorkPool< + RAJA::WorkGroupPolicy, + IndexType, RAJA::xargs, Allocator>::resource_type; auto e = resource_type::get_default().get_event(); e.wait(); @@ -148,12 +132,9 @@ TYPED_TEST_P(WorkGroupBasicConstructorSingleUnitTest, using Xargs = typename camp::at>::type; using Allocator = typename camp::at>::type; - testWorkGroupConstructorSingle{}(Xargs{}); + testWorkGroupConstructorSingle{}( + Xargs{}); } #endif //__TEST_WORKGROUP_CONSTRUCTOR__ diff --git a/test/unit/workgroup/tests/test-workgroup-Dispatcher.hpp b/test/unit/workgroup/tests/test-workgroup-Dispatcher.hpp index b77700c46d..21f9a1d01b 100644 --- a/test/unit/workgroup/tests/test-workgroup-Dispatcher.hpp +++ b/test/unit/workgroup/tests/test-workgroup-Dispatcher.hpp @@ -104,9 +104,9 @@ struct testWorkGroupDispatcherSingle static constexpr auto platform = RAJA::platform_of::value; using DispatchPolicy = typename DispatchTyper::template type; - using Dispatcher_type = RAJA::detail:: - Dispatcher; - using Invoker_type = typename Dispatcher_type::invoker_type; + using Dispatcher_type = RAJA::detail::Dispatcher; + using Invoker_type = typename Dispatcher_type::invoker_type; using Dispatcher_cptr_type = typename Dispatcher_type::void_cptr_wrapper; const Dispatcher_type* dispatcher = RAJA::detail::get_Dispatcher( @@ -172,12 +172,9 @@ struct testWorkGroupDispatcherSingle work_res.memcpy(wrk_obj, new_obj, sizeof(TestCallable) * 1); // move a value onto device and fiddle - call_dispatcher( - dispatcher->invoke, wrk_obj, (IndexType)1, Args{}...); + call_dispatcher(dispatcher->invoke, wrk_obj, (IndexType)1, + Args{}...); work_res.memcpy(testCall, workCall, sizeof(IndexType) * 3); @@ -261,11 +258,8 @@ TYPED_TEST_P(WorkGroupBasicDispatcherSingleUnitTest, using ResourceType = typename camp::at>::type; using ForOneType = typename camp::at>::type; - testWorkGroupDispatcherSingle{}(Args{}); + testWorkGroupDispatcherSingle{}(Args{}); } #endif //__TEST_WORKGROUP_DISPATCHER__ diff --git a/test/unit/workgroup/tests/test-workgroup-Enqueue-Multiple.hpp b/test/unit/workgroup/tests/test-workgroup-Enqueue-Multiple.hpp index 3303cac474..7fc09e6b79 100644 --- a/test/unit/workgroup/tests/test-workgroup-Enqueue-Multiple.hpp +++ b/test/unit/workgroup/tests/test-workgroup-Enqueue-Multiple.hpp @@ -40,22 +40,15 @@ struct testWorkGroupEnqueueMultiple using DispatchPolicy = typename DispatchTyper::template type< camp::list>; - using WorkPool_type = RAJA::WorkPool, - IndexType, - RAJA::xargs, - Allocator>; + using WorkPool_type = + RAJA::WorkPool, + IndexType, RAJA::xargs, Allocator>; using WorkGroup_type = - RAJA::WorkGroup, - IndexType, - RAJA::xargs, - Allocator>; + RAJA::WorkGroup, + IndexType, RAJA::xargs, Allocator>; { WorkPool_type pool(Allocator{}); @@ -160,19 +153,11 @@ TYPED_TEST_P(WorkGroupBasicEnqueueMultipleUnitTest, std::uniform_int_distribution dist_rep(0, 16); std::uniform_int_distribution dist_num(0, 64); - testWorkGroupEnqueueMultiple{}( + testWorkGroupEnqueueMultiple{}( Xargs{}, false, dist_rep(rng), dist_num(rng)); - testWorkGroupEnqueueMultiple{}( + testWorkGroupEnqueueMultiple{}( Xargs{}, true, dist_rep(rng), dist_num(rng)); } diff --git a/test/unit/workgroup/tests/test-workgroup-Enqueue-Single.hpp b/test/unit/workgroup/tests/test-workgroup-Enqueue-Single.hpp index a31a18502b..c313d52ebb 100644 --- a/test/unit/workgroup/tests/test-workgroup-Enqueue-Single.hpp +++ b/test/unit/workgroup/tests/test-workgroup-Enqueue-Single.hpp @@ -40,22 +40,15 @@ struct testWorkGroupEnqueueSingle using DispatchPolicy = typename DispatchTyper::template type< camp::list>; - using WorkPool_type = RAJA::WorkPool, - IndexType, - RAJA::xargs, - Allocator>; + using WorkPool_type = + RAJA::WorkPool, + IndexType, RAJA::xargs, Allocator>; using WorkGroup_type = - RAJA::WorkGroup, - IndexType, - RAJA::xargs, - Allocator>; + RAJA::WorkGroup, + IndexType, RAJA::xargs, Allocator>; { WorkPool_type pool(Allocator{}); @@ -155,18 +148,12 @@ TYPED_TEST_P(WorkGroupBasicEnqueueSingleUnitTest, BasicWorkGroupEnqueueSingle) using Xargs = typename camp::at>::type; using Allocator = typename camp::at>::type; - testWorkGroupEnqueueSingle{}(Xargs{}, false, 1, 1); - testWorkGroupEnqueueSingle{}(Xargs{}, true, 1, 1); + testWorkGroupEnqueueSingle{}( + Xargs{}, false, 1, 1); + testWorkGroupEnqueueSingle{}(Xargs{}, + true, 1, 1); } #endif //__TEST_WORKGROUP_ENQUEUESINGLE__ diff --git a/test/unit/workgroup/tests/test-workgroup-WorkStorage-Constructor.hpp b/test/unit/workgroup/tests/test-workgroup-WorkStorage-Constructor.hpp index e8ec1368f6..2028343127 100644 --- a/test/unit/workgroup/tests/test-workgroup-WorkStorage-Constructor.hpp +++ b/test/unit/workgroup/tests/test-workgroup-WorkStorage-Constructor.hpp @@ -27,8 +27,8 @@ void testWorkGroupWorkStorageConstructor() static constexpr auto platform = RAJA::Platform::host; using DispatchPolicy = typename DispatchTyper::template type<>; - using Dispatcher_type = RAJA::detail:: - Dispatcher; + using Dispatcher_type = RAJA::detail::Dispatcher; using WorkStorage_type = RAJA::detail::WorkStorage; @@ -79,8 +79,7 @@ TYPED_TEST_P(WorkGroupBasicWorkStorageConstructorUnitTest, using DispatchTyper = typename camp::at>::type; using Allocator = typename camp::at>::type; - testWorkGroupWorkStorageConstructor(); } diff --git a/test/unit/workgroup/tests/test-workgroup-WorkStorage-InsertCall.hpp b/test/unit/workgroup/tests/test-workgroup-WorkStorage-InsertCall.hpp index 58f012d966..0b341d4b5e 100644 --- a/test/unit/workgroup/tests/test-workgroup-WorkStorage-InsertCall.hpp +++ b/test/unit/workgroup/tests/test-workgroup-WorkStorage-InsertCall.hpp @@ -29,8 +29,8 @@ void testWorkGroupWorkStorageInsertCall() static constexpr auto platform = RAJA::Platform::host; using DispatchPolicy = typename DispatchTyper::template type; - using Dispatcher_type = RAJA::detail:: - Dispatcher; + using Dispatcher_type = RAJA::detail::Dispatcher; using WorkStorage_type = RAJA::detail::WorkStorage; using WorkStruct_type = typename WorkStorage_type::value_type; @@ -71,8 +71,8 @@ void testWorkGroupWorkStorageInsertCall() double test_val = -1; bool move_constructed = false; bool moved_from = true; - WorkStruct_type::host_call( - &*iter, (void*)&test_val, &move_constructed, &moved_from); + WorkStruct_type::host_call(&*iter, (void*)&test_val, &move_constructed, + &moved_from); ASSERT_EQ(test_val, init_val); ASSERT_TRUE(move_constructed); diff --git a/test/unit/workgroup/tests/test-workgroup-WorkStorage-Iterator.hpp b/test/unit/workgroup/tests/test-workgroup-WorkStorage-Iterator.hpp index d731b93111..72432d8962 100644 --- a/test/unit/workgroup/tests/test-workgroup-WorkStorage-Iterator.hpp +++ b/test/unit/workgroup/tests/test-workgroup-WorkStorage-Iterator.hpp @@ -29,8 +29,8 @@ void testWorkGroupWorkStorageIterator() static constexpr auto platform = RAJA::Platform::host; using DispatchPolicy = typename DispatchTyper::template type; - using Dispatcher_type = RAJA::detail:: - Dispatcher; + using Dispatcher_type = RAJA::detail::Dispatcher; using WorkStorage_type = RAJA::detail::WorkStorage; diff --git a/test/unit/workgroup/tests/test-workgroup-WorkStorage-Multiple.hpp b/test/unit/workgroup/tests/test-workgroup-WorkStorage-Multiple.hpp index 517b51b4b6..4500fb6749 100644 --- a/test/unit/workgroup/tests/test-workgroup-WorkStorage-Multiple.hpp +++ b/test/unit/workgroup/tests/test-workgroup-WorkStorage-Multiple.hpp @@ -62,8 +62,8 @@ void testWorkGroupWorkStorageMultiple(const size_t num0, static constexpr auto platform = RAJA::Platform::host; using DispatchPolicy = typename DispatchTyper::template type; - using Dispatcher_type = RAJA::detail:: - Dispatcher; + using Dispatcher_type = RAJA::detail::Dispatcher; using WorkStorage_type = RAJA::detail::WorkStorage; using WorkStruct_type = typename WorkStorage_type::value_type; @@ -86,10 +86,8 @@ void testWorkGroupWorkStorageMultiple(const size_t num0, ASSERT_EQ(container.storage_size(), (size_t)0); }; - auto fill_contents = [&](WorkStorage_type& container, - double init_val0, - double init_val1, - double init_val2) + auto fill_contents = [&](WorkStorage_type& container, double init_val0, + double init_val1, double init_val2) { std::vector vec0; vec0.reserve(num0); @@ -128,20 +126,18 @@ void testWorkGroupWorkStorageMultiple(const size_t num0, } ASSERT_EQ(container.size(), num0 + num1 + num2); - ASSERT_GE(container.storage_size(), - num0 * sizeof(callable0) + num1 * sizeof(callable1) + - num2 * sizeof(callable2)); + ASSERT_GE(container.storage_size(), num0 * sizeof(callable0) + + num1 * sizeof(callable1) + + num2 * sizeof(callable2)); }; - auto test_contents = [&](WorkStorage_type& container, - double init_val0, - double init_val1, - double init_val2) + auto test_contents = [&](WorkStorage_type& container, double init_val0, + double init_val1, double init_val2) { ASSERT_EQ(container.size(), num0 + num1 + num2); - ASSERT_GE(container.storage_size(), - num0 * sizeof(callable0) + num1 * sizeof(callable1) + - num2 * sizeof(callable2)); + ASSERT_GE(container.storage_size(), num0 * sizeof(callable0) + + num1 * sizeof(callable1) + + num2 * sizeof(callable2)); { auto iter = container.begin(); @@ -151,8 +147,8 @@ void testWorkGroupWorkStorageMultiple(const size_t num0, type0 val{}; bool move_constructed = false; bool moved_from = true; - WorkStruct_type::host_call( - &*iter, (void*)&val, &move_constructed, &moved_from); + WorkStruct_type::host_call(&*iter, (void*)&val, &move_constructed, + &moved_from); type0 expected = make_type0(init_val0, i); ASSERT_EQ(val, expected); @@ -167,8 +163,8 @@ void testWorkGroupWorkStorageMultiple(const size_t num0, type1 val{}; bool move_constructed = false; bool moved_from = true; - WorkStruct_type::host_call( - &*iter, (void*)&val, &move_constructed, &moved_from); + WorkStruct_type::host_call(&*iter, (void*)&val, &move_constructed, + &moved_from); type1 expected = make_type1(init_val1, i); ASSERT_EQ(val, expected); @@ -183,8 +179,8 @@ void testWorkGroupWorkStorageMultiple(const size_t num0, type2 val{}; bool move_constructed = false; bool moved_from = true; - WorkStruct_type::host_call( - &*iter, (void*)&val, &move_constructed, &moved_from); + WorkStruct_type::host_call(&*iter, (void*)&val, &move_constructed, + &moved_from); type2 expected = make_type2(init_val2, i); ASSERT_EQ(val, expected);