Skip to content

Commit

Permalink
Try new formatting to make nested templates more readable
Browse files: browse the repository at this point in the history
  • Loading branch information
johnbowen42 committed Sep 5, 2024
1 parent ac92fed commit 34496ca
Show file tree
Hide file tree
Showing 332 changed files with 3,975 additions and 7,026 deletions.
4 changes: 3 additions & 1 deletion .clang-format
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
BasedOnStyle : LLVM
# Indent formatting
IndentWidth : 2
Language: Cpp
UseTab: Never
KeepEmptyLinesAtTheStartOfBlocks : true
MaxEmptyLinesToKeep : 2
Expand Down Expand Up @@ -40,9 +41,10 @@ PointerAlignment: Left
AllowShortIfStatementsOnASingleLine : true
AllowShortFunctionsOnASingleLine : true
AllowShortLoopsOnASingleLine : false
AllowAllArgumentsOnNextLine : false
AllowAllParametersOfDeclarationOnNextLine : false
AlignTrailingComments : true
BinPackArguments : false
BinPackArguments : true
BinPackParameters : false
ConstructorInitializerAllOnOneLineOrOnePerLine : true
ColumnLimit : 80
Expand Down
3 changes: 1 addition & 2 deletions examples/dynamic-forall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,7 @@ int main(int argc, char* argv[])
//----------------------------------------------------------------------------//

// policy is chosen from the list
RAJA::expt::dynamic_forall<policy_list>(pol,
RAJA::RangeSegment(0, N),
RAJA::expt::dynamic_forall<policy_list>(pol, RAJA::RangeSegment(0, N),
[=] RAJA_HOST_DEVICE(int i)
{ c[i] = a[i] + b[i]; });
// _rajaseq_vector_add_end
Expand Down
60 changes: 27 additions & 33 deletions examples/dynamic_mat_transpose.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -355,58 +355,52 @@ int main(int argc, char* argv[])
[=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx)
{
RAJA::loop<outer1>(
ctx,
RAJA::RangeSegment(0, outer_Dimr),
ctx, RAJA::RangeSegment(0, outer_Dimr),
[&](int by)
{
RAJA::loop<outer0>(
ctx,
RAJA::RangeSegment(0, outer_Dimc),
ctx, RAJA::RangeSegment(0, outer_Dimc),
[&](int bx)
{
// Request memory from shared memory pool
int* tile_ptr =
ctx.getSharedMemory<int>(TILE_DIM * TILE_DIM);

// Use RAJA View for simplified indexing
RAJA::View<int, RAJA::Layout<2>> Tile(
tile_ptr, TILE_DIM, TILE_DIM);

RAJA::loop<inner1>(ctx,
RAJA::RangeSegment(0, TILE_DIM),
[&](int ty)
{
RAJA::loop<inner0>(
ctx,
RAJA::RangeSegment(0, TILE_DIM),
[&](int tx)
{
int col =
bx * TILE_DIM +
tx; // Matrix column index
int row = by * TILE_DIM +
ty; // Matrix row index

// Bounds check
if (row < N_r && col < N_c)
{
Tile(ty, tx) = Aview(row, col);
}
});
});
RAJA::View<int, RAJA::Layout<2>> Tile(tile_ptr, TILE_DIM,
TILE_DIM);

RAJA::loop<inner1>(
ctx, RAJA::RangeSegment(0, TILE_DIM),
[&](int ty)
{
RAJA::loop<inner0>(
ctx, RAJA::RangeSegment(0, TILE_DIM),
[&](int tx)
{
int col =
bx * TILE_DIM + tx; // Matrix column index
int row =
by * TILE_DIM + ty; // Matrix row index

// Bounds check
if (row < N_r && col < N_c)
{
Tile(ty, tx) = Aview(row, col);
}
});
});

// Barrier is needed to ensure all threads have written to
// Tile
ctx.teamSync();

RAJA::loop<inner1>(
ctx,
RAJA::RangeSegment(0, TILE_DIM),
ctx, RAJA::RangeSegment(0, TILE_DIM),
[&](int ty)
{
RAJA::loop<inner0>(
ctx,
RAJA::RangeSegment(0, TILE_DIM),
ctx, RAJA::RangeSegment(0, TILE_DIM),
[&](int tx)
{
int col =
Expand Down
71 changes: 18 additions & 53 deletions examples/forall-param-reductions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,20 +118,14 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
VALLOC_INT seq_maxloc(std::numeric_limits<int>::min(), -1);

RAJA::forall<EXEC_POL1>(
host_res,
arange,
RAJA::expt::Reduce<RAJA::operators::plus>(&seq_sum),
host_res, arange, RAJA::expt::Reduce<RAJA::operators::plus>(&seq_sum),
RAJA::expt::Reduce<RAJA::operators::minimum>(&seq_min),
RAJA::expt::Reduce<RAJA::operators::maximum>(&seq_max),
RAJA::expt::Reduce<RAJA::operators::minimum>(&seq_minloc),
RAJA::expt::Reduce<RAJA::operators::maximum>(&seq_maxloc),
RAJA::expt::KernelName("RAJA Reduce Seq Kernel"),
[=](int i,
int& _seq_sum,
int& _seq_min,
int& _seq_max,
VALLOC_INT& _seq_minloc,
VALLOC_INT& _seq_maxloc)
[=](int i, int& _seq_sum, int& _seq_min, int& _seq_max,
VALLOC_INT& _seq_minloc, VALLOC_INT& _seq_maxloc)
{
_seq_sum += a[i];

Expand Down Expand Up @@ -173,20 +167,14 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
VALLOC_INT omp_maxloc(std::numeric_limits<int>::min(), -1);

RAJA::forall<EXEC_POL2>(
host_res,
arange,
RAJA::expt::Reduce<RAJA::operators::plus>(&omp_sum),
host_res, arange, RAJA::expt::Reduce<RAJA::operators::plus>(&omp_sum),
RAJA::expt::Reduce<RAJA::operators::minimum>(&omp_min),
RAJA::expt::Reduce<RAJA::operators::maximum>(&omp_max),
RAJA::expt::Reduce<RAJA::operators::minimum>(&omp_minloc),
RAJA::expt::Reduce<RAJA::operators::maximum>(&omp_maxloc),
RAJA::expt::KernelName("RAJA Reduce OpenMP Kernel"),
[=](int i,
int& _omp_sum,
int& _omp_min,
int& _omp_max,
VALLOC_INT& _omp_minloc,
VALLOC_INT& _omp_maxloc)
[=](int i, int& _omp_sum, int& _omp_min, int& _omp_max,
VALLOC_INT& _omp_minloc, VALLOC_INT& _omp_maxloc)
{
_omp_sum += a[i];

Expand Down Expand Up @@ -227,20 +215,14 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
VALLOC_INT omp_t_maxloc(std::numeric_limits<int>::min(), -1);

RAJA::forall<EXEC_POL3>(
omp_res,
arange,
RAJA::expt::Reduce<RAJA::operators::plus>(&omp_t_sum),
omp_res, arange, RAJA::expt::Reduce<RAJA::operators::plus>(&omp_t_sum),
RAJA::expt::Reduce<RAJA::operators::minimum>(&omp_t_min),
RAJA::expt::Reduce<RAJA::operators::maximum>(&omp_t_max),
RAJA::expt::Reduce<RAJA::operators::minimum>(&omp_t_minloc),
RAJA::expt::Reduce<RAJA::operators::maximum>(&omp_t_maxloc),
RAJA::expt::KernelName("RAJA Reduce Target OpenMP Kernel"),
[=](int i,
int& _omp_t_sum,
int& _omp_t_min,
int& _omp_t_max,
VALLOC_INT& _omp_t_minloc,
VALLOC_INT& _omp_t_maxloc)
[=](int i, int& _omp_t_sum, int& _omp_t_min, int& _omp_t_max,
VALLOC_INT& _omp_t_minloc, VALLOC_INT& _omp_t_maxloc)
{
_omp_t_sum += a[i];

Expand Down Expand Up @@ -285,20 +267,14 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
VALLOC_INT cuda_maxloc(std::numeric_limits<int>::min(), -1);

RAJA::forall<EXEC_POL3>(
cuda_res,
arange,
RAJA::expt::Reduce<RAJA::operators::plus>(&cuda_sum),
cuda_res, arange, RAJA::expt::Reduce<RAJA::operators::plus>(&cuda_sum),
RAJA::expt::Reduce<RAJA::operators::minimum>(&cuda_min),
RAJA::expt::Reduce<RAJA::operators::maximum>(&cuda_max),
RAJA::expt::Reduce<RAJA::operators::minimum>(&cuda_minloc),
RAJA::expt::Reduce<RAJA::operators::maximum>(&cuda_maxloc),
RAJA::expt::KernelName("RAJA Reduce CUDA Kernel"),
[=] RAJA_DEVICE(int i,
int& _cuda_sum,
int& _cuda_min,
int& _cuda_max,
VALLOC_INT& _cuda_minloc,
VALLOC_INT& _cuda_maxloc)
[=] RAJA_DEVICE(int i, int& _cuda_sum, int& _cuda_min, int& _cuda_max,
VALLOC_INT& _cuda_minloc, VALLOC_INT& _cuda_maxloc)
{
_cuda_sum += d_a[i];

Expand Down Expand Up @@ -342,19 +318,14 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
VALLOC_INT hip_maxloc(std::numeric_limits<int>::min(), -1);

RAJA::forall<EXEC_POL3>(
arange,
RAJA::expt::Reduce<RAJA::operators::plus>(&hip_sum),
arange, RAJA::expt::Reduce<RAJA::operators::plus>(&hip_sum),
RAJA::expt::Reduce<RAJA::operators::minimum>(&hip_min),
RAJA::expt::Reduce<RAJA::operators::maximum>(&hip_max),
RAJA::expt::Reduce<RAJA::operators::minimum>(&hip_minloc),
RAJA::expt::Reduce<RAJA::operators::maximum>(&hip_maxloc),
RAJA::expt::KernelName("RAJA Reduce HIP Kernel"),
[=] RAJA_DEVICE(int i,
int& _hip_sum,
int& _hip_min,
int& _hip_max,
VALLOC_INT& _hip_minloc,
VALLOC_INT& _hip_maxloc)
[=] RAJA_DEVICE(int i, int& _hip_sum, int& _hip_min, int& _hip_max,
VALLOC_INT& _hip_minloc, VALLOC_INT& _hip_maxloc)
{
_hip_sum += d_a[i];

Expand Down Expand Up @@ -399,20 +370,14 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
VALLOC_INT sycl_maxloc(std::numeric_limits<int>::min(), -1);

RAJA::forall<EXEC_POL3>(
sycl_res,
arange,
RAJA::expt::Reduce<RAJA::operators::plus>(&sycl_sum),
sycl_res, arange, RAJA::expt::Reduce<RAJA::operators::plus>(&sycl_sum),
RAJA::expt::Reduce<RAJA::operators::minimum>(&sycl_min),
RAJA::expt::Reduce<RAJA::operators::maximum>(&sycl_max),
RAJA::expt::Reduce<RAJA::operators::minimum>(&sycl_minloc),
RAJA::expt::Reduce<RAJA::operators::maximum>(&sycl_maxloc),
RAJA::expt::KernelName("RAJA Reduce SYCL Kernel"),
[=] RAJA_DEVICE(int i,
int& _sycl_sum,
int& _sycl_min,
int& _sycl_max,
VALLOC_INT& _sycl_minloc,
VALLOC_INT& _sycl_maxloc)
[=] RAJA_DEVICE(int i, int& _sycl_sum, int& _sycl_min, int& _sycl_max,
VALLOC_INT& _sycl_minloc, VALLOC_INT& _sycl_maxloc)
{
_sycl_sum += d_a[i];

Expand Down
4 changes: 2 additions & 2 deletions examples/forall_multi-reductions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,8 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv))
res.memcpy(bins, host_bins, N * sizeof(int));
res.memcpy(a, host_a, N * sizeof(int));

example_code<exec_policy, multi_reduce_policy>(
arange, num_bins, bins, a);
example_code<exec_policy, multi_reduce_policy>(arange, num_bins, bins,
a);

res.deallocate(bins);
res.deallocate(a);
Expand Down
37 changes: 11 additions & 26 deletions examples/jacobi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,8 +191,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
RAJA::RangeSegment jacobiRange(1, (N + 1));

using jacobiSeqNestedPolicy = RAJA::KernelPolicy<RAJA::statement::For<
1,
RAJA::seq_exec,
1, RAJA::seq_exec,
RAJA::statement::For<0, RAJA::seq_exec, RAJA::statement::Lambda<0>>>>;

printf("RAJA: Sequential Policy - Nested ForallN \n");
Expand Down Expand Up @@ -267,8 +266,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
*/

using jacobiOmpNestedPolicy = RAJA::KernelPolicy<RAJA::statement::For<
1,
RAJA::omp_parallel_for_exec,
1, RAJA::omp_parallel_for_exec,
RAJA::statement::For<0, RAJA::seq_exec, RAJA::statement::Lambda<0>>>>;

while (resI2 > tol * tol)
Expand Down Expand Up @@ -329,18 +327,12 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))

using jacobiCUDANestedPolicy =
RAJA::KernelPolicy<RAJA::statement::CudaKernel<RAJA::statement::Tile<
1,
RAJA::tile_fixed<32>,
RAJA::cuda_block_y_loop,
1, RAJA::tile_fixed<32>, RAJA::cuda_block_y_loop,
RAJA::statement::Tile<
0,
RAJA::tile_fixed<32>,
RAJA::cuda_block_x_loop,
0, RAJA::tile_fixed<32>, RAJA::cuda_block_x_loop,
RAJA::statement::For<
1,
RAJA::cuda_thread_y_direct,
RAJA::statement::For<0,
RAJA::cuda_thread_x_direct,
1, RAJA::cuda_thread_y_direct,
RAJA::statement::For<0, RAJA::cuda_thread_x_direct,
RAJA::statement::Lambda<0>>>>>>>;

resI2 = 1;
Expand Down Expand Up @@ -411,18 +403,12 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))

using jacobiHIPNestedPolicy =
RAJA::KernelPolicy<RAJA::statement::HipKernel<RAJA::statement::Tile<
1,
RAJA::tile_fixed<32>,
RAJA::hip_block_y_loop,
1, RAJA::tile_fixed<32>, RAJA::hip_block_y_loop,
RAJA::statement::Tile<
0,
RAJA::tile_fixed<32>,
RAJA::hip_block_x_loop,
0, RAJA::tile_fixed<32>, RAJA::hip_block_x_loop,
RAJA::statement::For<
1,
RAJA::hip_thread_y_direct,
RAJA::statement::For<0,
RAJA::hip_thread_x_direct,
1, RAJA::hip_thread_y_direct,
RAJA::statement::For<0, RAJA::hip_thread_x_direct,
RAJA::statement::Lambda<0>>>>>>>;

resI2 = 1;
Expand Down Expand Up @@ -512,8 +498,7 @@ void computeErr(double* I, grid_s grid)
RAJA::ReduceMax<RAJA::seq_reduce, double> tMax(-1.0);

using jacobiSeqNestedPolicy = RAJA::KernelPolicy<RAJA::statement::For<
1,
RAJA::seq_exec,
1, RAJA::seq_exec,
RAJA::statement::For<0, RAJA::seq_exec, RAJA::statement::Lambda<0>>>>;

RAJA::kernel<jacobiSeqNestedPolicy>(
Expand Down
11 changes: 3 additions & 8 deletions examples/kernel-dynamic-tile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,10 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
using namespace RAJA;

kernel_param<KernelPolicy<statement::Tile<
1,
tile_dynamic<1>,
seq_exec,
1, tile_dynamic<1>, seq_exec,
statement::Tile<
0,
tile_dynamic<0>,
seq_exec,
statement::For<1,
seq_exec,
0, tile_dynamic<0>, seq_exec,
statement::For<1, seq_exec,
statement::For<0, seq_exec, statement::Lambda<0>>>>>>>(
make_tuple(RangeSegment{0, 25}, RangeSegment{0, 25}),
make_tuple(TileSize{5}, TileSize{10}),
Expand Down
Loading

0 comments on commit 34496ca

Please sign in to comment.