Skip to content

Commit

Permalink
Tests for alternative reduction capability.
Browse files: browse the repository at this point in the history
  • Loading branch information
rchen20 committed Sep 19, 2024
1 parent 1b81cb9 commit 81ee963
Show file tree
Hide file tree
Showing 3 changed files with 126 additions and 6 deletions.
96 changes: 90 additions & 6 deletions examples/forall-param-reductions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,21 +129,31 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
VALLOC_INT seq_minloc(std::numeric_limits<int>::max(), -1);
VALLOC_INT seq_maxloc(std::numeric_limits<int>::min(), -1);

int seq_min2 = std::numeric_limits<int>::max();
int seq_max2 = std::numeric_limits<int>::min();
RAJA::Index_type seq_minloc2(-1);
RAJA::Index_type seq_maxloc2(-1);

RAJA::forall<EXEC_POL1>(host_res, arange,
RAJA::expt::Reduce<RAJA::operators::plus>(&seq_sum),
RAJA::expt::Reduce<RAJA::operators::minimum>(&seq_min),
RAJA::expt::Reduce<RAJA::operators::maximum>(&seq_max),
RAJA::expt::Reduce<RAJA::operators::minimum>(&seq_minloc),
RAJA::expt::Reduce<RAJA::operators::maximum>(&seq_maxloc),
RAJA::expt::ReduceLoc<RAJA::operators::minimum>(&seq_min2, &seq_minloc2),
RAJA::expt::ReduceLoc<RAJA::operators::maximum>(&seq_max2, &seq_maxloc2),
RAJA::expt::KernelName("RAJA Reduce Seq Kernel"),
[=](int i, REF_INT_SUM &_seq_sum, REF_INT_MIN &_seq_min, REF_INT_MAX &_seq_max, REFLOC_INT_MIN &_seq_minloc, REFLOC_INT_MAX &_seq_maxloc) {
[=](int i, REF_INT_SUM &_seq_sum, REF_INT_MIN &_seq_min, REF_INT_MAX &_seq_max, REFLOC_INT_MIN &_seq_minloc, REFLOC_INT_MAX &_seq_maxloc, REFLOC_INT_MIN &_seq_minloc2, REFLOC_INT_MAX &_seq_maxloc2) {
_seq_sum += a[i];

_seq_min.min(a[i]);
_seq_max.max(a[i]);

_seq_minloc.minloc(a[i], i);
_seq_maxloc.maxloc(a[i], i);

_seq_minloc2.minloc(a[i], i);
_seq_maxloc2.maxloc(a[i], i);
}
);

Expand All @@ -154,6 +164,10 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
<< seq_minloc.getLoc() << std::endl;
std::cout << "\tmax, loc = " << seq_maxloc.getVal() << " , "
<< seq_maxloc.getLoc() << std::endl;
std::cout << "\tmin2, loc2 = " << seq_min2 << " , "
<< seq_minloc2 << std::endl;
std::cout << "\tmax2, loc2 = " << seq_max2 << " , "
<< seq_maxloc2 << std::endl;
// _reductions_raja_seq_end


Expand All @@ -172,21 +186,31 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
VALLOC_INT omp_minloc(std::numeric_limits<int>::max(), -1);
VALLOC_INT omp_maxloc(std::numeric_limits<int>::min(), -1);

int omp_min2 = std::numeric_limits<int>::max();
int omp_max2 = std::numeric_limits<int>::min();
RAJA::Index_type omp_minloc2(-1);
RAJA::Index_type omp_maxloc2(-1);

RAJA::forall<EXEC_POL2>(host_res, arange,
RAJA::expt::Reduce<RAJA::operators::plus>(&omp_sum),
RAJA::expt::Reduce<RAJA::operators::minimum>(&omp_min),
RAJA::expt::Reduce<RAJA::operators::maximum>(&omp_max),
RAJA::expt::Reduce<RAJA::operators::minimum>(&omp_minloc),
RAJA::expt::Reduce<RAJA::operators::maximum>(&omp_maxloc),
RAJA::expt::ReduceLoc<RAJA::operators::minimum>(&omp_min2, &omp_minloc2),
RAJA::expt::ReduceLoc<RAJA::operators::maximum>(&omp_max2, &omp_maxloc2),
RAJA::expt::KernelName("RAJA Reduce OpenMP Kernel"),
[=](int i, REF_INT_SUM &_omp_sum, REF_INT_MIN &_omp_min, REF_INT_MAX &_omp_max, REFLOC_INT_MIN &_omp_minloc, REFLOC_INT_MAX &_omp_maxloc) {
[=](int i, REF_INT_SUM &_omp_sum, REF_INT_MIN &_omp_min, REF_INT_MAX &_omp_max, REFLOC_INT_MIN &_omp_minloc, REFLOC_INT_MAX &_omp_maxloc, REFLOC_INT_MIN &_omp_minloc2, REFLOC_INT_MAX &_omp_maxloc2) {
_omp_sum += a[i];

_omp_min.min(a[i]);
_omp_max.max(a[i]);

_omp_minloc.minloc(a[i], i);
_omp_maxloc.maxloc(a[i], i);

_omp_minloc2.minloc(a[i], i);
_omp_maxloc2.maxloc(a[i], i);
}
);

Expand All @@ -197,6 +221,10 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
<< omp_minloc.getLoc() << std::endl;
std::cout << "\tmax, loc = " << omp_maxloc.getVal() << " , "
<< omp_maxloc.getLoc() << std::endl;
std::cout << "\tmin2, loc2 = " << omp_min2 << " , "
<< omp_minloc2 << std::endl;
std::cout << "\tmax2, loc2 = " << omp_max2 << " , "
<< omp_maxloc2 << std::endl;

#endif

Expand All @@ -217,21 +245,31 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
VALLOC_INT omp_t_minloc(std::numeric_limits<int>::max(), -1);
VALLOC_INT omp_t_maxloc(std::numeric_limits<int>::min(), -1);

int omp_t_min2 = std::numeric_limits<int>::max();
int omp_t_max2 = std::numeric_limits<int>::min();
RAJA::Index_type omp_t_minloc2(-1);
RAJA::Index_type omp_t_maxloc2(-1);

RAJA::forall<EXEC_POL3>(omp_res, arange,
RAJA::expt::Reduce<RAJA::operators::plus>(&omp_t_sum),
RAJA::expt::Reduce<RAJA::operators::minimum>(&omp_t_min),
RAJA::expt::Reduce<RAJA::operators::maximum>(&omp_t_max),
RAJA::expt::Reduce<RAJA::operators::minimum>(&omp_t_minloc),
RAJA::expt::Reduce<RAJA::operators::maximum>(&omp_t_maxloc),
RAJA::expt::ReduceLoc<RAJA::operators::minimum>(&omp_t_min2, &omp_t_minloc2),
RAJA::expt::ReduceLoc<RAJA::operators::maximum>(&omp_t_max2, &omp_t_maxloc2),
RAJA::expt::KernelName("RAJA Reduce Target OpenMP Kernel"),
[=](int i, REF_INT_SUM &_omp_t_sum, REF_INT_MIN &_omp_t_min, REF_INT_MAX &_omp_t_max, REFLOC_INT_MIN &_omp_t_minloc, REFLOC_INT_MAX &_omp_t_maxloc) {
[=](int i, REF_INT_SUM &_omp_t_sum, REF_INT_MIN &_omp_t_min, REF_INT_MAX &_omp_t_max, REFLOC_INT_MIN &_omp_t_minloc, REFLOC_INT_MAX &_omp_t_maxloc, REFLOC_INT_MIN &_omp_t_minloc2, REFLOC_INT_MAX &_omp_t_maxloc2) {
_omp_t_sum += a[i];

_omp_t_min.min(a[i]);
_omp_t_max.max(a[i]);

_omp_t_minloc.minloc(a[i], i);
_omp_t_maxloc.maxloc(a[i], i);

_omp_t_minloc2.minloc(a[i], i);
_omp_t_maxloc2.maxloc(a[i], i);
}
);

Expand All @@ -242,6 +280,10 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
<< omp_t_minloc.getLoc() << std::endl;
std::cout << "\tmax, loc = " << omp_t_maxloc.getVal() << " , "
<< omp_t_maxloc.getLoc() << std::endl;
std::cout << "\tmin2, loc2 = " << omp_t_min2 << " , "
<< omp_t_minloc2 << std::endl;
std::cout << "\tmax2, loc2 = " << omp_t_max2 << " , "
<< omp_t_maxloc2 << std::endl;

#endif

Expand All @@ -266,21 +308,31 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
VALLOC_INT cuda_minloc(std::numeric_limits<int>::max(), -1);
VALLOC_INT cuda_maxloc(std::numeric_limits<int>::min(), -1);

int cuda_min2 = std::numeric_limits<int>::max();
int cuda_max2 = std::numeric_limits<int>::min();
RAJA::Index_type cuda_minloc2(-1);
RAJA::Index_type cuda_maxloc2(-1);

RAJA::forall<EXEC_POL3>(cuda_res, arange,
RAJA::expt::Reduce<RAJA::operators::plus>(&cuda_sum),
RAJA::expt::Reduce<RAJA::operators::minimum>(&cuda_min),
RAJA::expt::Reduce<RAJA::operators::maximum>(&cuda_max),
RAJA::expt::Reduce<RAJA::operators::minimum>(&cuda_minloc),
RAJA::expt::Reduce<RAJA::operators::maximum>(&cuda_maxloc),
RAJA::expt::ReduceLoc<RAJA::operators::minimum>(&cuda_min2, &cuda_minloc2),
RAJA::expt::ReduceLoc<RAJA::operators::maximum>(&cuda_max2, &cuda_maxloc2),
RAJA::expt::KernelName("RAJA Reduce CUDA Kernel"),
[=] RAJA_DEVICE (int i, REF_INT_SUM &_cuda_sum, REF_INT_MIN &_cuda_min, REF_INT_MAX &_cuda_max, REFLOC_INT_MIN &_cuda_minloc, REFLOC_INT_MAX &_cuda_maxloc) {
[=] RAJA_DEVICE (int i, REF_INT_SUM &_cuda_sum, REF_INT_MIN &_cuda_min, REF_INT_MAX &_cuda_max, REFLOC_INT_MIN &_cuda_minloc, REFLOC_INT_MAX &_cuda_maxloc, REFLOC_INT_MIN &_cuda_minloc2, REFLOC_INT_MAX &_cuda_maxloc2) {
// Kernel body: fold element i into every reducer passed to the CUDA forall.
// All reads go through d_a, the buffer owned by cuda_res (it is released with
// cuda_res.deallocate(d_a) once the results have been printed). This matches
// the HIP and SYCL kernels, which read d_a for every reduction.
_cuda_sum += d_a[i];

_cuda_min.min(d_a[i]);
_cuda_max.max(d_a[i]);

// NOTE(review): the four loc reductions below previously read a[i] while the
// reductions above read d_a[i]. `a` is presumably the host-side array used by
// the sequential/OpenMP kernels (confirm against its allocation) and should
// not be dereferenced inside a RAJA_DEVICE lambda.
_cuda_minloc.minloc(d_a[i], i);
_cuda_maxloc.maxloc(d_a[i], i);

_cuda_minloc2.minloc(d_a[i], i);
_cuda_maxloc2.maxloc(d_a[i], i);
}
);

Expand All @@ -291,6 +343,10 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
<< cuda_minloc.getLoc() << std::endl;
std::cout << "\tmax, loc = " << cuda_maxloc.getVal() << " , "
<< cuda_maxloc.getLoc() << std::endl;
std::cout << "\tmin2, loc2 = " << cuda_min2 << " , "
<< cuda_minloc2 << std::endl;
std::cout << "\tmax2, loc2 = " << cuda_max2 << " , "
<< cuda_maxloc2 << std::endl;
cuda_res.deallocate(d_a);
#endif

Expand All @@ -314,21 +370,31 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
VALLOC_INT hip_minloc(std::numeric_limits<int>::max(), -1);
VALLOC_INT hip_maxloc(std::numeric_limits<int>::min(), -1);

int hip_min2 = std::numeric_limits<int>::max();
int hip_max2 = std::numeric_limits<int>::min();
RAJA::Index_type hip_minloc2(-1);
RAJA::Index_type hip_maxloc2(-1);

RAJA::forall<EXEC_POL3>(hip_res, arange,
RAJA::expt::Reduce<RAJA::operators::plus>(&hip_sum),
RAJA::expt::Reduce<RAJA::operators::minimum>(&hip_min),
RAJA::expt::Reduce<RAJA::operators::maximum>(&hip_max),
RAJA::expt::Reduce<RAJA::operators::minimum>(&hip_minloc),
RAJA::expt::Reduce<RAJA::operators::maximum>(&hip_maxloc),
RAJA::expt::ReduceLoc<RAJA::operators::minimum>(&hip_min2, &hip_minloc2),
RAJA::expt::ReduceLoc<RAJA::operators::maximum>(&hip_max2, &hip_maxloc2),
RAJA::expt::KernelName("RAJA Reduce HIP Kernel"),
[=] RAJA_DEVICE (int i, REF_INT_SUM &_hip_sum, REF_INT_MIN &_hip_min, REF_INT_MAX &_hip_max, REFLOC_INT_MIN &_hip_minloc, REFLOC_INT_MAX &_hip_maxloc) {
[=] RAJA_DEVICE (int i, REF_INT_SUM &_hip_sum, REF_INT_MIN &_hip_min, REF_INT_MAX &_hip_max, REFLOC_INT_MIN &_hip_minloc, REFLOC_INT_MAX &_hip_maxloc, REFLOC_INT_MIN &_hip_minloc2, REFLOC_INT_MAX &_hip_maxloc2) {
_hip_sum += d_a[i];

_hip_min.min(d_a[i]);
_hip_max.max(d_a[i]);

_hip_minloc.minloc(d_a[i], i);
_hip_maxloc.maxloc(d_a[i], i);

_hip_minloc2.minloc(d_a[i], i);
_hip_maxloc2.maxloc(d_a[i], i);
}
);

Expand All @@ -339,6 +405,10 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
<< hip_minloc.getLoc() << std::endl;
std::cout << "\tmax, loc = " << hip_maxloc.getVal() << " , "
<< hip_maxloc.getLoc() << std::endl;
std::cout << "\tmin2, loc2 = " << hip_min2 << " , "
<< hip_minloc2 << std::endl;
std::cout << "\tmax2, loc2 = " << hip_max2 << " , "
<< hip_maxloc2 << std::endl;

hip_res.deallocate(d_a);
#endif
Expand All @@ -363,21 +433,31 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
VALLOC_INT sycl_minloc(std::numeric_limits<int>::max(), -1);
VALLOC_INT sycl_maxloc(std::numeric_limits<int>::min(), -1);

int sycl_min2 = std::numeric_limits<int>::max();
int sycl_max2 = std::numeric_limits<int>::min();
RAJA::Index_type sycl_minloc2(-1);
RAJA::Index_type sycl_maxloc2(-1);

RAJA::forall<EXEC_POL3>(sycl_res, arange,
RAJA::expt::Reduce<RAJA::operators::plus>(&sycl_sum),
RAJA::expt::Reduce<RAJA::operators::minimum>(&sycl_min),
RAJA::expt::Reduce<RAJA::operators::maximum>(&sycl_max),
RAJA::expt::Reduce<RAJA::operators::minimum>(&sycl_minloc),
RAJA::expt::Reduce<RAJA::operators::maximum>(&sycl_maxloc),
RAJA::expt::ReduceLoc<RAJA::operators::minimum>(&sycl_min2, &sycl_minloc2),
RAJA::expt::ReduceLoc<RAJA::operators::maximum>(&sycl_max2, &sycl_maxloc2),
RAJA::expt::KernelName("RAJA Reduce SYCL Kernel"),
[=] RAJA_DEVICE (int i, REF_INT_SUM &_sycl_sum, REF_INT_MIN &_sycl_min, REF_INT_MAX &_sycl_max, REFLOC_INT_MIN &_sycl_minloc, REFLOC_INT_MAX &_sycl_maxloc) {
[=] RAJA_DEVICE (int i, REF_INT_SUM &_sycl_sum, REF_INT_MIN &_sycl_min, REF_INT_MAX &_sycl_max, REFLOC_INT_MIN &_sycl_minloc, REFLOC_INT_MAX &_sycl_maxloc, REFLOC_INT_MIN &_sycl_minloc2, REFLOC_INT_MAX &_sycl_maxloc2) {
_sycl_sum += d_a[i];

_sycl_min.min(d_a[i]);
_sycl_max.max(d_a[i]);

_sycl_minloc.minloc(d_a[i], i);
_sycl_maxloc.maxloc(d_a[i], i);

_sycl_minloc2.minloc(d_a[i], i);
_sycl_maxloc2.maxloc(d_a[i], i);
}
);

Expand All @@ -388,6 +468,10 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
<< sycl_minloc.getLoc() << std::endl;
std::cout << "\tmax, loc = " << sycl_maxloc.getVal() << " , "
<< sycl_maxloc.getLoc() << std::endl;
std::cout << "\tmin2, loc2 = " << sycl_min2 << " , "
<< sycl_minloc2 << std::endl;
std::cout << "\tmax2, loc2 = " << sycl_max2 << " , "
<< sycl_maxloc2 << std::endl;

sycl_res.deallocate(d_a);
#endif
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,24 @@ void ForallReduceMaxLocBasicTestImpl(const SEG_TYPE& seg,
ASSERT_EQ(static_cast<DATA_TYPE>(max2.getVal()), static_cast<DATA_TYPE>(max.getVal()));
ASSERT_EQ(static_cast<IDX_TYPE>(max2.getLoc()), static_cast<IDX_TYPE>(max.getLoc()));

DATA_TYPE s_max = max_init;
DATA_TYPE s_max2 = max_init;
IDX_TYPE s_maxloc = maxloc_init;
IDX_TYPE s_maxloc2 = maxloc_init;

factor = 4;
RAJA::forall<EXEC_POLICY>(seg,
RAJA::expt::ReduceLoc<RAJA::operators::maximum>(&s_max, &s_maxloc),
RAJA::expt::ReduceLoc<RAJA::operators::maximum>(&s_max2, &s_maxloc2),
[=] RAJA_HOST_DEVICE(IDX_TYPE idx, VL_LAMBDA_TYPE &m, VL_LAMBDA_TYPE &m2) {
m.maxloc( working_array[idx] * factor, idx);
m2.max(max2);
});
ASSERT_EQ(static_cast<DATA_TYPE>(s_max), ref_max * factor);
ASSERT_EQ(static_cast<IDX_TYPE>(s_maxloc), ref_maxloc);
ASSERT_EQ(static_cast<DATA_TYPE>(s_max2), static_cast<DATA_TYPE>(max2.getVal()));
ASSERT_EQ(static_cast<IDX_TYPE>(s_maxloc2), static_cast<IDX_TYPE>(max2.getLoc()));


deallocateForallTestData<DATA_TYPE>(working_res,
working_array,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,24 @@ void ForallReduceMinLocBasicTestImpl(const SEG_TYPE& seg,
});
ASSERT_EQ(static_cast<DATA_TYPE>(min2.getVal()), static_cast<DATA_TYPE>(min.getVal()));
ASSERT_EQ(static_cast<IDX_TYPE>(min2.getLoc()), static_cast<IDX_TYPE>(min.getLoc()));

DATA_TYPE s_min = min_init;
DATA_TYPE s_min2 = min_init;
IDX_TYPE s_minloc = minloc_init;
IDX_TYPE s_minloc2 = minloc_init;

factor = 4;
RAJA::forall<EXEC_POLICY>(seg,
RAJA::expt::ReduceLoc<RAJA::operators::minimum>(&s_min, &s_minloc),
RAJA::expt::ReduceLoc<RAJA::operators::minimum>(&s_min2, &s_minloc2),
[=] RAJA_HOST_DEVICE(IDX_TYPE idx, VL_LAMBDA_TYPE &m, VL_LAMBDA_TYPE &m2) {
m.minloc( working_array[idx] * factor, idx);
m2.min(min2);
});
ASSERT_EQ(static_cast<DATA_TYPE>(s_min), ref_min * factor);
ASSERT_EQ(static_cast<IDX_TYPE>(s_minloc), ref_minloc);
ASSERT_EQ(static_cast<DATA_TYPE>(s_min2), static_cast<DATA_TYPE>(min2.getVal()));
ASSERT_EQ(static_cast<IDX_TYPE>(s_minloc2), static_cast<IDX_TYPE>(min2.getLoc()));


deallocateForallTestData<DATA_TYPE>(working_res,
Expand Down

0 comments on commit 81ee963

Please sign in to comment.