Skip to content

Commit

Permalink
Merge branch 'warnpoint' into 'master'
Browse files Browse the repository at this point in the history
Add Deprecated Warning for Point Members

See merge request StanfordLegion/legion!1336
  • Loading branch information
rainmakereuab committed Jun 23, 2024
2 parents dbd7448 + 2629e0b commit bc02aaa
Show file tree
Hide file tree
Showing 31 changed files with 641 additions and 551 deletions.
3 changes: 3 additions & 0 deletions bindings/regent/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ if(Legion_USE_CUDA)
set_target_cuda_standard(Regent STANDARD ${Legion_CUDA_STANDARD})
set_target_cuda_architectures(Regent ARCHITECTURES ${Legion_CUDA_ARCH})
set_target_cuda_warnings_and_errors(Regent WARN_AS_ERROR ${Legion_BUILD_WARN_AS_ERROR})
# Temporarily silence nvcc front-end diagnostic 1444 (deprecated-declaration warning);
# remove this once the Realm::Point class is updated
target_compile_options(Regent PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
-Xcudafe=--diag_suppress=1444>)
elseif(Legion_USE_HIP)
target_include_directories(Regent PRIVATE ${HIP_INCLUDE_DIRS})
if (Legion_HIP_TARGET STREQUAL "CUDA")
Expand Down
4 changes: 2 additions & 2 deletions examples/attach_2darray_c_fortran_layout/attach_2darray.cc
Original file line number Diff line number Diff line change
Expand Up @@ -232,8 +232,8 @@ void read_field_task(const Task *task,
int errors = 0;
for (PointInRectIterator<2> pir(rect); pir(); pir++) {
double expval = (args.base_val +
((*pir).x * args.step_x) +
((*pir).y * args.step_y));
((*pir)[0] * args.step_x) +
((*pir)[1] * args.step_y));
double actval = acc[*pir];
if(fabs(actval - expval) < 1e-10) {
printf("%.1f\t", actval);
Expand Down
3 changes: 3 additions & 0 deletions examples/circuit/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ if(Legion_USE_CUDA)
set_target_cuda_standard(circuit STANDARD ${Legion_CUDA_STANDARD})
set_target_cuda_architectures(circuit ARCHITECTURES ${Legion_CUDA_ARCH})
set_target_cuda_warnings_and_errors(circuit WARN_AS_ERROR ${Legion_BUILD_WARN_AS_ERROR})
# Temporarily silence nvcc front-end diagnostic 1444 (deprecated-declaration warning);
# remove this once the Realm::Point class is updated
target_compile_options(circuit PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
-Xcudafe=--diag_suppress=1444>)
elseif(Legion_USE_HIP)
set(GPU_SOURCES circuit_gpu.cu)
if(Legion_HIP_TARGET STREQUAL "CUDA")
Expand Down
3 changes: 3 additions & 0 deletions examples/future_instance/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,8 @@ add_executable(future_instance ${GPU_SOURCES})
set_target_cuda_standard(future_instance STANDARD ${Legion_CUDA_STANDARD})
set_target_cuda_architectures(future_instance ARCHITECTURES ${Legion_CUDA_ARCH})
set_target_cuda_warnings_and_errors(future_instance WARN_AS_ERROR ${Legion_BUILD_WARN_AS_ERROR})
# Temporarily silence nvcc front-end diagnostic 1444 (deprecated-declaration warning);
# remove this once the Realm::Point class is updated
target_compile_options(future_instance PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
-Xcudafe=--diag_suppress=1444>)

target_link_libraries(future_instance Legion::Legion)
18 changes: 9 additions & 9 deletions examples/kokkos_saxpy/kokkos_saxpy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,8 @@ class SaxpyTask {
typename execution_space::memory_space> y_ofs = acc_y.accessor;

Kokkos::RangePolicy<execution_space> range(runtime->get_executing_processor(ctx).kokkos_work_space(),
subspace.lo.x,
subspace.hi.x + 1);
subspace.lo[0],
subspace.hi[0] + 1);
Kokkos::parallel_for(range,
SaxpyFunctor<execution_space>(args.alpha, x_ofs, y_ofs));
}
Expand Down Expand Up @@ -185,15 +185,15 @@ class SdotTask {
// (i.e. the subspace on which you have privileges matches what
// KokkosBlas is going to compute over), you can just use
// OffsetView::view() to convert
assert(x.begin(0) == subspace.lo.x);
assert(x.begin(0) == subspace.lo[0]);
x_rel = x.view();

// option 2: if you're not sure what the OffsetView's bounds are
// (or if you just like more self-documenting code) you can create
// the subview with the exact bounds you want and then convert that
y_rel = Kokkos::Experimental::subview(y,
std::make_pair(subspace.lo.x,
subspace.hi.x + 1))
std::make_pair(subspace.lo[0],
subspace.hi[0] + 1))
.view();

// the KokkosBlas::dot implementation that returns a float directly
Expand All @@ -217,8 +217,8 @@ class SdotTask {
}
#endif
Kokkos::RangePolicy<execution_space> range(runtime->get_executing_processor(ctx).kokkos_work_space(),
subspace.lo.x,
subspace.hi.x + 1);
subspace.lo[0],
subspace.hi[0] + 1);
float sum = 0.0f;
// Kokkos does not support CUDA lambdas by default - check that they
// are present
Expand Down Expand Up @@ -259,15 +259,15 @@ class InitTask {
Kokkos::LayoutStride,
typename execution_space::memory_space> view = acc.accessor;

size_t n_elements = subspace.hi.x - subspace.lo.x + 1;
size_t n_elements = subspace.hi[0] - subspace.lo[0] + 1;
Kokkos::RangePolicy<execution_space> range(runtime->get_executing_processor(ctx).kokkos_work_space(),
0, n_elements);
Kokkos::parallel_for(range,
KOKKOS_LAMBDA (int i) {
// the view must be indexed with a relative address (i), but the
// value to store is based on the global index (relative index
// plus the subspace lower bound)
view(i) = (i + subspace.lo.x) + offset;
view(i) = (i + subspace.lo[0]) + offset;
});
}
};
Expand Down
2 changes: 1 addition & 1 deletion examples/python_interop/python_interop.cc
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ int64_t init_task(const Task *task,

// Fill memory with some recognizable pattern.
for (PointInRectIterator<2> pir(rect); pir(); pir++) {
double value = (double)((*pir)[0]*(rect.hi.y - rect.lo.y + 1) + (*pir)[1]);
double value = (double)((*pir)[0]*(rect.hi[1] - rect.lo[1] + 1) + (*pir)[1]);
acc[*pir] = value;
}

Expand Down
3 changes: 3 additions & 0 deletions examples/realm_saxpy/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ if(Legion_USE_CUDA)
set_target_cuda_standard(realm_saxpy STANDARD ${Legion_CUDA_STANDARD})
set_target_cuda_architectures(realm_saxpy ARCHITECTURES ${Legion_CUDA_ARCH})
set_target_cuda_warnings_and_errors(realm_saxpy WARN_AS_ERROR ${Legion_BUILD_WARN_AS_ERROR})
# Temporarily silence nvcc front-end diagnostic 1444 (deprecated-declaration warning);
# remove this once the Realm::Point class is updated
target_compile_options(realm_saxpy PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
-Xcudafe=--diag_suppress=1444>)
elseif(Legion_USE_HIP)
set(GPU_SOURCES realm_saxpy_gpu.cu)
if(Legion_HIP_TARGET STREQUAL "CUDA")
Expand Down
84 changes: 42 additions & 42 deletions examples/realm_stencil/realm_stencil.cc
Original file line number Diff line number Diff line change
Expand Up @@ -206,15 +206,15 @@ void get_base_and_stride(RegionInstance inst, FieldID fid, DTYPE *&base, size_t
{
AffineAccessor<DTYPE, 2, coord_t> acc = AffineAccessor<DTYPE, 2, coord_t>(inst, fid);
base = reinterpret_cast<DTYPE *>(acc.ptr(inst.get_indexspace<2, coord_t>().bounds.lo));
assert(acc.strides.x == sizeof(DTYPE));
stride = acc.strides.y;
assert(acc.strides[0] == sizeof(DTYPE));
stride = acc.strides[1];
}

void dump(RegionInstance inst, FieldID fid, Rect2 bounds, const char *prefix)
{
AffineAccessor<DTYPE, 2, coord_t> acc = AffineAccessor<DTYPE, 2, coord_t>(inst, fid);
for (PointInRectIterator<2, coord_t> it(bounds); it.valid; it.step()) {
printf("%s: %2lld %2lld value %8.3f\n", prefix, it.p.x, it.p.y, acc.read(it.p));
printf("%s: %2lld %2lld value %8.3f\n", prefix, it.p[0], it.p[1], acc.read(it.p));
}
}

Expand Down Expand Up @@ -253,10 +253,10 @@ void inline_copy_raw(RegionInstance src_inst, RegionInstance dst_inst,

copy2D(src_base, dst_base,
src_stride/sizeof(DTYPE),
src_offset.x, src_offset.x + size.x,
src_offset.y, src_offset.y + size.y,
src_offset[0], src_offset[0] + size[0],
src_offset[1], src_offset[1] + size[1],
dst_stride/sizeof(DTYPE),
dst_offset.x, dst_offset.y);
dst_offset[0], dst_offset[1]);
}

void stencil_task(const void *args, size_t arglen,
Expand Down Expand Up @@ -292,10 +292,10 @@ void stencil_task(const void *args, size_t arglen,

stencil(private_base_input, private_base_output, weights,
private_stride_input/sizeof(DTYPE),
interior_offset.x,
interior_offset.x + interior_size.x,
interior_offset.y,
interior_offset.y + interior_size.y);
interior_offset[0],
interior_offset[0] + interior_size[0],
interior_offset[1],
interior_offset[1] + interior_size[1]);
}

void increment_task(const void *args, size_t arglen,
Expand All @@ -314,10 +314,10 @@ void increment_task(const void *args, size_t arglen,

increment(private_base_input,
private_stride_input/sizeof(DTYPE),
outer_offset.x,
outer_offset.x + outer_size.x,
outer_offset.y,
outer_offset.y + outer_size.y);
outer_offset[0],
outer_offset[0] + outer_size[0],
outer_offset[1],
outer_offset[1] + outer_size[1]);

if (a.xp_inst.exists())
inline_copy(a.private_inst, a.xp_inst, FID_INPUT,
Expand Down Expand Up @@ -734,20 +734,20 @@ void top_level_task(const void *args, size_t arglen,
std::vector<Event> events;
for (PointInRectIterator<2, coord_t> it(shards); it.valid; it.step()) {
Point2 i(it.p);
Rect2 xp_bounds(Point2(x_blocks[i.x].hi + 1, y_blocks[i.y].lo),
Point2(x_blocks[i.x].hi + RADIUS, y_blocks[i.y].hi));
Rect2 xm_bounds(Point2(x_blocks[i.x].lo - RADIUS, y_blocks[i.y].lo),
Point2(x_blocks[i.x].lo - 1, y_blocks[i.y].hi));
Rect2 yp_bounds(Point2(x_blocks[i.x].lo, y_blocks[i.y].hi + 1),
Point2(x_blocks[i.x].hi, y_blocks[i.y].hi + RADIUS));
Rect2 ym_bounds(Point2(x_blocks[i.x].lo, y_blocks[i.y].lo - RADIUS),
Point2(x_blocks[i.x].hi, y_blocks[i.y].lo - 1));
Rect2 xp_bounds(Point2(x_blocks[i[0]].hi + 1, y_blocks[i[1]].lo),
Point2(x_blocks[i[0]].hi + RADIUS, y_blocks[i[1]].hi));
Rect2 xm_bounds(Point2(x_blocks[i[0]].lo - RADIUS, y_blocks[i[1]].lo),
Point2(x_blocks[i[0]].lo - 1, y_blocks[i[1]].hi));
Rect2 yp_bounds(Point2(x_blocks[i[0]].lo, y_blocks[i[1]].hi + 1),
Point2(x_blocks[i[0]].hi, y_blocks[i[1]].hi + RADIUS));
Rect2 ym_bounds(Point2(x_blocks[i[0]].lo, y_blocks[i[1]].lo - RADIUS),
Point2(x_blocks[i[0]].hi, y_blocks[i[1]].lo - 1));

Processor shard_proc(shard_procs[i]);
Memory memory(proc_regmems[shard_proc]);

// Region allocation has to be done on the remote node
if (i.x != shards.hi.x) {
if (i[0] != shards.hi[0]) {
CreateRegionArgs args;
args.bounds = xp_bounds;
args.memory = memory;
Expand All @@ -756,7 +756,7 @@ void top_level_task(const void *args, size_t arglen,
events.push_back(shard_proc.spawn(CREATE_REGION_TASK, &args, sizeof(args)));
}

if (i.x != shards.lo.x) {
if (i[0] != shards.lo[0]) {
CreateRegionArgs args;
args.bounds = xm_bounds;
args.memory = memory;
Expand All @@ -765,7 +765,7 @@ void top_level_task(const void *args, size_t arglen,
events.push_back(shard_proc.spawn(CREATE_REGION_TASK, &args, sizeof(args)));
}

if (i.y != shards.hi.y) {
if (i[1] != shards.hi[1]) {
CreateRegionArgs args;
args.bounds = yp_bounds;
args.memory = memory;
Expand All @@ -774,7 +774,7 @@ void top_level_task(const void *args, size_t arglen,
events.push_back(shard_proc.spawn(CREATE_REGION_TASK, &args, sizeof(args)));
}

if (i.y != shards.lo.y) {
if (i[1] != shards.lo[1]) {
CreateRegionArgs args;
args.bounds = ym_bounds;
args.memory = memory;
Expand All @@ -800,15 +800,15 @@ void top_level_task(const void *args, size_t arglen,
for (PointInRectIterator<2, coord_t> it(shards); it.valid; it.step()) {
Point2 i(it.p);

if (i.x != shards.hi.x) xp_bars_empty[i] = Barrier::create_barrier(1);
if (i.x != shards.lo.x) xm_bars_empty[i] = Barrier::create_barrier(1);
if (i.y != shards.hi.y) yp_bars_empty[i] = Barrier::create_barrier(1);
if (i.y != shards.lo.y) ym_bars_empty[i] = Barrier::create_barrier(1);
if (i[0] != shards.hi[0]) xp_bars_empty[i] = Barrier::create_barrier(1);
if (i[0] != shards.lo[0]) xm_bars_empty[i] = Barrier::create_barrier(1);
if (i[1] != shards.hi[1]) yp_bars_empty[i] = Barrier::create_barrier(1);
if (i[1] != shards.lo[1]) ym_bars_empty[i] = Barrier::create_barrier(1);

if (i.x != shards.hi.x) xp_bars_full[i] = Barrier::create_barrier(1);
if (i.x != shards.lo.x) xm_bars_full[i] = Barrier::create_barrier(1);
if (i.y != shards.hi.y) yp_bars_full[i] = Barrier::create_barrier(1);
if (i.y != shards.lo.y) ym_bars_full[i] = Barrier::create_barrier(1);
if (i[0] != shards.hi[0]) xp_bars_full[i] = Barrier::create_barrier(1);
if (i[0] != shards.lo[0]) xm_bars_full[i] = Barrier::create_barrier(1);
if (i[1] != shards.hi[1]) yp_bars_full[i] = Barrier::create_barrier(1);
if (i[1] != shards.lo[1]) ym_bars_full[i] = Barrier::create_barrier(1);
}

// Create barrier to keep shard launch synchronized
Expand All @@ -824,15 +824,15 @@ void top_level_task(const void *args, size_t arglen,
for (PointInRectIterator<2, coord_t> it(shards); it.valid; it.step()) {
Point2 i(it.p);

Rect2 interior_bounds(Point2(x_blocks[i.x].lo, y_blocks[i.y].lo),
Point2(x_blocks[i.x].hi, y_blocks[i.y].hi));
Rect2 exterior_bounds(Point2(x_blocks[i.x].lo - RADIUS, y_blocks[i.y].lo - RADIUS),
Point2(x_blocks[i.x].hi + RADIUS, y_blocks[i.y].hi + RADIUS));
Rect2 interior_bounds(Point2(x_blocks[i[0]].lo, y_blocks[i[1]].lo),
Point2(x_blocks[i[0]].hi, y_blocks[i[1]].hi));
Rect2 exterior_bounds(Point2(x_blocks[i[0]].lo - RADIUS, y_blocks[i[1]].lo - RADIUS),
Point2(x_blocks[i[0]].hi + RADIUS, y_blocks[i[1]].hi + RADIUS));
// As interior, but bloated only on the outer edges
Rect2 outer_bounds(Point2(x_blocks[i.x].lo - (i.x == shards.lo.x ? RADIUS : 0),
y_blocks[i.y].lo - (i.y == shards.lo.y ? RADIUS : 0)),
Point2(x_blocks[i.x].hi + (i.x == shards.hi.x ? RADIUS : 0),
y_blocks[i.y].hi + (i.y == shards.hi.y ? RADIUS : 0)));
Rect2 outer_bounds(Point2(x_blocks[i[0]].lo - (i[0] == shards.lo[0] ? RADIUS : 0),
y_blocks[i[1]].lo - (i[1] == shards.lo[1] ? RADIUS : 0)),
Point2(x_blocks[i[0]].hi + (i[0] == shards.hi[0] ? RADIUS : 0),
y_blocks[i[1]].hi + (i[1] == shards.hi[1] ? RADIUS : 0)));

// Pack arguments
ShardArgs args;
Expand Down
3 changes: 3 additions & 0 deletions examples/thrust_interop/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,8 @@ add_executable(thrust_interop ${CPU_SOURCES} ${GPU_SOURCES})
set_target_cuda_standard(thrust_interop STANDARD ${Legion_CUDA_STANDARD})
set_target_cuda_architectures(thrust_interop ARCHITECTURES ${Legion_CUDA_ARCH})
set_target_cuda_warnings_and_errors(thrust_interop WARN_AS_ERROR ${Legion_BUILD_WARN_AS_ERROR})
# Temporarily silence nvcc front-end diagnostic 1444 (deprecated-declaration warning);
# remove this once the Realm::Point class is updated
target_compile_options(thrust_interop PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
-Xcudafe=--diag_suppress=1444>)
target_link_libraries(thrust_interop Legion::Legion)
target_compile_options(thrust_interop PRIVATE $<$<COMPILE_LANGUAGE:CXX>:${CXX_BUILD_WARNING_FLAGS}>)
3 changes: 3 additions & 0 deletions runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ if(Legion_USE_CUDA)
#TODO(apryakhin): Enable with cmake 3.27
#set_property(TARGET realm_cuda_fatbin PROPERTY CUDA_FATBIN_COMPILATION ON)
target_compile_options(realm_cuda_fatbin PRIVATE $<$<COMPILE_LANG_AND_ID:CUDA,NVIDIA>:
-Xcudafe=--diag_suppress=1444 # Remove once Point class is updated
-Xcudafe=--diag_suppress=boolean_controlling_expr_is_constant
--fatbin>)
target_compile_definitions(realm_cuda_fatbin PRIVATE "CUDA_FATBIN_COMPILATION")
Expand Down Expand Up @@ -934,6 +935,7 @@ add_library(Legion::LegionRuntime ALIAS LegionRuntime)
# Add CUDA-specific properties
if(Legion_USE_CUDA)
target_compile_options(LegionRuntime PRIVATE $<$<COMPILE_LANG_AND_ID:CUDA,NVIDIA>:
-Xcudafe=--diag_suppress=1444 # Remove once Point class is updated
-Xcudafe=--diag_suppress=boolean_controlling_expr_is_constant>)

set_target_cuda_warnings_and_errors(LegionRuntime WARN_AS_ERROR ${Legion_BUILD_WARN_AS_ERROR})
Expand Down Expand Up @@ -971,6 +973,7 @@ if(Legion_USE_HIP)
if(Legion_HIP_TARGET STREQUAL "CUDA")
target_sources(LegionRuntime PRIVATE ${LEGION_HIP_SRC})
target_compile_options(LegionRuntime PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
-Xcudafe=--diag_suppress=1444 # Remove once Point class is updated
-Xcudafe=--diag_suppress=boolean_controlling_expr_is_constant>)
target_include_directories(LegionRuntime PRIVATE ${HIP_ROOT_DIR}/include)
# complex reduction ops bring in a public dependency on cuda headers
Expand Down
71 changes: 37 additions & 34 deletions runtime/realm/deppart/byfield.cc
Original file line number Diff line number Diff line change
Expand Up @@ -117,40 +117,43 @@ namespace Realm {
while(true) {
FT val = a_data.read(p);
Point<N,T> p2 = p;
while(p2.x < r.hi.x) {
Point<N,T> p3 = p2;
p3.x++;
FT val2 = a_data.read(p3);
if(val != val2) {
// record old strip
BM *&bmp = bitmasks[val];
if(!bmp) bmp = new BM;
bmp->add_rect(Rect<N,T>(p,p2));
//std::cout << val << ": " << p << ".." << p2 << std::endl;
val = val2;
p = p3;
}
p2 = p3;
}
// record whatever strip we have at the end
BM *&bmp = bitmasks[val];
if(!bmp) bmp = new BM;
bmp->add_rect(Rect<N,T>(p,p2));
//std::cout << val << ": " << p << ".." << p2 << std::endl;

// are we done?
if(p2 == r.hi) break;

// now go to the next span, if there is one (can't be in 1-D)
assert(N > 1);
for(int i = 0; i < (N - 1); i++) {
p[i] = r.lo[i];
if(p[i + 1] < r.hi[i+1]) {
p[i + 1] += 1;
break;
}
}
}
while(p2[0] < r.hi[0]) {
Point<N, T> p3 = p2;
p3[0]++;
FT val2 = a_data.read(p3);
if(val != val2) {
// record old strip
BM *&bmp = bitmasks[val];
if(!bmp)
bmp = new BM;
bmp->add_rect(Rect<N, T>(p, p2));
// std::cout << val << ": " << p << ".." << p2 << std::endl;
val = val2;
p = p3;
}
p2 = p3;
}
// record whatever strip we have at the end
BM *&bmp = bitmasks[val];
if(!bmp)
bmp = new BM;
bmp->add_rect(Rect<N, T>(p, p2));
// std::cout << val << ": " << p << ".." << p2 << std::endl;

// are we done?
if(p2 == r.hi)
break;

// now go to the next span, if there is one (can't be in 1-D)
assert(N > 1);
for(int i = 0; i < (N - 1); i++) {
p[i] = r.lo[i];
if(p[i + 1] < r.hi[i + 1]) {
p[i + 1] += 1;
break;
}
}
}
}
}
}
Expand Down
Loading

0 comments on commit bc02aaa

Please sign in to comment.