diff --git a/bindings/regent/CMakeLists.txt b/bindings/regent/CMakeLists.txt index c9e9ff0aa7..fbaa1e8313 100644 --- a/bindings/regent/CMakeLists.txt +++ b/bindings/regent/CMakeLists.txt @@ -66,6 +66,9 @@ if(Legion_USE_CUDA) set_target_cuda_standard(Regent STANDARD ${Legion_CUDA_STANDARD}) set_target_cuda_architectures(Regent ARCHITECTURES ${Legion_CUDA_ARCH}) set_target_cuda_warnings_and_errors(Regent WARN_AS_ERROR ${Legion_BUILD_WARN_AS_ERROR}) + # Remove this once the Realm::Point class is updated + target_compile_options(Regent PRIVATE $<$: + -Xcudafe=--diag_suppress=1444>) elseif(Legion_USE_HIP) target_include_directories(Regent PRIVATE ${HIP_INCLUDE_DIRS}) if (Legion_HIP_TARGET STREQUAL "CUDA") diff --git a/examples/attach_2darray_c_fortran_layout/attach_2darray.cc b/examples/attach_2darray_c_fortran_layout/attach_2darray.cc index db2e006b13..a829152a20 100644 --- a/examples/attach_2darray_c_fortran_layout/attach_2darray.cc +++ b/examples/attach_2darray_c_fortran_layout/attach_2darray.cc @@ -232,8 +232,8 @@ void read_field_task(const Task *task, int errors = 0; for (PointInRectIterator<2> pir(rect); pir(); pir++) { double expval = (args.base_val + - ((*pir).x * args.step_x) + - ((*pir).y * args.step_y)); + ((*pir)[0] * args.step_x) + + ((*pir)[1] * args.step_y)); double actval = acc[*pir]; if(fabs(actval - expval) < 1e-10) { printf("%.1f\t", actval); diff --git a/examples/circuit/CMakeLists.txt b/examples/circuit/CMakeLists.txt index 2c8dbf32ca..62e1d1a05f 100644 --- a/examples/circuit/CMakeLists.txt +++ b/examples/circuit/CMakeLists.txt @@ -34,6 +34,9 @@ if(Legion_USE_CUDA) set_target_cuda_standard(circuit STANDARD ${Legion_CUDA_STANDARD}) set_target_cuda_architectures(circuit ARCHITECTURES ${Legion_CUDA_ARCH}) set_target_cuda_warnings_and_errors(circuit WARN_AS_ERROR ${Legion_BUILD_WARN_AS_ERROR}) + # Remove this once the Realm::Point class is updated + target_compile_options(circuit PRIVATE $<$: + -Xcudafe=--diag_suppress=1444>) elseif(Legion_USE_HIP) 
set(GPU_SOURCES circuit_gpu.cu) if(Legion_HIP_TARGET STREQUAL "CUDA") diff --git a/examples/future_instance/CMakeLists.txt b/examples/future_instance/CMakeLists.txt index bf8c03043c..d430053592 100644 --- a/examples/future_instance/CMakeLists.txt +++ b/examples/future_instance/CMakeLists.txt @@ -31,5 +31,8 @@ add_executable(future_instance ${GPU_SOURCES}) set_target_cuda_standard(future_instance STANDARD ${Legion_CUDA_STANDARD}) set_target_cuda_architectures(future_instance ARCHITECTURES ${Legion_CUDA_ARCH}) set_target_cuda_warnings_and_errors(future_instance WARN_AS_ERROR ${Legion_BUILD_WARN_AS_ERROR}) +# Remove this once the Realm::Point class is updated +target_compile_options(future_instance PRIVATE $<$: + -Xcudafe=--diag_suppress=1444>) target_link_libraries(future_instance Legion::Legion) diff --git a/examples/kokkos_saxpy/kokkos_saxpy.cc b/examples/kokkos_saxpy/kokkos_saxpy.cc index c240d793f2..c9e66fe82f 100644 --- a/examples/kokkos_saxpy/kokkos_saxpy.cc +++ b/examples/kokkos_saxpy/kokkos_saxpy.cc @@ -138,8 +138,8 @@ class SaxpyTask { typename execution_space::memory_space> y_ofs = acc_y.accessor; Kokkos::RangePolicy range(runtime->get_executing_processor(ctx).kokkos_work_space(), - subspace.lo.x, - subspace.hi.x + 1); + subspace.lo[0], + subspace.hi[0] + 1); Kokkos::parallel_for(range, SaxpyFunctor(args.alpha, x_ofs, y_ofs)); } @@ -185,15 +185,15 @@ class SdotTask { // (i.e. 
the subspace on which you have privileges matches what // KokkosBlas is going to compute over), you can just use // OffsetView::view() to convert - assert(x.begin(0) == subspace.lo.x); + assert(x.begin(0) == subspace.lo[0]); x_rel = x.view(); // option 2: if you're not sure what the OffsetView's bounds are // (or if you just like more self-documenting code) you can create // the subview with the exact bounds you want and then convert that y_rel = Kokkos::Experimental::subview(y, - std::make_pair(subspace.lo.x, - subspace.hi.x + 1)) + std::make_pair(subspace.lo[0], + subspace.hi[0] + 1)) .view(); // the KokkosBlas::dot implementation that returns a float directly @@ -217,8 +217,8 @@ class SdotTask { } #endif Kokkos::RangePolicy range(runtime->get_executing_processor(ctx).kokkos_work_space(), - subspace.lo.x, - subspace.hi.x + 1); + subspace.lo[0], + subspace.hi[0] + 1); float sum = 0.0f; // Kokkos does not support CUDA lambdas by default - check that they // are present @@ -259,7 +259,7 @@ class InitTask { Kokkos::LayoutStride, typename execution_space::memory_space> view = acc.accessor; - size_t n_elements = subspace.hi.x - subspace.lo.x + 1; + size_t n_elements = subspace.hi[0] - subspace.lo[0] + 1; Kokkos::RangePolicy range(runtime->get_executing_processor(ctx).kokkos_work_space(), 0, n_elements); Kokkos::parallel_for(range, @@ -267,7 +267,7 @@ class InitTask { // using a relative address, but value to store // is based on global index // have to use a relative address! - view(i) = (i + subspace.lo.x) + offset; + view(i) = (i + subspace.lo[0]) + offset; }); } }; diff --git a/examples/python_interop/python_interop.cc b/examples/python_interop/python_interop.cc index 04ffd62e1c..c8370bd66f 100644 --- a/examples/python_interop/python_interop.cc +++ b/examples/python_interop/python_interop.cc @@ -55,7 +55,7 @@ int64_t init_task(const Task *task, // Fill memory with some recognizable pattern. 
for (PointInRectIterator<2> pir(rect); pir(); pir++) { - double value = (double)((*pir)[0]*(rect.hi.y - rect.lo.y + 1) + (*pir)[1]); + double value = (double)((*pir)[0]*(rect.hi[1] - rect.lo[1] + 1) + (*pir)[1]); acc[*pir] = value; } diff --git a/examples/realm_saxpy/CMakeLists.txt b/examples/realm_saxpy/CMakeLists.txt index dd94fb7876..f4a7eada7e 100644 --- a/examples/realm_saxpy/CMakeLists.txt +++ b/examples/realm_saxpy/CMakeLists.txt @@ -29,6 +29,9 @@ if(Legion_USE_CUDA) set_target_cuda_standard(realm_saxpy STANDARD ${Legion_CUDA_STANDARD}) set_target_cuda_architectures(realm_saxpy ARCHITECTURES ${Legion_CUDA_ARCH}) set_target_cuda_warnings_and_errors(realm_saxpy WARN_AS_ERROR ${Legion_BUILD_WARN_AS_ERROR}) + # Remove this once the Realm::Point class is updated + target_compile_options(realm_saxpy PRIVATE $<$: + -Xcudafe=--diag_suppress=1444>) elseif(Legion_USE_HIP) set(GPU_SOURCES realm_saxpy_gpu.cu) if(Legion_HIP_TARGET STREQUAL "CUDA") diff --git a/examples/realm_stencil/realm_stencil.cc b/examples/realm_stencil/realm_stencil.cc index bb0f21b50c..0ccf9e2471 100644 --- a/examples/realm_stencil/realm_stencil.cc +++ b/examples/realm_stencil/realm_stencil.cc @@ -206,15 +206,15 @@ void get_base_and_stride(RegionInstance inst, FieldID fid, DTYPE *&base, size_t { AffineAccessor acc = AffineAccessor(inst, fid); base = reinterpret_cast(acc.ptr(inst.get_indexspace<2, coord_t>().bounds.lo)); - assert(acc.strides.x == sizeof(DTYPE)); - stride = acc.strides.y; + assert(acc.strides[0] == sizeof(DTYPE)); + stride = acc.strides[1]; } void dump(RegionInstance inst, FieldID fid, Rect2 bounds, const char *prefix) { AffineAccessor acc = AffineAccessor(inst, fid); for (PointInRectIterator<2, coord_t> it(bounds); it.valid; it.step()) { - printf("%s: %2lld %2lld value %8.3f\n", prefix, it.p.x, it.p.y, acc.read(it.p)); + printf("%s: %2lld %2lld value %8.3f\n", prefix, it.p[0], it.p[1], acc.read(it.p)); } } @@ -253,10 +253,10 @@ void inline_copy_raw(RegionInstance src_inst, 
RegionInstance dst_inst, copy2D(src_base, dst_base, src_stride/sizeof(DTYPE), - src_offset.x, src_offset.x + size.x, - src_offset.y, src_offset.y + size.y, + src_offset[0], src_offset[0] + size[0], + src_offset[1], src_offset[1] + size[1], dst_stride/sizeof(DTYPE), - dst_offset.x, dst_offset.y); + dst_offset[0], dst_offset[1]); } void stencil_task(const void *args, size_t arglen, @@ -292,10 +292,10 @@ void stencil_task(const void *args, size_t arglen, stencil(private_base_input, private_base_output, weights, private_stride_input/sizeof(DTYPE), - interior_offset.x, - interior_offset.x + interior_size.x, - interior_offset.y, - interior_offset.y + interior_size.y); + interior_offset[0], + interior_offset[0] + interior_size[0], + interior_offset[1], + interior_offset[1] + interior_size[1]); } void increment_task(const void *args, size_t arglen, @@ -314,10 +314,10 @@ void increment_task(const void *args, size_t arglen, increment(private_base_input, private_stride_input/sizeof(DTYPE), - outer_offset.x, - outer_offset.x + outer_size.x, - outer_offset.y, - outer_offset.y + outer_size.y); + outer_offset[0], + outer_offset[0] + outer_size[0], + outer_offset[1], + outer_offset[1] + outer_size[1]); if (a.xp_inst.exists()) inline_copy(a.private_inst, a.xp_inst, FID_INPUT, @@ -734,20 +734,20 @@ void top_level_task(const void *args, size_t arglen, std::vector events; for (PointInRectIterator<2, coord_t> it(shards); it.valid; it.step()) { Point2 i(it.p); - Rect2 xp_bounds(Point2(x_blocks[i.x].hi + 1, y_blocks[i.y].lo), - Point2(x_blocks[i.x].hi + RADIUS, y_blocks[i.y].hi)); - Rect2 xm_bounds(Point2(x_blocks[i.x].lo - RADIUS, y_blocks[i.y].lo), - Point2(x_blocks[i.x].lo - 1, y_blocks[i.y].hi)); - Rect2 yp_bounds(Point2(x_blocks[i.x].lo, y_blocks[i.y].hi + 1), - Point2(x_blocks[i.x].hi, y_blocks[i.y].hi + RADIUS)); - Rect2 ym_bounds(Point2(x_blocks[i.x].lo, y_blocks[i.y].lo - RADIUS), - Point2(x_blocks[i.x].hi, y_blocks[i.y].lo - 1)); + Rect2 xp_bounds(Point2(x_blocks[i[0]].hi + 1, 
y_blocks[i[1]].lo), + Point2(x_blocks[i[0]].hi + RADIUS, y_blocks[i[1]].hi)); + Rect2 xm_bounds(Point2(x_blocks[i[0]].lo - RADIUS, y_blocks[i[1]].lo), + Point2(x_blocks[i[0]].lo - 1, y_blocks[i[1]].hi)); + Rect2 yp_bounds(Point2(x_blocks[i[0]].lo, y_blocks[i[1]].hi + 1), + Point2(x_blocks[i[0]].hi, y_blocks[i[1]].hi + RADIUS)); + Rect2 ym_bounds(Point2(x_blocks[i[0]].lo, y_blocks[i[1]].lo - RADIUS), + Point2(x_blocks[i[0]].hi, y_blocks[i[1]].lo - 1)); Processor shard_proc(shard_procs[i]); Memory memory(proc_regmems[shard_proc]); // Region allocation has to be done on the remote node - if (i.x != shards.hi.x) { + if (i[0] != shards.hi[0]) { CreateRegionArgs args; args.bounds = xp_bounds; args.memory = memory; @@ -756,7 +756,7 @@ void top_level_task(const void *args, size_t arglen, events.push_back(shard_proc.spawn(CREATE_REGION_TASK, &args, sizeof(args))); } - if (i.x != shards.lo.x) { + if (i[0] != shards.lo[0]) { CreateRegionArgs args; args.bounds = xm_bounds; args.memory = memory; @@ -765,7 +765,7 @@ void top_level_task(const void *args, size_t arglen, events.push_back(shard_proc.spawn(CREATE_REGION_TASK, &args, sizeof(args))); } - if (i.y != shards.hi.y) { + if (i[1] != shards.hi[1]) { CreateRegionArgs args; args.bounds = yp_bounds; args.memory = memory; @@ -774,7 +774,7 @@ void top_level_task(const void *args, size_t arglen, events.push_back(shard_proc.spawn(CREATE_REGION_TASK, &args, sizeof(args))); } - if (i.y != shards.lo.y) { + if (i[1] != shards.lo[1]) { CreateRegionArgs args; args.bounds = ym_bounds; args.memory = memory; @@ -800,15 +800,15 @@ void top_level_task(const void *args, size_t arglen, for (PointInRectIterator<2, coord_t> it(shards); it.valid; it.step()) { Point2 i(it.p); - if (i.x != shards.hi.x) xp_bars_empty[i] = Barrier::create_barrier(1); - if (i.x != shards.lo.x) xm_bars_empty[i] = Barrier::create_barrier(1); - if (i.y != shards.hi.y) yp_bars_empty[i] = Barrier::create_barrier(1); - if (i.y != shards.lo.y) ym_bars_empty[i] = 
Barrier::create_barrier(1); + if (i[0] != shards.hi[0]) xp_bars_empty[i] = Barrier::create_barrier(1); + if (i[0] != shards.lo[0]) xm_bars_empty[i] = Barrier::create_barrier(1); + if (i[1] != shards.hi[1]) yp_bars_empty[i] = Barrier::create_barrier(1); + if (i[1] != shards.lo[1]) ym_bars_empty[i] = Barrier::create_barrier(1); - if (i.x != shards.hi.x) xp_bars_full[i] = Barrier::create_barrier(1); - if (i.x != shards.lo.x) xm_bars_full[i] = Barrier::create_barrier(1); - if (i.y != shards.hi.y) yp_bars_full[i] = Barrier::create_barrier(1); - if (i.y != shards.lo.y) ym_bars_full[i] = Barrier::create_barrier(1); + if (i[0] != shards.hi[0]) xp_bars_full[i] = Barrier::create_barrier(1); + if (i[0] != shards.lo[0]) xm_bars_full[i] = Barrier::create_barrier(1); + if (i[1] != shards.hi[1]) yp_bars_full[i] = Barrier::create_barrier(1); + if (i[1] != shards.lo[1]) ym_bars_full[i] = Barrier::create_barrier(1); } // Create barrier to keep shard launch synchronized @@ -824,15 +824,15 @@ void top_level_task(const void *args, size_t arglen, for (PointInRectIterator<2, coord_t> it(shards); it.valid; it.step()) { Point2 i(it.p); - Rect2 interior_bounds(Point2(x_blocks[i.x].lo, y_blocks[i.y].lo), - Point2(x_blocks[i.x].hi, y_blocks[i.y].hi)); - Rect2 exterior_bounds(Point2(x_blocks[i.x].lo - RADIUS, y_blocks[i.y].lo - RADIUS), - Point2(x_blocks[i.x].hi + RADIUS, y_blocks[i.y].hi + RADIUS)); + Rect2 interior_bounds(Point2(x_blocks[i[0]].lo, y_blocks[i[1]].lo), + Point2(x_blocks[i[0]].hi, y_blocks[i[1]].hi)); + Rect2 exterior_bounds(Point2(x_blocks[i[0]].lo - RADIUS, y_blocks[i[1]].lo - RADIUS), + Point2(x_blocks[i[0]].hi + RADIUS, y_blocks[i[1]].hi + RADIUS)); // As interior, but bloated only on the outer edges - Rect2 outer_bounds(Point2(x_blocks[i.x].lo - (i.x == shards.lo.x ? RADIUS : 0), - y_blocks[i.y].lo - (i.y == shards.lo.y ? RADIUS : 0)), - Point2(x_blocks[i.x].hi + (i.x == shards.hi.x ? RADIUS : 0), - y_blocks[i.y].hi + (i.y == shards.hi.y ? 
RADIUS : 0))); + Rect2 outer_bounds(Point2(x_blocks[i[0]].lo - (i[0] == shards.lo[0] ? RADIUS : 0), + y_blocks[i[1]].lo - (i[1] == shards.lo[1] ? RADIUS : 0)), + Point2(x_blocks[i[0]].hi + (i[0] == shards.hi[0] ? RADIUS : 0), + y_blocks[i[1]].hi + (i[1] == shards.hi[1] ? RADIUS : 0))); // Pack arguments ShardArgs args; diff --git a/examples/thrust_interop/CMakeLists.txt b/examples/thrust_interop/CMakeLists.txt index ab0211098f..8def2fd329 100644 --- a/examples/thrust_interop/CMakeLists.txt +++ b/examples/thrust_interop/CMakeLists.txt @@ -32,5 +32,8 @@ add_executable(thrust_interop ${CPU_SOURCES} ${GPU_SOURCES}) set_target_cuda_standard(thrust_interop STANDARD ${Legion_CUDA_STANDARD}) set_target_cuda_architectures(thrust_interop ARCHITECTURES ${Legion_CUDA_ARCH}) set_target_cuda_warnings_and_errors(thrust_interop WARN_AS_ERROR ${Legion_BUILD_WARN_AS_ERROR}) +# Remove this once the Realm::Point class is updated +target_compile_options(thrust_interop PRIVATE $<$: + -Xcudafe=--diag_suppress=1444>) target_link_libraries(thrust_interop Legion::Legion) target_compile_options(thrust_interop PRIVATE $<$:${CXX_BUILD_WARNING_FLAGS}>) diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt index 07e4c15709..871d3a5d87 100644 --- a/runtime/CMakeLists.txt +++ b/runtime/CMakeLists.txt @@ -277,6 +277,7 @@ if(Legion_USE_CUDA) #TODO(apryakhin): Enable with cmake 3.27 #set_property(TARGET realm_cuda_fatbin PROPERTY CUDA_FATBIN_COMPILATION ON) target_compile_options(realm_cuda_fatbin PRIVATE $<$: + -Xcudafe=--diag_suppress=1444 # Remove once Point class is updated -Xcudafe=--diag_suppress=boolean_controlling_expr_is_constant --fatbin>) target_compile_definitions(realm_cuda_fatbin PRIVATE "CUDA_FATBIN_COMPILATION") @@ -934,6 +935,7 @@ add_library(Legion::LegionRuntime ALIAS LegionRuntime) # Add CUDA-specific properties if(Legion_USE_CUDA) target_compile_options(LegionRuntime PRIVATE $<$: + -Xcudafe=--diag_suppress=1444 # Remove once Point class is updated 
-Xcudafe=--diag_suppress=boolean_controlling_expr_is_constant>) set_target_cuda_warnings_and_errors(LegionRuntime WARN_AS_ERROR ${Legion_BUILD_WARN_AS_ERROR}) @@ -971,6 +973,7 @@ if(Legion_USE_HIP) if(Legion_HIP_TARGET STREQUAL "CUDA") target_sources(LegionRuntime PRIVATE ${LEGION_HIP_SRC}) target_compile_options(LegionRuntime PRIVATE $<$: + -Xcudafe=--diag_suppress=1444 # Remove once Point class is updated -Xcudafe=--diag_suppress=boolean_controlling_expr_is_constant>) target_include_directories(LegionRuntime PRIVATE ${HIP_ROOT_DIR}/include) # complex reduction ops bring in a public dependency on cuda headers diff --git a/runtime/realm/deppart/byfield.cc b/runtime/realm/deppart/byfield.cc index 681c3f4416..f425e9c49b 100644 --- a/runtime/realm/deppart/byfield.cc +++ b/runtime/realm/deppart/byfield.cc @@ -117,40 +117,43 @@ namespace Realm { while(true) { FT val = a_data.read(p); Point p2 = p; - while(p2.x < r.hi.x) { - Point p3 = p2; - p3.x++; - FT val2 = a_data.read(p3); - if(val != val2) { - // record old strip - BM *&bmp = bitmasks[val]; - if(!bmp) bmp = new BM; - bmp->add_rect(Rect(p,p2)); - //std::cout << val << ": " << p << ".." << p2 << std::endl; - val = val2; - p = p3; - } - p2 = p3; - } - // record whatever strip we have at the end - BM *&bmp = bitmasks[val]; - if(!bmp) bmp = new BM; - bmp->add_rect(Rect(p,p2)); - //std::cout << val << ": " << p << ".." << p2 << std::endl; - - // are we done? - if(p2 == r.hi) break; - - // now go to the next span, if there is one (can't be in 1-D) - assert(N > 1); - for(int i = 0; i < (N - 1); i++) { - p[i] = r.lo[i]; - if(p[i + 1] < r.hi[i+1]) { - p[i + 1] += 1; - break; - } - } - } + while(p2[0] < r.hi[0]) { + Point p3 = p2; + p3[0]++; + FT val2 = a_data.read(p3); + if(val != val2) { + // record old strip + BM *&bmp = bitmasks[val]; + if(!bmp) + bmp = new BM; + bmp->add_rect(Rect(p, p2)); + // std::cout << val << ": " << p << ".." 
<< p2 << std::endl; + val = val2; + p = p3; + } + p2 = p3; + } + // record whatever strip we have at the end + BM *&bmp = bitmasks[val]; + if(!bmp) + bmp = new BM; + bmp->add_rect(Rect(p, p2)); + // std::cout << val << ": " << p << ".." << p2 << std::endl; + + // are we done? + if(p2 == r.hi) + break; + + // now go to the next span, if there is one (can't be in 1-D) + assert(N > 1); + for(int i = 0; i < (N - 1); i++) { + p[i] = r.lo[i]; + if(p[i + 1] < r.hi[i + 1]) { + p[i + 1] += 1; + break; + } + } + } } } } diff --git a/runtime/realm/deppart/partitions.cc b/runtime/realm/deppart/partitions.cc index 38c163767d..5a4c822e61 100644 --- a/runtime/realm/deppart/partitions.cc +++ b/runtime/realm/deppart/partitions.cc @@ -349,12 +349,12 @@ namespace Realm { assert(count >= 1); // unsafe to subtract and test against zero - compare first size_t total_x; - if(bounds.lo.x <= bounds.hi.x) - total_x = ((long long)bounds.hi.x) - ((long long)bounds.lo.x) + 1; + if(bounds.lo[0] <= bounds.hi[0]) + total_x = ((long long)bounds.hi[0]) - ((long long)bounds.lo[0]) + 1; else total_x = 0; subspaces.reserve(count); - T px = bounds.lo.x; + T px = bounds.lo[0]; size_t cum_weight = 0; for(size_t i = 0; i < count; i++) { IndexSpace ss(*this); @@ -363,15 +363,15 @@ namespace Realm { // that ratio to avoid overflow problems T nx; if((total_x % total_weight) == 0) - nx = bounds.lo.x + cum_weight * (total_x / total_weight); + nx = bounds.lo[0] + cum_weight * (total_x / total_weight); else - nx = bounds.lo.x + (total_x * cum_weight / total_weight); - // wrap-around here means bad math - assert(nx >= px); - ss.bounds.lo.x = px; - ss.bounds.hi.x = nx - 1; - subspaces.push_back(ss); - px = nx; + nx = bounds.lo[0] + (total_x * cum_weight / total_weight); + // wrap-around here means bad math + assert(nx >= px); + ss.bounds.lo[0] = px; + ss.bounds.hi[0] = nx - 1; + subspaces.push_back(ss); + px = nx; } PartitioningOperation::do_inline_profiling(reqs, inline_start_time); return wait_on; @@ -501,8 
+501,9 @@ namespace Realm { RectListAdapter(const Rect<1,T> *_rects, size_t _count) : rects(_rects), count(_count) {} size_t size(void) const { return count; } - T start(size_t idx) const { return rects[idx].lo.x; } - T end(size_t idx) const { return rects[idx].hi.x; } + T start(size_t idx) const { return rects[idx].lo[0]; } + T end(size_t idx) const { return rects[idx].hi[0]; } + protected: const Rect<1,T> *rects; size_t count; @@ -514,14 +515,14 @@ namespace Realm { { if(use_approx) { if(space.dense()) - interval_tree.add_interval(space.bounds.lo.x, space.bounds.hi.x,label); + interval_tree.add_interval(space.bounds.lo[0], space.bounds.hi[0], label); else { SparsityMapImpl<1,T> *impl = SparsityMapImpl<1,T>::lookup(space.sparsity); interval_tree.add_intervals(RectListAdapter(impl->get_approx_rects()), label); } } else { for(IndexSpaceIterator<1,T> it(space); it.valid; it.step()) - interval_tree.add_interval(it.rect.lo.x, it.rect.hi.x, label); + interval_tree.add_interval(it.rect.lo[0], it.rect.hi[0], label); } } @@ -542,14 +543,14 @@ namespace Realm { bool approx) { if(space.dense()) { - interval_tree.test_interval(space.bounds.lo.x, space.bounds.hi.x, overlaps); + interval_tree.test_interval(space.bounds.lo[0], space.bounds.hi[0], overlaps); } else { if(approx) { SparsityMapImpl<1,T> *impl = SparsityMapImpl<1,T>::lookup(space.sparsity); interval_tree.test_sorted_intervals(RectListAdapter(impl->get_approx_rects()), overlaps); } else { for(IndexSpaceIterator<1,T> it(space); it.valid; it.step()) - interval_tree.test_interval(it.rect.lo.x, it.rect.hi.x, overlaps); + interval_tree.test_interval(it.rect.lo[0], it.rect.hi[0], overlaps); } } } diff --git a/runtime/realm/deppart/rectlist.inl b/runtime/realm/deppart/rectlist.inl index e1044e3aaf..6d6bcf54bb 100644 --- a/runtime/realm/deppart/rectlist.inl +++ b/runtime/realm/deppart/rectlist.inl @@ -67,16 +67,16 @@ namespace Realm { while(rects.size() > upper_bound) { // scan the rectangles to decide which to merge - want 
the smallest gap size_t best_idx = 0; - T best_gap = rects[1].lo.x - rects[0].hi.x; + T best_gap = rects[1].lo[0] - rects[0].hi[0]; for(size_t i = 1; i < max_rects; i++) { - T gap = rects[i + 1].lo.x - rects[i].hi.x; + T gap = rects[i + 1].lo[0] - rects[i].hi[0]; if(gap < best_gap) { best_gap = gap; best_idx = i; } } //std::cout << "merging " << rects[best_idx] << " and " << rects[best_idx + 1] << "\n"; - rects[best_idx].hi.x = rects[best_idx + 1].hi.x; + rects[best_idx].hi[0] = rects[best_idx + 1].hi[0]; rects.erase(rects.begin() + best_idx + 1); } } @@ -93,11 +93,11 @@ namespace Realm { // optimize for sorted insertion (i.e. stuff at end) { Rect &lr = *rects.rbegin(); - if(p.x == (lr.hi.x + 1)) { - lr.hi.x = p.x; + if(p[0] == (lr.hi[0] + 1)) { + lr.hi[0] = p[0]; return; } - if(p.x > (lr.hi.x + 1)) { + if(p[0] > (lr.hi[0] + 1)) { rects.push_back(Rect(p, p)); if((max_rects > 0) && (rects.size() > (size_t)max_rects)) { //std::cout << "too big " << rects.size() << " > " << max_rects << "\n"; @@ -119,9 +119,9 @@ namespace Realm { int hi = rects.size(); while(lo < hi) { int mid = (lo + hi) >> 1; - if(p.x < rects[mid].lo.x) + if(p[0] < rects[mid].lo[0]) hi = mid; - else if(p.x > rects[mid].hi.x) + else if(p[0] > rects[mid].hi[0]) lo = mid + 1; else { // we landed right on an existing rectangle - we're done @@ -132,20 +132,20 @@ namespace Realm { } } // when we get here, 'lo' is the first rectangle above us, so check for a merge below first - if((lo > 0) && (rects[lo - 1].hi.x == (p.x - 1))) { + if((lo > 0) && (rects[lo - 1].hi[0] == (p[0] - 1))) { // merging low - if((lo < (int)rects.size()) && rects[lo].lo.x == (p.x + 1)) { + if((lo < (int)rects.size()) && rects[lo].lo[0] == (p[0] + 1)) { // merging high too - rects[lo - 1].hi.x = rects[lo].hi.x; + rects[lo - 1].hi[0] = rects[lo].hi[0]; rects.erase(rects.begin() + lo); } else { // just low - rects[lo - 1].hi.x = p.x; + rects[lo - 1].hi[0] = p[0]; } } else { - if((lo < (int)rects.size()) && rects[lo].lo.x == (p.x + 1)) 
{ + if((lo < (int)rects.size()) && rects[lo].lo[0] == (p[0] + 1)) { // merging just high - rects[lo].lo.x = p.x; + rects[lo].lo[0] = p[0]; } else { // no merge - must insert rects.insert(rects.begin() + lo, Rect(p, p)); @@ -223,11 +223,11 @@ namespace Realm { if(N == 1) { // try to optimize for sorted insertion (i.e. stuff at end) Rect &lr = *rects.rbegin(); - if(_r.lo.x == (lr.hi.x + 1)) { - lr.hi.x = _r.hi.x; + if(_r.lo[0] == (lr.hi[0] + 1)) { + lr.hi[0] = _r.hi[0]; return; } - if(_r.lo.x > (lr.hi.x + 1)) { + if(_r.lo[0] > (lr.hi[0] + 1)) { rects.push_back(_r); if((max_rects > 0) && (rects.size() > (size_t)max_rects)) { merge_rects(max_rects); @@ -239,12 +239,12 @@ namespace Realm { // that will get big and aren't sorted well (e.g. images), the HybridRectangleList // is a better choice) // use a binary search to skip over all rectangles that are strictly - // below the new rectangle (i.e. all r s.t. r.hi.x + 1 < _r.lo.x) + // below the new rectangle (i.e. all r s.t. r.hi[0] + 1 < _r.lo[0]) int lo = 0; int hi = rects.size(); while(lo < hi) { int mid = (lo + hi) >> 1; - if(rects[mid].hi.x + 1 < _r.lo.x) + if(rects[mid].hi[0] + 1 < _r.lo[0]) lo = mid + 1; else hi = mid; @@ -256,7 +256,7 @@ namespace Realm { // if the new rect fits entirely below the existing one, insert the new // one here and we're done - if(_r.hi.x + 1 < mr.lo.x) { + if(_r.hi[0] + 1 < mr.lo[0]) { rects.insert(rects.begin()+lo, _r); return; } @@ -267,8 +267,8 @@ namespace Realm { int dlo = lo + 1; int dhi = dlo; while((dhi < (int)rects.size()) && - ((mr.hi.x + 1) >= rects[dhi].lo.x)) { - mr.hi.x = std::max(mr.hi.x, rects[dhi].hi.x); + ((mr.hi[0] + 1) >= rects[dhi].lo[0])) { + mr.hi[0] = std::max(mr.hi[0], rects[dhi].hi[0]); dhi++; } if(dhi > dlo) @@ -569,52 +569,52 @@ namespace Realm { // otherwise add to the map assert(!as_map.empty()); - typename std::map::iterator it = as_map.lower_bound(p.x); + typename std::map::iterator it = as_map.lower_bound(p[0]); if(it == as_map.end()) { //std::cout << 
"add " << p << " BIGGER " << as_map.rbegin()->first << "," << as_map.rbegin()->second << "\n"; // bigger than everything - see if we can merge with the last guy T& last = as_map.rbegin()->second; - if(last == (p.x - 1)) - last = p.x; - else if(last < (p.x - 1)) - as_map[p.x] = p.x; + if(last == (p[0] - 1)) + last = p[0]; + else if(last < (p[0] - 1)) + as_map[p[0]] = p[0]; } - else if(it->first == p.x) { + else if(it->first == p[0]) { //std::cout << "add " << p << " OVERLAP1 " << it->first << "," << it->second << "\n"; // we're the beginning of an existing range - nothing to do } else if(it == as_map.begin()) { //std::cout << "add " << p << " FIRST " << it->first << "," << it->second << "\n"; // we're before everything - see if we can merge with the first guy - if(it->first == (p.x + 1)) { + if(it->first == (p[0] + 1)) { T last = it->second; as_map.erase(it); - as_map[p.x] = last; + as_map[p[0]] = last; } else { - as_map[p.x] = p.x; + as_map[p[0]] = p[0]; } } else { typename std::map::iterator it2 = it; --it2; //std::cout << "add " << p << " BETWEEN " << it->first << "," << it->second << " / " << it2->first << "," << it2->second << "\n"; - if(it2->second >= p.x) { + if(it2->second >= p[0]) { // range below us includes us - nothing to do } else { - bool merge_above = it->first == (p.x + 1); - bool merge_below = it2->second == (p.x - 1); + bool merge_above = it->first == (p[0] + 1); + bool merge_below = it2->second == (p[0] - 1); if(merge_below) { if(merge_above) { it2->second = it->second; as_map.erase(it); } else - it2->second = p.x; + it2->second = p[0]; } else { T last; if(merge_above) { last = it->second; as_map.erase(it); } else - last = p.x; - as_map[p.x] = last; + last = p[0]; + as_map[p[0]] = last; } } } @@ -639,33 +639,33 @@ namespace Realm { // otherwise add to the map assert(!as_map.empty()); - typename std::map::iterator it = as_map.lower_bound(r.lo.x); + typename std::map::iterator it = as_map.lower_bound(r.lo[0]); if(it == as_map.end()) { //std::cout << 
"add " << p << " BIGGER " << as_map.rbegin()->first << "," << as_map.rbegin()->second << "\n"; // bigger than everything - see if we can merge with the last guy T& last = as_map.rbegin()->second; - if(last == (r.lo.x - 1)) - last = r.hi.x; - else if(last < (r.lo.x - 1)) - as_map[r.lo.x] = r.hi.x; + if(last == (r.lo[0] - 1)) + last = r.hi[0]; + else if(last < (r.lo[0] - 1)) + as_map[r.lo[0]] = r.hi[0]; } else { // if the interval we found isn't the first, we may need to back up one to // find the one that overlaps the start of our range if(it != as_map.begin()) { typename std::map::iterator it2 = it; --it2; - if(it2->second >= (r.lo.x - 1)) + if(it2->second >= (r.lo[0] - 1)) it = it2; } - if(it->first <= r.lo.x) { - assert((it->second + 1) >= r.lo.x); // it had better overlap or just touch + if(it->first <= r.lo[0]) { + assert((it->second + 1) >= r.lo[0]); // it had better overlap or just touch - if(it->second < r.hi.x) - it->second = r.hi.x; + if(it->second < r.hi[0]) + it->second = r.hi[0]; } else { // we are the low end of a range (but may absorb other ranges) - it = as_map.insert(std::make_pair(r.lo.x, r.hi.x)).first; + it = as_map.insert(std::make_pair(r.lo[0], r.hi[0])).first; } // have we subsumed or merged with anything? 
@@ -691,7 +691,7 @@ namespace Realm { for(typename std::vector >::iterator it = this->rects.begin(); it != this->rects.end(); it++) - as_map[it->lo.x] = it->hi.x; + as_map[it->lo[0]] = it->hi[0]; this->rects.clear(); is_vector = false; } @@ -705,12 +705,12 @@ namespace Realm { it != as_map.end(); it++) { Rect<1,T> r; - r.lo.x = it->first; - r.hi.x = it->second; + r.lo[0] = it->first; + r.hi[0] = it->second; this->rects.push_back(r); } for(size_t i = 1; i < this->rects.size(); i++) - assert(this->rects[i-1].hi.x < (this->rects[i].lo.x - 1)); + assert(this->rects[i-1].hi[0] < (this->rects[i].lo[0] - 1)); as_map.clear(); is_vector = true; } diff --git a/runtime/realm/deppart/setops.cc b/runtime/realm/deppart/setops.cc index 97b5d1e04f..44e5644c1f 100644 --- a/runtime/realm/deppart/setops.cc +++ b/runtime/realm/deppart/setops.cc @@ -606,13 +606,13 @@ namespace Realm { its[i].reset(spaces[i]); if(its[i].valid) { order[n] = i; - T lo = its[i].rect.lo.x; - for(int j = n; j > 0; j--) - if(its[order[j-1]].rect.lo.x > lo) - std::swap(order[j-1], order[j]); - else - break; - n++; + T lo = its[i].rect.lo[0]; + for(int j = n; j > 0; j--) + if(its[order[j - 1]].rect.lo[0] > lo) + std::swap(order[j - 1], order[j]); + else + break; + n++; } } } @@ -695,49 +695,49 @@ namespace Realm { while(it_lhs.valid && it_rhs.valid) { // if either side comes completely before the other, emit it and continue - if(it_lhs.rect.hi.x < (it_rhs.rect.lo.x - 1)) { - bitmask.add_rect(it_lhs.rect); - it_lhs.step(); - continue; - } - - if(it_rhs.rect.hi.x < (it_lhs.rect.lo.x - 1)) { - bitmask.add_rect(it_rhs.rect); - it_rhs.step(); - continue; - } - - // new rectangle will be at least the union of these two - Rect u = it_lhs.rect.union_bbox(it_rhs.rect); - it_lhs.step(); - it_rhs.step(); - // try to consume even more - while(true) { - if(it_lhs.valid && (it_lhs.rect.lo.x <= (u.hi.x + 1))) { - u.hi.x = std::max(u.hi.x, it_lhs.rect.hi.x); - it_lhs.step(); - continue; - } - if(it_rhs.valid && 
(it_rhs.rect.lo.x <= (u.hi.x + 1))) { - u.hi.x = std::max(u.hi.x, it_rhs.rect.hi.x); - it_rhs.step(); - continue; - } - // if both fail, we're done - break; - } - bitmask.add_rect(u); - } - - // leftover rects from one side or the other just get added - while(it_lhs.valid) { - bitmask.add_rect(it_lhs.rect); - it_lhs.step(); - } - while(it_rhs.valid) { - bitmask.add_rect(it_rhs.rect); - it_rhs.step(); - } + if(it_lhs.rect.hi[0] < (it_rhs.rect.lo[0] - 1)) { + bitmask.add_rect(it_lhs.rect); + it_lhs.step(); + continue; + } + + if(it_rhs.rect.hi[0] < (it_lhs.rect.lo[0] - 1)) { + bitmask.add_rect(it_rhs.rect); + it_rhs.step(); + continue; + } + + // new rectangle will be at least the union of these two + Rect u = it_lhs.rect.union_bbox(it_rhs.rect); + it_lhs.step(); + it_rhs.step(); + // try to consume even more + while(true) { + if(it_lhs.valid && (it_lhs.rect.lo[0] <= (u.hi[0] + 1))) { + u.hi[0] = std::max(u.hi[0], it_lhs.rect.hi[0]); + it_lhs.step(); + continue; + } + if(it_rhs.valid && (it_rhs.rect.lo[0] <= (u.hi[0] + 1))) { + u.hi[0] = std::max(u.hi[0], it_rhs.rect.hi[0]); + it_rhs.step(); + continue; + } + // if both fail, we're done + break; + } + bitmask.add_rect(u); + } + + // leftover rects from one side or the other just get added + while(it_lhs.valid) { + bitmask.add_rect(it_lhs.rect); + it_lhs.step(); + } + while(it_rhs.valid) { + bitmask.add_rect(it_rhs.rect); + it_rhs.step(); + } } else { // N-way merge NWayMerge nwm(inputs); @@ -746,32 +746,34 @@ namespace Realm { //nwm.print(); // consume rectangles off the first one until there's overlap with the next guy - T lo1 = nwm[1].lo.x; - if(nwm[0].hi.x < (lo1 - 1)) { - while(nwm[0].hi.x < (lo1 - 1)) { - bitmask.add_rect(nwm[0]); - if(!nwm.step(0)) break; - } - nwm.update(0); - continue; - } - - // at least a little overlap, so start accumulating a value - Rect u = nwm[0]; - nwm.step(0); nwm.update(0); - while((nwm.size() > 0) && (nwm[0].lo.x <= (u.hi.x + 1))) { - u.hi.x = std::max(u.hi.x, nwm[0].hi.x); - 
nwm.step(0); - nwm.update(0); - } - bitmask.add_rect(u); - } - - // any stragglers? - if(nwm.size() > 0) - do { - bitmask.add_rect(nwm[0]); - } while(nwm.step(0)); + T lo1 = nwm[1].lo[0]; + if(nwm[0].hi[0] < (lo1 - 1)) { + while(nwm[0].hi[0] < (lo1 - 1)) { + bitmask.add_rect(nwm[0]); + if(!nwm.step(0)) + break; + } + nwm.update(0); + continue; + } + + // at least a little overlap, so start accumulating a value + Rect u = nwm[0]; + nwm.step(0); + nwm.update(0); + while((nwm.size() > 0) && (nwm[0].lo[0] <= (u.hi[0] + 1))) { + u.hi[0] = std::max(u.hi[0], nwm[0].hi[0]); + nwm.step(0); + nwm.update(0); + } + bitmask.add_rect(u); + } + + // any stragglers? + if(nwm.size() > 0) + do { + bitmask.add_rect(nwm[0]); + } while(nwm.step(0)); #if 0 std::vector > its(inputs.size()); std::vector order(inputs.size()); @@ -780,9 +782,9 @@ namespace Realm { its[i].reset(inputs[i]); if(its[i].valid) { order[n] = i; - T lo = its[i].rect.lo.x; + T lo = its[i].rect.lo[0]; for(size_t j = n; j > 0; j--) - if(its[order[j-1]].rect.lo.x > lo) + if(its[order[j-1]].rect.lo[0] > lo) std::swap(order[j-1], order[j]); else break; @@ -797,14 +799,14 @@ namespace Realm { for(size_t i = 0; i < n; i++) std::cout << " " << i << "=" << order[i] << "=" << its[order[i]].rect; std::cout << "]]\n"; // consume rectangles off the first one until there's overlap with the next guy - if(its[order[0]].rect.hi.x < (its[order[1]].rect.lo.x - 1)) { - while(its[order[0]].rect.hi.x < (its[order[1]].rect.lo.x - 1)) { + if(its[order[0]].rect.hi[0] < (its[order[1]].rect.lo[0] - 1)) { + while(its[order[0]].rect.hi[0] < (its[order[1]].rect.lo[0] - 1)) { bitmask.add_rect(its[order[0]].rect); if(!its[order[0]].step()) break; } if(its[order[0]].valid) { for(size_t j = 0; j < n - 1; j++) - if(its[order[j]].rect.lo.x > its[order[j+1]].rect.lo.x) + if(its[order[j]].rect.lo[0] > its[order[j+1]].rect.lo[0]) std::swap(order[j], order[j+1]); else break; @@ -849,13 +851,13 @@ namespace Realm { while(it_lhs.valid && it_rhs.valid) { // 
if either side comes completely before the other, emit it and continue - if(it_lhs.rect.hi.x < (it_rhs.rect.lo.x - 1)) { + if(it_lhs.rect.hi[0] < (it_rhs.rect.lo[0] - 1)) { bitmask.add_rect(it_lhs.rect); it_lhs.step(); continue; } - if(it_rhs.rect.hi.x < (it_lhs.rect.lo.x - 1)) { + if(it_rhs.rect.hi[0] < (it_lhs.rect.lo[0] - 1)) { bitmask.add_rect(it_rhs.rect); it_rhs.step(); continue; @@ -867,13 +869,13 @@ namespace Realm { it_rhs.step(); // try to consume even more while(true) { - if(it_lhs.valid && (it_lhs.rect.lo.x <= (u.hi.x + 1))) { - u.hi.x = std::max(u.hi.x, it_lhs.rect.hi.x); + if(it_lhs.valid && (it_lhs.rect.lo[0] <= (u.hi[0] + 1))) { + u.hi[0] = std::max(u.hi[0], it_lhs.rect.hi[0]); it_lhs.step(); continue; } - if(it_rhs.valid && (it_rhs.rect.lo.x <= (u.hi.x + 1))) { - u.hi.x = std::max(u.hi.x, it_rhs.rect.hi.x); + if(it_rhs.valid && (it_rhs.rect.lo[0] <= (u.hi[0] + 1))) { + u.hi[0] = std::max(u.hi[0], it_rhs.rect.hi[0]); it_rhs.step(); continue; } @@ -900,9 +902,9 @@ namespace Realm { //nwm.print(); // consume rectangles off the first one until there's overlap with the next guy - T lo1 = nwm[1].lo.x; - if(nwm[0].hi.x < (lo1 - 1)) { - while(nwm[0].hi.x < (lo1 - 1)) { + T lo1 = nwm[1].lo[0]; + if(nwm[0].hi[0] < (lo1 - 1)) { + while(nwm[0].hi[0] < (lo1 - 1)) { bitmask.add_rect(nwm[0]); if(!nwm.step(0)) break; } @@ -913,8 +915,8 @@ namespace Realm { // at least a little overlap, so start accumulating a value Rect u = nwm[0]; nwm.step(0); nwm.update(0); - while((nwm.size() > 0) && (nwm[0].lo.x <= (u.hi.x + 1))) { - u.hi.x = std::max(u.hi.x, nwm[0].hi.x); + while((nwm.size() > 0) && (nwm[0].lo[0] <= (u.hi[0] + 1))) { + u.hi[0] = std::max(u.hi[0], nwm[0].hi[0]); nwm.step(0); nwm.update(0); } @@ -934,9 +936,9 @@ namespace Realm { its[i].reset(inputs[i]); if(its[i].valid) { order[n] = i; - T lo = its[i].rect.lo.x; + T lo = its[i].rect.lo[0]; for(size_t j = n; j > 0; j--) - if(its[order[j-1]].rect.lo.x > lo) + if(its[order[j-1]].rect.lo[0] > lo) 
std::swap(order[j-1], order[j]); else break; @@ -951,14 +953,14 @@ namespace Realm { for(size_t i = 0; i < n; i++) std::cout << " " << i << "=" << order[i] << "=" << its[order[i]].rect; std::cout << "]]\n"; // consume rectangles off the first one until there's overlap with the next guy - if(its[order[0]].rect.hi.x < (its[order[1]].rect.lo.x - 1)) { - while(its[order[0]].rect.hi.x < (its[order[1]].rect.lo.x - 1)) { + if(its[order[0]].rect.hi[0] < (its[order[1]].rect.lo[0] - 1)) { + while(its[order[0]].rect.hi[0] < (its[order[1]].rect.lo[0] - 1)) { bitmask.add_rect(its[order[0]].rect); if(!its[order[0]].step()) break; } if(its[order[0]].valid) { for(size_t j = 0; j < n - 1; j++) - if(its[order[j]].rect.lo.x > its[order[j+1]].rect.lo.x) + if(its[order[j]].rect.lo[0] > its[order[j+1]].rect.lo[0]) std::swap(order[j], order[j+1]); else break; @@ -1123,28 +1125,29 @@ namespace Realm { // can only generate data while both sides have rectangles left while(it_lhs.valid && it_rhs.valid) { // skip rectangles if they completely preceed the one on the other side - if(it_lhs.rect.hi.x < it_rhs.rect.lo.x) { - it_lhs.step(); - continue; - } - - if(it_rhs.rect.hi.x < it_lhs.rect.lo.x) { - it_rhs.step(); - continue; - } - - // we have at least partial overlap - add the intersection and then consume whichever - // rectangle ended first (or both if equal) - bitmask.add_rect(it_lhs.rect.intersection(it_rhs.rect)); - if(it_lhs.rect.hi.x < it_rhs.rect.hi.x) { - it_lhs.step(); - } else if(it_lhs.rect.hi.x > it_rhs.rect.hi.x) { - it_rhs.step(); - } else { - it_lhs.step(); - it_rhs.step(); - } - } + if(it_lhs.rect.hi[0] < it_rhs.rect.lo[0]) { + it_lhs.step(); + continue; + } + + if(it_rhs.rect.hi[0] < it_lhs.rect.lo[0]) { + it_rhs.step(); + continue; + } + + // we have at least partial overlap - add the intersection and then consume + // whichever + // rectangle ended first (or both if equal) + bitmask.add_rect(it_lhs.rect.intersection(it_rhs.rect)); + if(it_lhs.rect.hi[0] < 
it_rhs.rect.hi[0]) { + it_lhs.step(); + } else if(it_lhs.rect.hi[0] > it_rhs.rect.hi[0]) { + it_rhs.step(); + } else { + it_lhs.step(); + it_rhs.step(); + } + } } else { assert(0); } @@ -1302,51 +1305,51 @@ namespace Realm { while(it_lhs.valid) { // throw away any rhs rectangles that come before this one - while(it_rhs.valid && (it_rhs.rect.hi.x < it_lhs.rect.lo.x)) - it_rhs.step(); - - // out of rhs rectangles? just copy over all the rest on the lhs and we're done - if(!it_rhs.valid) { - while(it_lhs.valid) { - bitmask.add_rect(it_lhs.rect); - it_lhs.step(); - } - break; - } - - // an lhs rectangle that is entirely below the first rhs is taken as is - if(it_lhs.rect.hi.x < it_rhs.rect.lo.x) { - bitmask.add_rect(it_lhs.rect); - it_lhs.step(); - continue; - } - - // last case - partial overlap - subtract out rhs rect(s) - if(it_lhs.valid) { - Point p = it_lhs.rect.lo; - while(it_rhs.valid) { - if(p.x < it_rhs.rect.lo.x) { - // add a partial rect below the rhs - Point p2 = it_rhs.rect.lo; - p2.x -= 1; - bitmask.add_rect(Rect(p, p2)); - } - - // if the rhs ends after the lhs, we're done - if(it_rhs.rect.hi.x >= it_lhs.rect.hi.x) - break; - - // otherwise consume the rhs and update p - p = it_rhs.rect.hi; - p.x += 1; - if(!it_rhs.step() || (it_lhs.rect.hi.x < it_rhs.rect.lo.x)) { - // no rhs left in this lhs piece - emit the rest and break out - bitmask.add_rect(Rect(p, it_lhs.rect.hi)); - break; - } - } - it_lhs.step(); - } + while(it_rhs.valid && (it_rhs.rect.hi[0] < it_lhs.rect.lo[0])) + it_rhs.step(); + + // out of rhs rectangles? 
just copy over all the rest on the lhs and we're done + if(!it_rhs.valid) { + while(it_lhs.valid) { + bitmask.add_rect(it_lhs.rect); + it_lhs.step(); + } + break; + } + + // an lhs rectangle that is entirely below the first rhs is taken as is + if(it_lhs.rect.hi[0] < it_rhs.rect.lo[0]) { + bitmask.add_rect(it_lhs.rect); + it_lhs.step(); + continue; + } + + // last case - partial overlap - subtract out rhs rect(s) + if(it_lhs.valid) { + Point p = it_lhs.rect.lo; + while(it_rhs.valid) { + if(p[0] < it_rhs.rect.lo[0]) { + // add a partial rect below the rhs + Point p2 = it_rhs.rect.lo; + p2[0] -= 1; + bitmask.add_rect(Rect(p, p2)); + } + + // if the rhs ends after the lhs, we're done + if(it_rhs.rect.hi[0] >= it_lhs.rect.hi[0]) + break; + + // otherwise consume the rhs and update p + p = it_rhs.rect.hi; + p[0] += 1; + if(!it_rhs.step() || (it_lhs.rect.hi[0] < it_rhs.rect.lo[0])) { + // no rhs left in this lhs piece - emit the rest and break out + bitmask.add_rect(Rect(p, it_lhs.rect.hi)); + break; + } + } + it_lhs.step(); + } } return; } diff --git a/runtime/realm/deppart/sparsity_impl.cc b/runtime/realm/deppart/sparsity_impl.cc index 362c8d8b15..51029cbf7a 100644 --- a/runtime/realm/deppart/sparsity_impl.cc +++ b/runtime/realm/deppart/sparsity_impl.cc @@ -1164,96 +1164,98 @@ namespace Realm { else if(N == 1) { // demand that our input data is sorted for(size_t i = 1; i < count; i++) - assert(rects[i-1].hi.x < (rects[i].lo.x - 1)); - - // fast case - all these rectangles are after all the ones we have now - if(this->entries.empty() || (this->entries.rbegin()->bounds.hi.x < rects[0].lo.x)) { - // special case when merging occurs with the last entry from before - size_t n = this->entries.size(); - if((n > 0) && (this->entries.rbegin()->bounds.hi.x == (rects[0].lo.x - 1))) { - this->entries.resize(n + count - 1); - assert(!this->entries[n - 1].sparsity.exists()); - assert(this->entries[n - 1].bitmap == 0); - this->entries[n - 1].bounds.hi = rects[0].hi; - for(size_t i = 
1; i < count; i++) { - this->entries[n - 1 + i].bounds = rects[i]; - this->entries[n - 1 + i].sparsity.id = 0; // no sparsity map - this->entries[n - 1 + i].bitmap = 0; - } - } else { - this->entries.resize(n + count); - for(size_t i = 0; i < count; i++) { - this->entries[n + i].bounds = rects[i]; - this->entries[n + i].sparsity.id = 0; // no sparsity map - this->entries[n + i].bitmap = 0; - } - } - } else { - // do a merge of the new data with the old - std::vector > old_data; - old_data.swap(this->entries); - size_t i = 0; - size_t n = 0; - typename std::vector >::const_iterator old_it = old_data.begin(); - while((i < count) && (old_it != old_data.end())) { - if(rects[i].hi.x < (old_it->bounds.lo.x - 1)) { - this->entries.resize(n + 1); - this->entries[n].bounds = rects[i]; - this->entries[n].sparsity.id = 0; // no sparsity map - this->entries[n].bitmap = 0; - n++; - i++; - continue; - } + assert(rects[i - 1].hi[0] < (rects[i].lo[0] - 1)); + + // fast case - all these rectangles are after all the ones we have now + if(this->entries.empty() || + (this->entries.rbegin()->bounds.hi[0] < rects[0].lo[0])) { + // special case when merging occurs with the last entry from before + size_t n = this->entries.size(); + if((n > 0) && (this->entries.rbegin()->bounds.hi[0] == (rects[0].lo[0] - 1))) { + this->entries.resize(n + count - 1); + assert(!this->entries[n - 1].sparsity.exists()); + assert(this->entries[n - 1].bitmap == 0); + this->entries[n - 1].bounds.hi = rects[0].hi; + for(size_t i = 1; i < count; i++) { + this->entries[n - 1 + i].bounds = rects[i]; + this->entries[n - 1 + i].sparsity.id = 0; // no sparsity map + this->entries[n - 1 + i].bitmap = 0; + } + } else { + this->entries.resize(n + count); + for(size_t i = 0; i < count; i++) { + this->entries[n + i].bounds = rects[i]; + this->entries[n + i].sparsity.id = 0; // no sparsity map + this->entries[n + i].bitmap = 0; + } + } + } else { + // do a merge of the new data with the old + std::vector> old_data; + 
old_data.swap(this->entries); + size_t i = 0; + size_t n = 0; + typename std::vector>::const_iterator old_it = + old_data.begin(); + while((i < count) && (old_it != old_data.end())) { + if(rects[i].hi[0] < (old_it->bounds.lo[0] - 1)) { + this->entries.resize(n + 1); + this->entries[n].bounds = rects[i]; + this->entries[n].sparsity.id = 0; // no sparsity map + this->entries[n].bitmap = 0; + n++; + i++; + continue; + } - if(old_it->bounds.hi.x < (rects[i].lo.x - 1)) { - this->entries.push_back(*old_it); - n++; - old_it++; - continue; - } + if(old_it->bounds.hi[0] < (rects[i].lo[0] - 1)) { + this->entries.push_back(*old_it); + n++; + old_it++; + continue; + } - Rect u = rects[i].union_bbox(old_it->bounds); - // step rects, but not old_it - want sanity checks below to be done - i++; - while(true) { - if((i < count) && (rects[i].lo.x <= (u.hi.x + 1))) { - u.hi.x = std::max(u.hi.x, rects[i].hi.x); - i++; - continue; - } - if((old_it != old_data.end()) && (old_it->bounds.lo.x <= (u.hi.x + 1))) { - assert(!old_it->sparsity.exists()); - assert(old_it->bitmap == 0); - u.hi.x = std::max(u.hi.x, old_it->bounds.hi.x); - old_it++; - continue; - } - // if neither test passed, the chain is broken - break; - } - this->entries.resize(n + 1); - this->entries[n].bounds = u; - this->entries[n].sparsity.id = 0; // no sparsity map - this->entries[n].bitmap = 0; - n++; - } + Rect u = rects[i].union_bbox(old_it->bounds); + // step rects, but not old_it - want sanity checks below to be done + i++; + while(true) { + if((i < count) && (rects[i].lo[0] <= (u.hi[0] + 1))) { + u.hi[0] = std::max(u.hi[0], rects[i].hi[0]); + i++; + continue; + } + if((old_it != old_data.end()) && (old_it->bounds.lo[0] <= (u.hi[0] + 1))) { + assert(!old_it->sparsity.exists()); + assert(old_it->bitmap == 0); + u.hi[0] = std::max(u.hi[0], old_it->bounds.hi[0]); + old_it++; + continue; + } + // if neither test passed, the chain is broken + break; + } + this->entries.resize(n + 1); + this->entries[n].bounds = u; + 
this->entries[n].sparsity.id = 0; // no sparsity map + this->entries[n].bitmap = 0; + n++; + } - // leftovers... - while(i < count) { - this->entries.resize(n + 1); - this->entries[n].bounds = rects[i]; - this->entries[n].sparsity.id = 0; // no sparsity map - this->entries[n].bitmap = 0; - n++; - i++; - } + // leftovers... + while(i < count) { + this->entries.resize(n + 1); + this->entries[n].bounds = rects[i]; + this->entries[n].sparsity.id = 0; // no sparsity map + this->entries[n].bitmap = 0; + n++; + i++; + } - while(old_it != old_data.end()) { - this->entries.push_back(*old_it); - old_it++; - } - } + while(old_it != old_data.end()) { + this->entries.push_back(*old_it); + old_it++; + } + } } else { // each new rectangle has to be tested against existing ones for // containment, overlap (which can cause splitting), or mergeability @@ -1603,7 +1605,7 @@ namespace Realm { std::vector gap_sizes(max_rects - 1, 0); std::vector gap_idxs(max_rects - 1, -1); for(int i = 1; i < n; i++) { - T gap = entries[i].bounds.lo.x - entries[i - 1].bounds.hi.x; + T gap = entries[i].bounds.lo[0] - entries[i - 1].bounds.hi[0]; if(gap <= gap_sizes[0]) continue; // the smallest gap is discarded and we insertion-sort this new value in diff --git a/runtime/realm/indexspace.inl b/runtime/realm/indexspace.inl index 734c898ede..8fc4251d28 100644 --- a/runtime/realm/indexspace.inl +++ b/runtime/realm/indexspace.inl @@ -573,9 +573,9 @@ namespace Realm { int hi = entries.size(); while(lo < hi) { size_t mid = (lo + hi) >> 1; // rounding down keeps up from picking hi - if(p.x < entries[mid].bounds.lo.x) + if(p[0] < entries[mid].bounds.lo[0]) hi = mid; - else if(p.x > entries[mid].bounds.hi.x) + else if(p[0] > entries[mid].bounds.hi[0]) lo = mid + 1; else return mid; @@ -606,7 +606,7 @@ namespace Realm { // the search guaranteed we're below the upper bound of the returned entry, // but we might be below the lower bound - if(p.x < e.bounds.lo.x) + if(p[0] < e.bounds.lo[0]) return false; 
if(e.sparsity.exists()) { diff --git a/runtime/realm/point.h b/runtime/realm/point.h index d4f8f77b40..75af4926b9 100644 --- a/runtime/realm/point.h +++ b/runtime/realm/point.h @@ -56,7 +56,12 @@ namespace Realm { // specializations for N <= 4 defined in point.inl template struct REALM_PUBLIC_API Point { - T x, y, z, w; T rest[N - 4]; + [[deprecated("The \"Point::x,y,z,w\" members will be removed in the next Realm " + "release. Please switch to using Point::operator[] instead.")]] T x, + y, z, w; + [[deprecated( + "The \"Point::rest\" data member will be removed in the next Realm release. " + "Please switch to using Point::operator[] instead.")]] T rest[N - 4]; REALM_CUDA_HD Point(void); diff --git a/runtime/realm/point.inl b/runtime/realm/point.inl index 4326595b74..4bcddaf567 100644 --- a/runtime/realm/point.inl +++ b/runtime/realm/point.inl @@ -31,6 +31,20 @@ namespace Realm { // // class Point +#if defined(__PGIC__) + #pragma warning (push) + #pragma diag_suppress 1445 +#elif defined(__GNUC__) + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#elif defined(__clang__) + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wdeprecated-declarations" +#elif defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER) + #pragma warning push + #pragma warning disable 1478 +#endif + template REALM_CUDA_HD inline Point::Point(void) @@ -68,7 +82,7 @@ namespace Realm { inline Point::Point(const Point& copy_from) { for(int i = 0; i < N; i++) - (&x)[i] = (&copy_from.x)[i]; + (&x)[i] = copy_from[i]; } template @@ -77,7 +91,7 @@ namespace Realm { inline Point& Point::operator=(const Point& copy_from) { for(int i = 0; i < N; i++) - (&x)[i] = (&copy_from.x)[i]; + (&x)[i] = copy_from[i]; return *this; } @@ -123,7 +137,7 @@ namespace Realm { // specializations for N <= 4 template struct REALM_PUBLIC_API Point<1,T> { - T x; + [[deprecated("The \"Point::x\" member will be removed in the next Realm release.
Please switch to using Point::operator[] instead.")]] T x; REALM_CUDA_HD Point(void) {} REALM_CUDA_HD @@ -139,7 +153,7 @@ namespace Realm { // copies allow type coercion (assuming the underlying type does) template REALM_CUDA_HD - Point(const Point<1, T2>& copy_from) : x(copy_from.x) {} + Point(const Point<1, T2>& copy_from) : x(copy_from[0]) {} template REALM_CUDA_HD Point<1,T>& operator=(const Point<1, T2>& copy_from) @@ -174,7 +188,7 @@ namespace Realm { template struct REALM_PUBLIC_API Point<2,T> { - T x, y; + [[deprecated("The \"Point::x,y\" members will be removed in the next Realm release. Please switch to using Point::operator[] instead.")]] T x, y; REALM_CUDA_HD Point(void) {} REALM_CUDA_HD @@ -193,7 +207,7 @@ namespace Realm { template REALM_CUDA_HD Point(const Point<2, T2>& copy_from) - : x(copy_from.x), y(copy_from.y) {} + : x(copy_from[0]), y(copy_from[1]) {} template REALM_CUDA_HD Point<2,T>& operator=(const Point<2,T2>& copy_from) @@ -225,7 +239,7 @@ namespace Realm { template struct REALM_PUBLIC_API Point<3,T> { - T x, y, z; + [[deprecated("The \"Point::x,y,z\" members will be removed in the next Realm release. 
Please switch to using Point::operator[] instead.")]] T x, y, z; REALM_CUDA_HD Point(void) {} REALM_CUDA_HD @@ -244,7 +258,7 @@ namespace Realm { template REALM_CUDA_HD Point(const Point<3, T2>& copy_from) - : x(copy_from.x), y(copy_from.y), z(copy_from.z) {} + : x(copy_from[0]), y(copy_from[1]), z(copy_from[2]) {} template REALM_CUDA_HD Point<3,T>& operator=(const Point<3,T2>& copy_from) @@ -296,7 +310,7 @@ namespace Realm { template REALM_CUDA_HD Point(const Point<4, T2>& copy_from) - : x(copy_from.x), y(copy_from.y), z(copy_from.z), w(copy_from.w) {} + : x(copy_from[0]), y(copy_from[1]), z(copy_from[2]), w(copy_from[3]) {} template REALM_CUDA_HD Point<4,T>& operator=(const Point<4,T2>& copy_from) @@ -440,6 +454,15 @@ namespace Realm { return lhs; } +#if defined(__PGIC__) + #pragma warning (pop) +#elif defined(__GNUC__) + #pragma GCC diagnostic pop +#elif defined(__clang__) + #pragma clang diagnostic pop +#elif defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER) + #pragma warning pop +#endif //////////////////////////////////////////////////////////////////////// // @@ -792,8 +815,8 @@ namespace Realm { #endif if(N == 1) { // 1-D doesn't care about fortran/C order - if(p.x < rect.hi.x) { - p.x++; + if(p[0] < rect.hi[0]) { + p[0]++; return true; } } else { diff --git a/runtime/realm/transfer/transfer.cc b/runtime/realm/transfer/transfer.cc index 2a94f59c51..6d1de0e089 100644 --- a/runtime/realm/transfer/transfer.cc +++ b/runtime/realm/transfer/transfer.cc @@ -1688,40 +1688,40 @@ namespace Realm { int merge_dim = -1; if(N == 1) { // simple 1-D case - if(rects[rect_pos].lo.x == (r.hi.x + 1)) { - merge_dim = 0; - } - } else { - const Rect& r2 = rects[rect_pos]; - int dims_match = 0; - while(dims_match < (N-1)) - if((r.lo[dims_match] == r2.lo[dims_match]) && - (r.hi[dims_match] == r2.hi[dims_match])) - dims_match++; - else - break; - if((r2.lo[dims_match] == (r.hi[dims_match] + 1))) { - merge_dim = dims_match; // unless checks below fail - // rest of dims must 
be degenerate and match - for(int i = dims_match + 1; i < N; i++) - if((r.lo[i] != r.hi[i]) || - (r2.lo[i] != r.lo[i]) || (r2.hi[i] != r.hi[i])) { - merge_dim = -1; - break; - } - } - } - if(merge_dim >= 0) { - // merge and continue - r.hi[merge_dim] = rects[rect_pos++].hi[merge_dim]; - } else { - // can't merge - return what we've got - return true; - } - } else { - r = rects[rect_pos++]; - nonempty = true; - } + if(rects[rect_pos].lo[0] == (r.hi[0] + 1)) { + merge_dim = 0; + } + } else { + const Rect &r2 = rects[rect_pos]; + int dims_match = 0; + while(dims_match < (N - 1)) + if((r.lo[dims_match] == r2.lo[dims_match]) && + (r.hi[dims_match] == r2.hi[dims_match])) + dims_match++; + else + break; + if((r2.lo[dims_match] == (r.hi[dims_match] + 1))) { + merge_dim = dims_match; // unless checks below fail + // rest of dims must be degenerate and match + for(int i = dims_match + 1; i < N; i++) + if((r.lo[i] != r.hi[i]) || (r2.lo[i] != r.lo[i]) || + (r2.hi[i] != r.hi[i])) { + merge_dim = -1; + break; + } + } + } + if(merge_dim >= 0) { + // merge and continue + r.hi[merge_dim] = rects[rect_pos++].hi[merge_dim]; + } else { + // can't merge - return what we've got + return true; + } + } else { + r = rects[rect_pos++]; + nonempty = true; + } } } } diff --git a/runtime/runtime.mk b/runtime/runtime.mk index bb8eb784a0..69f5400812 100644 --- a/runtime/runtime.mk +++ b/runtime/runtime.mk @@ -666,7 +666,7 @@ NVCC_FLAGS += $(foreach X,$(subst $(COMMA), ,$(GPU_ARCH)),-gencode arch=compute_ NVCC_FLAGS += -gencode arch=compute_$(lastword $(GPU_ARCH))$(COMMA)code=compute_$(lastword $(GPU_ARCH)) endif -NVCC_FLAGS += -Xcudafe --diag_suppress=boolean_controlling_expr_is_constant +NVCC_FLAGS += -Xcudafe --diag_suppress=boolean_controlling_expr_is_constant -Xcudafe --diag_suppress=1444 # cuhook lib ifeq ($(shell uname -s),Darwin) diff --git a/test.py b/test.py index 446ddcdd31..a377a04d47 100755 --- a/test.py +++ b/test.py @@ -511,7 +511,7 @@ def run_test_external2(launcher, root_dir, 
tmp_dir, bin_dir, env, thread_count, # clone_github('stanfordhpccenter', 'HTR-solver', htr_dir, tmp_dir) # NOTE: the legion-ci branch currently requires g++ (not clang) to build and # is REALLY slow unless you set DEBUG=0 - cmd(['git', 'clone', '-b', 'legion-ci', 'git@gitlab.com:insieme1/htr/htr-solver.git', htr_dir]) + cmd(['git', 'clone', '-b', 'feature/newLegionPoints', 'git@gitlab.com:insieme1/htr/htr-solver.git', htr_dir]) htr_env = dict(list(env.items()) + [ ('LEGION_DIR', root_dir), ('LD_LIBRARY_PATH', '%s:%s' % (env.get('LD_LIBRARY_PATH', ''), os.path.join(root_dir, 'bindings', 'regent'))), diff --git a/test/performance/realm/memcpy/memcpy.cc b/test/performance/realm/memcpy/memcpy.cc index 2e4cb3fb4c..4672a0bfb0 100644 --- a/test/performance/realm/memcpy/memcpy.cc +++ b/test/performance/realm/memcpy/memcpy.cc @@ -228,7 +228,7 @@ class IsolatedDenseTestGraphFactory : public TestGraphFactory { graph.clear(); Realm::Point start_pnt(0); Realm::Point end_pnt(0); - end_pnt.x = size - 1; + end_pnt[0] = size - 1; CopyIndexSpace is(Realm::Rect(start_pnt, end_pnt)); std::vector fields(1, sizeof(size_t)); @@ -276,7 +276,7 @@ class ConcurrentDenseTestGraphFactory : public TestGraphFactory { graph.clear(); Realm::Point start_pnt(0); Realm::Point end_pnt(0); - end_pnt.x = size - 1; + end_pnt[0] = size - 1; CopyIndexSpace is(Realm::Rect(start_pnt, end_pnt)); std::vector fields(1, sizeof(size_t)); diff --git a/test/realm/CMakeLists.txt b/test/realm/CMakeLists.txt index d7847be357..82d07e2b87 100644 --- a/test/realm/CMakeLists.txt +++ b/test/realm/CMakeLists.txt @@ -57,7 +57,7 @@ list(APPEND REALM_TESTS ) if(Legion_USE_CUDA) - + # some tests have CUDA source files too set(CUDASRC_memspeed memspeed_gpu.cu) set(CUDASRC_simple_reduce simple_reduce_gpu.cu) @@ -96,6 +96,9 @@ foreach(test IN LISTS REALM_TESTS) set_target_cuda_standard(${test} STANDARD ${Legion_CUDA_STANDARD}) set_target_cuda_architectures(${test} ARCHITECTURES ${Legion_CUDA_ARCH}) 
set_target_cuda_warnings_and_errors(${test} WARN_AS_ERROR ${Legion_BUILD_WARN_AS_ERROR}) + # Remove this once the Realm::Point class is updated + target_compile_options(${test} PRIVATE $<$: + -Xcudafe=--diag_suppress=1444>) elseif(HIPSRC_${test}) hip_add_executable(${test} ${test}.cc ${HIPSRC_${test}}) else() @@ -103,7 +106,6 @@ foreach(test IN LISTS REALM_TESTS) endif() target_link_libraries(${test} Legion::Realm) target_compile_options(${test} PRIVATE $<$:${CXX_BUILD_WARNING_FLAGS}>) - if(Legion_USE_HIP) target_include_directories(${test} PRIVATE ${HIP_INCLUDE_DIRS}) if(Legion_HIP_TARGET STREQUAL "CUDA") diff --git a/test/realm/deppart.cc b/test/realm/deppart.cc index 3c1f31c0d0..554dceb557 100644 --- a/test/realm/deppart.cc +++ b/test/realm/deppart.cc @@ -471,8 +471,8 @@ class MiniAeroTest : public TestInterface { a_face_left.write(pf, global_cell_pointer(fx - (reversed ? 0 : 1), cy, cz)); a_face_right.write(pf, global_cell_pointer(fx - (reversed ? 1 : 0), cy, cz)); a_face_type.write(pf, ftype); - pf.x++; - } + pf[0]++; + } } // down/up faces next @@ -507,8 +507,8 @@ class MiniAeroTest : public TestInterface { a_face_left.write(pf, global_cell_pointer(cx, fy - (reversed ? 0 : 1), cz)); a_face_right.write(pf, global_cell_pointer(cx, fy - (reversed ? 1 : 0), cz)); a_face_type.write(pf, ftype); - pf.x++; - } + pf[0]++; + } } // back/front faces last @@ -543,11 +543,11 @@ class MiniAeroTest : public TestInterface { a_face_left.write(pf, global_cell_pointer(cx, cy, fz - (reversed ? 0 : 1))); a_face_right.write(pf, global_cell_pointer(cx, cy, fz - (reversed ? 
1 : 0))); a_face_type.write(pf, ftype); - pf.x++; - } + pf[0]++; + } } - assert(pf.x == is_faces.bounds.hi.x + 1); + assert(pf[0] == is_faces.bounds.hi[0] + 1); } if(show_graph) { @@ -748,7 +748,7 @@ class MiniAeroTest : public TestInterface { } } - pc.x++; + pc[0]++; } // check faces @@ -819,7 +819,7 @@ class MiniAeroTest : public TestInterface { } } } - pf.x++; + pf[0]++; } } @@ -1578,44 +1578,48 @@ class PennantTest : public TestInterface { for(int zy = zylo; zy < zyhi; zy++) { for(int zx = zxlo; zx < zxhi; zx++) { // get 4 side pointers - Point<1> ps0 = ps; ps.x++; - Point<1> ps1 = ps; ps.x++; - Point<1> ps2 = ps; ps.x++; - Point<1> ps3 = ps; ps.x++; - - // point pointers are ugly because they can be in neighbors - use a helper - Point<1> pp0 = global_point_pointer(zy, zx); // go CCW - Point<1> pp1 = global_point_pointer(zy+1, zx); - Point<1> pp2 = global_point_pointer(zy+1, zx+1); - Point<1> pp3 = global_point_pointer(zy, zx+1); - - a_zone_color.write(pz, i_args.index); - - a_side_mapsz.write(ps0, pz); - a_side_mapsz.write(ps1, pz); - a_side_mapsz.write(ps2, pz); - a_side_mapsz.write(ps3, pz); - - a_side_mapss3.write(ps0, ps1); - a_side_mapss3.write(ps1, ps2); - a_side_mapss3.write(ps2, ps3); - a_side_mapss3.write(ps3, ps0); - - a_side_mapsp1.write(ps0, pp0); - a_side_mapsp1.write(ps1, pp1); - a_side_mapsp1.write(ps2, pp2); - a_side_mapsp1.write(ps3, pp3); - - a_side_ok.write(ps0, true); - a_side_ok.write(ps1, true); - a_side_ok.write(ps2, true); - a_side_ok.write(ps3, true); - - pz.x++; - } + Point<1> ps0 = ps; + ps[0]++; + Point<1> ps1 = ps; + ps[0]++; + Point<1> ps2 = ps; + ps[0]++; + Point<1> ps3 = ps; + ps[0]++; + + // point pointers are ugly because they can be in neighbors - use a helper + Point<1> pp0 = global_point_pointer(zy, zx); // go CCW + Point<1> pp1 = global_point_pointer(zy + 1, zx); + Point<1> pp2 = global_point_pointer(zy + 1, zx + 1); + Point<1> pp3 = global_point_pointer(zy, zx + 1); + + a_zone_color.write(pz, i_args.index); + + 
a_side_mapsz.write(ps0, pz); + a_side_mapsz.write(ps1, pz); + a_side_mapsz.write(ps2, pz); + a_side_mapsz.write(ps3, pz); + + a_side_mapss3.write(ps0, ps1); + a_side_mapss3.write(ps1, ps2); + a_side_mapss3.write(ps2, ps3); + a_side_mapss3.write(ps3, ps0); + + a_side_mapsp1.write(ps0, pp0); + a_side_mapsp1.write(ps1, pp1); + a_side_mapsp1.write(ps2, pp2); + a_side_mapsp1.write(ps3, pp3); + + a_side_ok.write(ps0, true); + a_side_ok.write(ps1, true); + a_side_ok.write(ps2, true); + a_side_ok.write(ps3, true); + + pz[0]++; + } } - assert(pz.x == is_zones.bounds.hi.x + 1); - assert(ps.x == is_sides.bounds.hi.x + 1); + assert(pz[0] == is_zones.bounds.hi[0] + 1); + assert(ps[0] == is_sides.bounds.hi[0] + 1); } if(show_graph) { @@ -1809,7 +1813,7 @@ class PennantTest : public TestInterface { errors++; } } - pz.x++; + pz[0]++; } } @@ -1825,7 +1829,7 @@ class PennantTest : public TestInterface { errors++; } } - ps.x++; + ps[0]++; } } diff --git a/test/realm/multiaffine.cc b/test/realm/multiaffine.cc index c5a4e77595..775c457a45 100644 --- a/test/realm/multiaffine.cc +++ b/test/realm/multiaffine.cc @@ -194,8 +194,8 @@ void top_level_task(const void *args, size_t arglen, int errors = 0; if((TestConfig::dim_mask & 1) != 0) { Rect<1> bounds; - bounds.lo.x = 0; - bounds.hi.x = (1 << TestConfig::log2_size) - 1; + bounds.lo[0] = 0; + bounds.hi[0] = (1 << TestConfig::log2_size) - 1; if(!test_case(p, proc_write, IndexSpace<1>(bounds), 8, TestConfig::random_seed, test_id)) errors++; @@ -205,10 +205,10 @@ void top_level_task(const void *args, size_t arglen, int lx2 = (TestConfig::log2_size / 2); int ly2 = (TestConfig::log2_size - lx2); Rect<2> bounds; - bounds.lo.x = 0; - bounds.hi.x = (1 << lx2) - 1; - bounds.lo.y = 0; - bounds.hi.y = (1 << ly2) - 1; + bounds.lo[0] = 0; + bounds.hi[0] = (1 << lx2) - 1; + bounds.lo[1] = 0; + bounds.hi[1] = (1 << ly2) - 1; if(!test_case(p, proc_write, IndexSpace<2>(bounds), 8, TestConfig::random_seed, test_id)) errors++; diff --git 
a/test/realm/multiaffine_gpu.cu b/test/realm/multiaffine_gpu.cu index 5da53a46e3..f21c76c22c 100644 --- a/test/realm/multiaffine_gpu.cu +++ b/test/realm/multiaffine_gpu.cu @@ -12,14 +12,14 @@ using namespace Realm; template __device__ Point<1,T> choose_thread_point(Rect<1,T> bounds) { - return Point<1,T>(bounds.lo.x + (blockIdx.x * blockDim.x) + threadIdx.x); + return Point<1,T>(bounds.lo[0] + (blockIdx.x * blockDim.x) + threadIdx.x); } template __device__ Point<2,T> choose_thread_point(Rect<2,T> bounds) { - return Point<2,T>(bounds.lo.x + (blockIdx.x * blockDim.x) + threadIdx.x, - bounds.lo.y + (blockIdx.y * blockDim.y) + threadIdx.y); + return Point<2,T>(bounds.lo[0] + (blockIdx.x * blockDim.x) + threadIdx.x, + bounds.lo[1] + (blockIdx.y * blockDim.y) + threadIdx.y); } template @@ -79,4 +79,4 @@ void register_multiaffine_gpu_tasks() CodeDescriptor(ptr_write_task_gpu<2,int>), ProfilingRequestSet(), 0, 0).wait(); -} \ No newline at end of file +} diff --git a/test/realm/scatter.cc b/test/realm/scatter.cc index ddcb3523ce..e7f6236d2f 100644 --- a/test/realm/scatter.cc +++ b/test/realm/scatter.cc @@ -991,18 +991,35 @@ bool scatter_gather_test(const std::vector &mems, T size1, T2 size2, int region2.add_subspaces(is2, pieces2); region2.create_instances(fields2, RoundRobinPicker(mems)).wait(); - region1.template fill
(is1, FID_DATA1, [](Point p) -> DT { return DT(p.x); }, - Event::NO_EVENT).wait(); - region1.template fill
(is1, FID_DATA2, [](Point p) -> DT { return DT(p.x + 100); }, - Event::NO_EVENT).wait(); - - region2.template fill
(is2, FID_DATA1, [](Point p) -> DT { return DT(200 + p.x + 10*p.y); }, - Event::NO_EVENT).wait(); - region2.template fill
(is2, FID_DATA2, [](Point p) -> DT { return DT(300 + p.x + 10*p.y); }, - Event::NO_EVENT).wait(); - - region1.template fill >(is1, FID_PTR1, [=](Point p) -> Point { return Point(p.x % size2); }, - Event::NO_EVENT).wait(); + region1 + .template fill
( + is1, FID_DATA1, [](Point p) -> DT { return DT(p[0]); }, Event::NO_EVENT) + .wait(); + region1 + .template fill
( + is1, FID_DATA2, [](Point p) -> DT { return DT(p[0] + 100); }, + Event::NO_EVENT) + .wait(); + + region2 + .template fill
( + is2, FID_DATA1, + [](Point p) -> DT { return DT(200 + p[0] + 10 * p[1]); }, + Event::NO_EVENT) + .wait(); + region2 + .template fill
( + is2, FID_DATA2, + [](Point p) -> DT { return DT(300 + p[0] + 10 * p[1]); }, + Event::NO_EVENT) + .wait(); + + region1 + .template fill>( + is1, FID_PTR1, + [=](Point p) -> Point { return Point(p[0] % size2); }, + Event::NO_EVENT) + .wait(); if(TestConfig::do_gather) { region1 @@ -1192,13 +1209,22 @@ class RegularFiller { {} template - DT operator()(Point<1,T> p) const { return base + step0 * p.x; } + DT operator()(Point<1, T> p) const + { + return base + step0 * p[0]; + } template - DT operator()(Point<2,T> p) const { return base + step0 * p.x + step1 * p.y; } + DT operator()(Point<2, T> p) const + { + return base + step0 * p[0] + step1 * p[1]; + } template - DT operator()(Point<3,T> p) const { return base + step0 * p.x + step1 * p.y + step2 * p.z; } + DT operator()(Point<3, T> p) const + { + return base + step0 * p[0] + step1 * p[1] + step2 * p[2]; + } protected: DT base, step0, step1, step2; @@ -1279,17 +1305,17 @@ bool range_copy_test(const std::vector& mems, region3.template fill
(is3, FID_DATA1, RegularFiller
(2000), Event::NO_EVENT).wait(); #if 0 - region1.template fill
(is1, FID_DATA1, [](Point p) -> DT { return DT(p.x); }, + region1.template fill
(is1, FID_DATA1, [](Point p) -> DT { return DT(p[0]); }, Event::NO_EVENT).wait(); - region1.template fill
(is1, FID_DATA2, [](Point p) -> DT { return DT(p.x + 100); }, + region1.template fill
(is1, FID_DATA2, [](Point p) -> DT { return DT(p[0] + 100); }, Event::NO_EVENT).wait(); - region2.template fill
(is2, FID_DATA1, [](Point p) -> DT { return DT(200 + p.x + 10*p.y); }, + region2.template fill
(is2, FID_DATA1, [](Point p) -> DT { return DT(200 + p[0] + 10*p[1]); }, Event::NO_EVENT).wait(); - region2.template fill
(is2, FID_DATA2, [](Point p) -> DT { return DT(300 + p.x + 10*p.y); }, + region2.template fill
(is2, FID_DATA2, [](Point p) -> DT { return DT(300 + p[0] + 10*p[1]); }, Event::NO_EVENT).wait(); - region1.template fill >(is1, FID_PTR1, [=](Point p) -> Point { return Point(p.x % size2); }, + region1.template fill >(is1, FID_PTR1, [=](Point p) -> Point { return Point(p[0] % size2); }, Event::NO_EVENT).wait(); #endif diff --git a/test/realm/subgraphs.cc b/test/realm/subgraphs.cc index 32005a0acd..ac7abb701f 100644 --- a/test/realm/subgraphs.cc +++ b/test/realm/subgraphs.cc @@ -67,7 +67,7 @@ void writer_task(const void *args, size_t arglen, AffineAccessor acc(wargs.inst, FID_DATA); for(IndexSpaceIterator<1> it(wargs.is); it.valid; it.step()) for(PointInRectIterator<1> it2(it.rect); it2.valid; it2.step()) { - acc[it2.p] = it2.p.x + wargs.wrval; + acc[it2.p] = it2.p[0] + wargs.wrval; } } @@ -80,7 +80,7 @@ void reader_task(const void *args, size_t arglen, AffineAccessor acc(rargs.inst, FID_DATA); for(IndexSpaceIterator<1> it(rargs.is); it.valid; it.step()) for(PointInRectIterator<1> it2(it.rect); it2.valid; it2.step()) { - int expval = it2.p.x + rargs.rdval; + int expval = it2.p[0] + rargs.rdval; int actval = acc[it2.p]; if(expval == actval) correct++; diff --git a/tutorial/realm/cuda_interop/CMakeLists.txt b/tutorial/realm/cuda_interop/CMakeLists.txt index 9e33bbb8e0..13e3c328fd 100644 --- a/tutorial/realm/cuda_interop/CMakeLists.txt +++ b/tutorial/realm/cuda_interop/CMakeLists.txt @@ -31,6 +31,9 @@ if (REALM_USE_CUDA) set_target_cuda_standard(realm_cuda_interop STANDARD ${Legion_CUDA_STANDARD}) set_target_cuda_architectures(realm_cuda_interop ARCHITECTURES ${Legion_CUDA_ARCH}) set_target_cuda_warnings_and_errors(realm_cuda_interop WARN_AS_ERROR ${Legion_BUILD_WARN_AS_ERROR}) + # Remove this once the Realm::Point class is updated + target_compile_options(realm_cuda_interop PRIVATE $<$: + -Xcudafe=--diag_suppress=1444>) elseif(Legion_USE_HIP) if(Legion_HIP_TARGET STREQUAL "CUDA") add_executable(realm_cuda_interop cuda_interop.cc gpu_kernel.cu) diff --git 
a/tutorial/realm/deferred_allocation/deferred_allocation.cc b/tutorial/realm/deferred_allocation/deferred_allocation.cc index 926f9b3163..749b2eaa07 100644 --- a/tutorial/realm/deferred_allocation/deferred_allocation.cc +++ b/tutorial/realm/deferred_allocation/deferred_allocation.cc @@ -58,8 +58,8 @@ void update(RegionInstance inst, Rect<1, int> bounds, FieldID fid, int add) { AffineAccessor accessor(inst, fid); PointInRectIterator<1, int> pit(bounds); while (pit.valid) { - accessor[pit.p].x = static_cast(pit.p.x + add); - accessor[pit.p].y = static_cast(pit.p.x + add + 1); + accessor[pit.p].x = static_cast(pit.p[0] + add); + accessor[pit.p].y = static_cast(pit.p[0] + add + 1); pit.step(); } } @@ -69,8 +69,8 @@ void verify(RegionInstance inst, Rect<1, int> bounds, FieldID fid, int add) { AffineAccessor accessor(inst, fid); PointInRectIterator<1, int> pit(bounds); while (pit.valid) { - assert(accessor[pit.p].x == static_cast(pit.p.x + add)); - assert(accessor[pit.p].y == static_cast(pit.p.x + add + 1)); + assert(accessor[pit.p].x == static_cast(pit.p[0] + add)); + assert(accessor[pit.p].y == static_cast(pit.p[0] + add + 1)); log_app.info() << "p=" << pit.p << " x=" << accessor[pit.p].x << " y=" << accessor[pit.p].y; pit.step(); diff --git a/tutorial/realm/region_instances/region_instances.cc b/tutorial/realm/region_instances/region_instances.cc index 86f5fc3666..2a6e612784 100644 --- a/tutorial/realm/region_instances/region_instances.cc +++ b/tutorial/realm/region_instances/region_instances.cc @@ -51,8 +51,8 @@ void update(RegionInstance inst, Rect<1, int> bounds, FieldID fid, int add) { AffineAccessor accessor(inst, fid); PointInRectIterator<1, int> pit(bounds); while (pit.valid) { - accessor[pit.p].x = static_cast(pit.p.x + add); - accessor[pit.p].y = static_cast(pit.p.x + add + 1); + accessor[pit.p].x = static_cast(pit.p[0] + add); + accessor[pit.p].y = static_cast(pit.p[0] + add + 1); pit.step(); } } @@ -62,8 +62,8 @@ void verify(RegionInstance inst, Rect<1, 
int> bounds, FieldID fid, int add) { AffineAccessor accessor(inst, fid); PointInRectIterator<1, int> pit(bounds); while (pit.valid) { - assert(accessor[pit.p].x == static_cast(pit.p.x + add)); - assert(accessor[pit.p].y == static_cast(pit.p.x + add + 1)); + assert(accessor[pit.p].x == static_cast(pit.p[0] + add)); + assert(accessor[pit.p].y == static_cast(pit.p[0] + add + 1)); log_app.info() << "p=" << pit.p << " x=" << accessor[pit.p].x << " y=" << accessor[pit.p].y; pit.step();