Merge branch 'warnpoint' into 'master'

Add Deprecated Warning for Point Members

See merge request StanfordLegion/legion!1336
StanfordLegion · Jun 23, 2024 · bc02aaa · bc02aaa
2 parents dbd7448 + 2629e0b
commit bc02aaa
Show file tree

Hide file tree

Showing 31 changed files with 641 additions and 551 deletions.
diff --git a/bindings/regent/CMakeLists.txt b/bindings/regent/CMakeLists.txt
@@ -66,6 +66,9 @@ if(Legion_USE_CUDA)
   set_target_cuda_standard(Regent STANDARD ${Legion_CUDA_STANDARD})
   set_target_cuda_architectures(Regent ARCHITECTURES ${Legion_CUDA_ARCH})
   set_target_cuda_warnings_and_errors(Regent WARN_AS_ERROR ${Legion_BUILD_WARN_AS_ERROR})
+  # Remove this once the Realm::Point class is updated
+  target_compile_options(Regent PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
+                           -Xcudafe=--diag_suppress=1444>)
 elseif(Legion_USE_HIP)
   target_include_directories(Regent PRIVATE ${HIP_INCLUDE_DIRS})
   if (Legion_HIP_TARGET STREQUAL "CUDA")

diff --git a/examples/attach_2darray_c_fortran_layout/attach_2darray.cc b/examples/attach_2darray_c_fortran_layout/attach_2darray.cc
@@ -232,8 +232,8 @@ void read_field_task(const Task *task,
   int errors = 0;
   for (PointInRectIterator<2> pir(rect); pir(); pir++) {
     double expval = (args.base_val +
-		     ((*pir).x * args.step_x) +
-		     ((*pir).y * args.step_y));
+		     ((*pir)[0] * args.step_x) +
+		     ((*pir)[1] * args.step_y));
     double actval = acc[*pir];
     if(fabs(actval - expval) < 1e-10) {
       printf("%.1f\t", actval);

diff --git a/examples/circuit/CMakeLists.txt b/examples/circuit/CMakeLists.txt
@@ -34,6 +34,9 @@ if(Legion_USE_CUDA)
   set_target_cuda_standard(circuit STANDARD ${Legion_CUDA_STANDARD})
   set_target_cuda_architectures(circuit ARCHITECTURES ${Legion_CUDA_ARCH})
   set_target_cuda_warnings_and_errors(circuit WARN_AS_ERROR ${Legion_BUILD_WARN_AS_ERROR})
+  # Remove this once the Realm::Point class is updated
+  target_compile_options(circuit PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
+                           -Xcudafe=--diag_suppress=1444>)
 elseif(Legion_USE_HIP)
   set(GPU_SOURCES circuit_gpu.cu)
   if(Legion_HIP_TARGET STREQUAL "CUDA")

diff --git a/examples/future_instance/CMakeLists.txt b/examples/future_instance/CMakeLists.txt
@@ -31,5 +31,8 @@ add_executable(future_instance ${GPU_SOURCES})
 set_target_cuda_standard(future_instance STANDARD ${Legion_CUDA_STANDARD})
 set_target_cuda_architectures(future_instance ARCHITECTURES ${Legion_CUDA_ARCH})
 set_target_cuda_warnings_and_errors(future_instance WARN_AS_ERROR ${Legion_BUILD_WARN_AS_ERROR})
+# Remove this once the Realm::Point class is updated
+target_compile_options(future_instance PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
+                         -Xcudafe=--diag_suppress=1444>)
 
 target_link_libraries(future_instance Legion::Legion)
diff --git a/examples/kokkos_saxpy/kokkos_saxpy.cc b/examples/kokkos_saxpy/kokkos_saxpy.cc
@@ -138,8 +138,8 @@ class SaxpyTask {
 				       typename execution_space::memory_space> y_ofs = acc_y.accessor;
 
       Kokkos::RangePolicy<execution_space> range(runtime->get_executing_processor(ctx).kokkos_work_space(),
-						 subspace.lo.x,
-						 subspace.hi.x + 1);
+						 subspace.lo[0],
+						 subspace.hi[0] + 1);
       Kokkos::parallel_for(range,
 			   SaxpyFunctor<execution_space>(args.alpha, x_ofs, y_ofs));
     }
@@ -185,15 +185,15 @@ class SdotTask {
       //   (i.e. the subspace on which you have privileges matches what
       //   KokkosBlas is going to compute over), you can just use
       //   OffsetView::view() to convert
-      assert(x.begin(0) == subspace.lo.x);
+      assert(x.begin(0) == subspace.lo[0]);
       x_rel = x.view();
 
       // option 2: if you're not sure what the OffsetView's bounds are
       //   (or if you just like more self-documenting code) you can create
       //   the subview with the exact bounds you want and then convert that
       y_rel = Kokkos::Experimental::subview(y,
-					    std::make_pair(subspace.lo.x,
-							   subspace.hi.x + 1))
+					    std::make_pair(subspace.lo[0],
+							   subspace.hi[0] + 1))
 	.view();
 
       // the KokkosBlas::dot implementation that returns a float directly
@@ -217,8 +217,8 @@ class SdotTask {
     }
 #endif
     Kokkos::RangePolicy<execution_space> range(runtime->get_executing_processor(ctx).kokkos_work_space(),
-					       subspace.lo.x,
-					       subspace.hi.x + 1);
+					       subspace.lo[0],
+					       subspace.hi[0] + 1);
     float sum = 0.0f;
     // Kokkos does not support CUDA lambdas by default - check that they
     //  are present
@@ -259,15 +259,15 @@ class InitTask {
 		 Kokkos::LayoutStride,
 		 typename execution_space::memory_space> view = acc.accessor;
 
-    size_t n_elements = subspace.hi.x - subspace.lo.x + 1;
+    size_t n_elements = subspace.hi[0] - subspace.lo[0] + 1;
     Kokkos::RangePolicy<execution_space> range(runtime->get_executing_processor(ctx).kokkos_work_space(),
 					       0, n_elements);
     Kokkos::parallel_for(range,
 			 KOKKOS_LAMBDA (int i) {
 			   // using a relative address, but value to store
 			   //  is based on global index
 			   // have to use a relative address!
-			   view(i) = (i + subspace.lo.x) + offset;
+			   view(i) = (i + subspace.lo[0]) + offset;
 			 });
   }
 };

diff --git a/examples/python_interop/python_interop.cc b/examples/python_interop/python_interop.cc
@@ -55,7 +55,7 @@ int64_t init_task(const Task *task,
 
   // Fill memory with some recognizable pattern.
   for (PointInRectIterator<2> pir(rect); pir(); pir++) {
-    double value = (double)((*pir)[0]*(rect.hi.y - rect.lo.y + 1) + (*pir)[1]);
+    double value = (double)((*pir)[0]*(rect.hi[1] - rect.lo[1] + 1) + (*pir)[1]);
     acc[*pir] = value;
   }
 

diff --git a/examples/realm_saxpy/CMakeLists.txt b/examples/realm_saxpy/CMakeLists.txt
@@ -29,6 +29,9 @@ if(Legion_USE_CUDA)
   set_target_cuda_standard(realm_saxpy STANDARD ${Legion_CUDA_STANDARD})
   set_target_cuda_architectures(realm_saxpy ARCHITECTURES ${Legion_CUDA_ARCH})
   set_target_cuda_warnings_and_errors(realm_saxpy WARN_AS_ERROR ${Legion_BUILD_WARN_AS_ERROR})
+  # Remove this once the Realm::Point class is updated
+  target_compile_options(realm_saxpy PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
+                           -Xcudafe=--diag_suppress=1444>)
 elseif(Legion_USE_HIP)
   set(GPU_SOURCES realm_saxpy_gpu.cu)
   if(Legion_HIP_TARGET STREQUAL "CUDA")

diff --git a/examples/realm_stencil/realm_stencil.cc b/examples/realm_stencil/realm_stencil.cc
@@ -206,15 +206,15 @@ void get_base_and_stride(RegionInstance inst, FieldID fid, DTYPE *&base, size_t
 {
   AffineAccessor<DTYPE, 2, coord_t> acc = AffineAccessor<DTYPE, 2, coord_t>(inst, fid);
   base = reinterpret_cast<DTYPE *>(acc.ptr(inst.get_indexspace<2, coord_t>().bounds.lo));
-  assert(acc.strides.x == sizeof(DTYPE));
-  stride = acc.strides.y;
+  assert(acc.strides[0] == sizeof(DTYPE));
+  stride = acc.strides[1];
 }
 
 void dump(RegionInstance inst, FieldID fid, Rect2 bounds, const char *prefix)
 {
   AffineAccessor<DTYPE, 2, coord_t> acc = AffineAccessor<DTYPE, 2, coord_t>(inst, fid);
   for (PointInRectIterator<2, coord_t> it(bounds); it.valid; it.step()) {
-    printf("%s: %2lld %2lld value %8.3f\n", prefix, it.p.x, it.p.y, acc.read(it.p));
+    printf("%s: %2lld %2lld value %8.3f\n", prefix, it.p[0], it.p[1], acc.read(it.p));
   }
 }
 
@@ -253,10 +253,10 @@ void inline_copy_raw(RegionInstance src_inst, RegionInstance dst_inst,
 
   copy2D(src_base, dst_base,
          src_stride/sizeof(DTYPE),
-         src_offset.x, src_offset.x + size.x,
-         src_offset.y, src_offset.y + size.y,
+         src_offset[0], src_offset[0] + size[0],
+         src_offset[1], src_offset[1] + size[1],
          dst_stride/sizeof(DTYPE),
-         dst_offset.x, dst_offset.y);
+         dst_offset[0], dst_offset[1]);
 }
 
 void stencil_task(const void *args, size_t arglen,
@@ -292,10 +292,10 @@ void stencil_task(const void *args, size_t arglen,
 
   stencil(private_base_input, private_base_output, weights,
           private_stride_input/sizeof(DTYPE),
-          interior_offset.x,
-          interior_offset.x + interior_size.x,
-          interior_offset.y,
-          interior_offset.y + interior_size.y);
+          interior_offset[0],
+          interior_offset[0] + interior_size[0],
+          interior_offset[1],
+          interior_offset[1] + interior_size[1]);
 }
 
 void increment_task(const void *args, size_t arglen,
@@ -314,10 +314,10 @@ void increment_task(const void *args, size_t arglen,
 
   increment(private_base_input,
             private_stride_input/sizeof(DTYPE),
-            outer_offset.x,
-            outer_offset.x + outer_size.x,
-            outer_offset.y,
-            outer_offset.y + outer_size.y);
+            outer_offset[0],
+            outer_offset[0] + outer_size[0],
+            outer_offset[1],
+            outer_offset[1] + outer_size[1]);
 
   if (a.xp_inst.exists())
     inline_copy(a.private_inst, a.xp_inst, FID_INPUT,
@@ -734,20 +734,20 @@ void top_level_task(const void *args, size_t arglen,
     std::vector<Event> events;
     for (PointInRectIterator<2, coord_t> it(shards); it.valid; it.step()) {
       Point2 i(it.p);
-      Rect2 xp_bounds(Point2(x_blocks[i.x].hi + 1,      y_blocks[i.y].lo),
-                      Point2(x_blocks[i.x].hi + RADIUS, y_blocks[i.y].hi));
-      Rect2 xm_bounds(Point2(x_blocks[i.x].lo - RADIUS, y_blocks[i.y].lo),
-                      Point2(x_blocks[i.x].lo - 1,      y_blocks[i.y].hi));
-      Rect2 yp_bounds(Point2(x_blocks[i.x].lo,          y_blocks[i.y].hi + 1),
-                      Point2(x_blocks[i.x].hi,          y_blocks[i.y].hi + RADIUS));
-      Rect2 ym_bounds(Point2(x_blocks[i.x].lo,          y_blocks[i.y].lo - RADIUS),
-                      Point2(x_blocks[i.x].hi,          y_blocks[i.y].lo - 1));
+      Rect2 xp_bounds(Point2(x_blocks[i[0]].hi + 1,      y_blocks[i[1]].lo),
+                      Point2(x_blocks[i[0]].hi + RADIUS, y_blocks[i[1]].hi));
+      Rect2 xm_bounds(Point2(x_blocks[i[0]].lo - RADIUS, y_blocks[i[1]].lo),
+                      Point2(x_blocks[i[0]].lo - 1,      y_blocks[i[1]].hi));
+      Rect2 yp_bounds(Point2(x_blocks[i[0]].lo,          y_blocks[i[1]].hi + 1),
+                      Point2(x_blocks[i[0]].hi,          y_blocks[i[1]].hi + RADIUS));
+      Rect2 ym_bounds(Point2(x_blocks[i[0]].lo,          y_blocks[i[1]].lo - RADIUS),
+                      Point2(x_blocks[i[0]].hi,          y_blocks[i[1]].lo - 1));
 
       Processor shard_proc(shard_procs[i]);
       Memory memory(proc_regmems[shard_proc]);
 
       // Region allocation has to be done on the remote node
-      if (i.x != shards.hi.x) {
+      if (i[0] != shards.hi[0]) {
         CreateRegionArgs args;
         args.bounds = xp_bounds;
         args.memory = memory;
@@ -756,7 +756,7 @@ void top_level_task(const void *args, size_t arglen,
         events.push_back(shard_proc.spawn(CREATE_REGION_TASK, &args, sizeof(args)));
       }
 
-      if (i.x != shards.lo.x) {
+      if (i[0] != shards.lo[0]) {
         CreateRegionArgs args;
         args.bounds = xm_bounds;
         args.memory = memory;
@@ -765,7 +765,7 @@ void top_level_task(const void *args, size_t arglen,
         events.push_back(shard_proc.spawn(CREATE_REGION_TASK, &args, sizeof(args)));
       }
 
-      if (i.y != shards.hi.y) {
+      if (i[1] != shards.hi[1]) {
         CreateRegionArgs args;
         args.bounds = yp_bounds;
         args.memory = memory;
@@ -774,7 +774,7 @@ void top_level_task(const void *args, size_t arglen,
         events.push_back(shard_proc.spawn(CREATE_REGION_TASK, &args, sizeof(args)));
       }
 
-      if (i.y != shards.lo.y) {
+      if (i[1] != shards.lo[1]) {
         CreateRegionArgs args;
         args.bounds = ym_bounds;
         args.memory = memory;
@@ -800,15 +800,15 @@ void top_level_task(const void *args, size_t arglen,
   for (PointInRectIterator<2, coord_t> it(shards); it.valid; it.step()) {
     Point2 i(it.p);
 
-    if (i.x != shards.hi.x) xp_bars_empty[i] = Barrier::create_barrier(1);
-    if (i.x != shards.lo.x) xm_bars_empty[i] = Barrier::create_barrier(1);
-    if (i.y != shards.hi.y) yp_bars_empty[i] = Barrier::create_barrier(1);
-    if (i.y != shards.lo.y) ym_bars_empty[i] = Barrier::create_barrier(1);
+    if (i[0] != shards.hi[0]) xp_bars_empty[i] = Barrier::create_barrier(1);
+    if (i[0] != shards.lo[0]) xm_bars_empty[i] = Barrier::create_barrier(1);
+    if (i[1] != shards.hi[1]) yp_bars_empty[i] = Barrier::create_barrier(1);
+    if (i[1] != shards.lo[1]) ym_bars_empty[i] = Barrier::create_barrier(1);
 
-    if (i.x != shards.hi.x) xp_bars_full[i] = Barrier::create_barrier(1);
-    if (i.x != shards.lo.x) xm_bars_full[i] = Barrier::create_barrier(1);
-    if (i.y != shards.hi.y) yp_bars_full[i] = Barrier::create_barrier(1);
-    if (i.y != shards.lo.y) ym_bars_full[i] = Barrier::create_barrier(1);
+    if (i[0] != shards.hi[0]) xp_bars_full[i] = Barrier::create_barrier(1);
+    if (i[0] != shards.lo[0]) xm_bars_full[i] = Barrier::create_barrier(1);
+    if (i[1] != shards.hi[1]) yp_bars_full[i] = Barrier::create_barrier(1);
+    if (i[1] != shards.lo[1]) ym_bars_full[i] = Barrier::create_barrier(1);
   }
 
   // Create barrier to keep shard launch synchronized
@@ -824,15 +824,15 @@ void top_level_task(const void *args, size_t arglen,
     for (PointInRectIterator<2, coord_t> it(shards); it.valid; it.step()) {
       Point2 i(it.p);
 
-      Rect2 interior_bounds(Point2(x_blocks[i.x].lo, y_blocks[i.y].lo),
-                            Point2(x_blocks[i.x].hi, y_blocks[i.y].hi));
-      Rect2 exterior_bounds(Point2(x_blocks[i.x].lo - RADIUS, y_blocks[i.y].lo - RADIUS),
-                            Point2(x_blocks[i.x].hi + RADIUS, y_blocks[i.y].hi + RADIUS));
+      Rect2 interior_bounds(Point2(x_blocks[i[0]].lo, y_blocks[i[1]].lo),
+                            Point2(x_blocks[i[0]].hi, y_blocks[i[1]].hi));
+      Rect2 exterior_bounds(Point2(x_blocks[i[0]].lo - RADIUS, y_blocks[i[1]].lo - RADIUS),
+                            Point2(x_blocks[i[0]].hi + RADIUS, y_blocks[i[1]].hi + RADIUS));
       // As interior, but bloated only on the outer edges
-      Rect2 outer_bounds(Point2(x_blocks[i.x].lo - (i.x == shards.lo.x ? RADIUS : 0),
-                                y_blocks[i.y].lo - (i.y == shards.lo.y ? RADIUS : 0)),
-                         Point2(x_blocks[i.x].hi + (i.x == shards.hi.x ? RADIUS : 0),
-                                y_blocks[i.y].hi + (i.y == shards.hi.y ? RADIUS : 0)));
+      Rect2 outer_bounds(Point2(x_blocks[i[0]].lo - (i[0] == shards.lo[0] ? RADIUS : 0),
+                                y_blocks[i[1]].lo - (i[1] == shards.lo[1] ? RADIUS : 0)),
+                         Point2(x_blocks[i[0]].hi + (i[0] == shards.hi[0] ? RADIUS : 0),
+                                y_blocks[i[1]].hi + (i[1] == shards.hi[1] ? RADIUS : 0)));
 
       // Pack arguments
       ShardArgs args;

diff --git a/examples/thrust_interop/CMakeLists.txt b/examples/thrust_interop/CMakeLists.txt
@@ -32,5 +32,8 @@ add_executable(thrust_interop ${CPU_SOURCES} ${GPU_SOURCES})
 set_target_cuda_standard(thrust_interop STANDARD ${Legion_CUDA_STANDARD})
 set_target_cuda_architectures(thrust_interop ARCHITECTURES ${Legion_CUDA_ARCH})
 set_target_cuda_warnings_and_errors(thrust_interop WARN_AS_ERROR ${Legion_BUILD_WARN_AS_ERROR})
+# Remove this once the Realm::Point class is updated
+target_compile_options(thrust_interop PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
+                         -Xcudafe=--diag_suppress=1444>)
 target_link_libraries(thrust_interop Legion::Legion)
 target_compile_options(thrust_interop PRIVATE $<$<COMPILE_LANGUAGE:CXX>:${CXX_BUILD_WARNING_FLAGS}>)
diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt
@@ -277,6 +277,7 @@ if(Legion_USE_CUDA)
   #TODO(apryakhin): Enable with cmake 3.27
   #set_property(TARGET realm_cuda_fatbin PROPERTY CUDA_FATBIN_COMPILATION ON)
   target_compile_options(realm_cuda_fatbin PRIVATE $<$<COMPILE_LANG_AND_ID:CUDA,NVIDIA>:
+                         -Xcudafe=--diag_suppress=1444 # Remove once Point class is updated
                          -Xcudafe=--diag_suppress=boolean_controlling_expr_is_constant
                          --fatbin>)
   target_compile_definitions(realm_cuda_fatbin PRIVATE "CUDA_FATBIN_COMPILATION")
@@ -934,6 +935,7 @@ add_library(Legion::LegionRuntime ALIAS LegionRuntime)
 # Add CUDA-specific properties
 if(Legion_USE_CUDA)
   target_compile_options(LegionRuntime PRIVATE $<$<COMPILE_LANG_AND_ID:CUDA,NVIDIA>:
+                         -Xcudafe=--diag_suppress=1444 # Remove once Point class is updated
                          -Xcudafe=--diag_suppress=boolean_controlling_expr_is_constant>)
 
   set_target_cuda_warnings_and_errors(LegionRuntime WARN_AS_ERROR ${Legion_BUILD_WARN_AS_ERROR})
@@ -971,6 +973,7 @@ if(Legion_USE_HIP)
   if(Legion_HIP_TARGET STREQUAL "CUDA")
     target_sources(LegionRuntime PRIVATE ${LEGION_HIP_SRC})
     target_compile_options(LegionRuntime PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
+                           -Xcudafe=--diag_suppress=1444 # Remove once Point class is updated
                            -Xcudafe=--diag_suppress=boolean_controlling_expr_is_constant>)
     target_include_directories(LegionRuntime PRIVATE ${HIP_ROOT_DIR}/include)
     # complex reduction ops bring in a public dependency on cuda headers

diff --git a/runtime/realm/deppart/byfield.cc b/runtime/realm/deppart/byfield.cc
@@ -117,40 +117,43 @@ namespace Realm {
 	while(true) {
 	  FT val = a_data.read(p);
 	  Point<N,T> p2 = p;
-	  while(p2.x < r.hi.x) {
-	    Point<N,T> p3 = p2;
-	    p3.x++;
-	    FT val2 = a_data.read(p3);
-	    if(val != val2) {
-	      // record old strip
-	      BM *&bmp = bitmasks[val];
-	      if(!bmp) bmp = new BM;
-	      bmp->add_rect(Rect<N,T>(p,p2));
-	      //std::cout << val << ": " << p << ".." << p2 << std::endl;
-	      val = val2;
-	      p = p3;
-	    }
-	    p2 = p3;
-	  }
-	  // record whatever strip we have at the end
-	  BM *&bmp = bitmasks[val];
-	  if(!bmp) bmp = new BM;
-	  bmp->add_rect(Rect<N,T>(p,p2));
-	  //std::cout << val << ": " << p << ".." << p2 << std::endl;
-
-	  // are we done?
-	  if(p2 == r.hi) break;
-
-	  // now go to the next span, if there is one (can't be in 1-D)
-	  assert(N > 1);
-	  for(int i = 0; i < (N - 1); i++) {
-	    p[i] = r.lo[i];
-	    if(p[i + 1] < r.hi[i+1]) {
-	      p[i + 1] += 1;
-	      break;
-	    }
-	  }
-	}
+          while(p2[0] < r.hi[0]) {
+            Point<N, T> p3 = p2;
+            p3[0]++;
+            FT val2 = a_data.read(p3);
+            if(val != val2) {
+              // record old strip
+              BM *&bmp = bitmasks[val];
+              if(!bmp)
+                bmp = new BM;
+              bmp->add_rect(Rect<N, T>(p, p2));
+              // std::cout << val << ": " << p << ".." << p2 << std::endl;
+              val = val2;
+              p = p3;
+            }
+            p2 = p3;
+          }
+          // record whatever strip we have at the end
+          BM *&bmp = bitmasks[val];
+          if(!bmp)
+            bmp = new BM;
+          bmp->add_rect(Rect<N, T>(p, p2));
+          // std::cout << val << ": " << p << ".." << p2 << std::endl;
+
+          // are we done?
+          if(p2 == r.hi)
+            break;
+
+          // now go to the next span, if there is one (can't be in 1-D)
+          assert(N > 1);
+          for(int i = 0; i < (N - 1); i++) {
+            p[i] = r.lo[i];
+            if(p[i + 1] < r.hi[i + 1]) {
+              p[i + 1] += 1;
+              break;
+            }
+          }
+        }
       }
     }
   }