fix for numpy deprecation of truth value of empty array

HDFGroup · Jul 10, 2023 · 115f088 · 115f088
1 parent 36829ff
commit 115f088
Show file tree

Hide file tree

Showing 6 changed files with 91 additions and 82 deletions.
diff --git a/hsds/datanode_lib.py b/hsds/datanode_lib.py
@@ -956,17 +956,6 @@ async def get_chunk(
 
     # validate arguments
     if s3path:
-        """
-        if s3size == 0 and s3offset == 0:
-            # uninitialized chunk ref
-            msg = f"reference chunk not set for id: {chunk_id}, returning 404"
-            log.info(msg)
-            raise HTTPNotFound()  # not found return 404
-        if s3size == 0:
-            msg = f"Unexpected get_chunk parameter - s3path: {s3path} with size 0"
-            log.error(msg)
-            raise HTTPInternalServerError()
-        """
         if bucket:
             msg = "get_chunk - bucket arg should not be used with s3path"
             log.error(msg)

diff --git a/hsds/util/chunkUtil.py b/hsds/util/chunkUtil.py
@@ -8,6 +8,63 @@
 PRIMES = [29, 31, 37, 41, 43, 47, 53, 59, 61, 67]  # for chunk partitioning
 
 
+# compare two numpy arrays.
+# return true if the same (exclusive of null vs. empty array)
+# false otherwise
+
+
+def ndarray_compare(arr1, arr2):
+    """Recursively compare two numpy arrays, np.void records, or scalars.
+
+    An empty ndarray is treated as equal to a falsy non-array value (e.g. 0),
+    so a null element and an empty array element compare as the same.
+    Arrays must agree in both shape and dtype to be considered equal.
+
+    Returns a truthy value if the arguments match, a falsy value otherwise.
+    """
+    is_arr1 = isinstance(arr1, np.ndarray)
+    is_arr2 = isinstance(arr2, np.ndarray)
+    if not is_arr1 and not is_arr2:
+        is_void1 = isinstance(arr1, np.void)
+        is_void2 = isinstance(arr2, np.void)
+        if not is_void1 and not is_void2:
+            return arr1 == arr2
+        if is_void1 and not is_void2:
+            return arr1.size == 0 and not arr2
+        if not is_void1 and is_void2:
+            return not arr1 and arr2.size == 0
+        # both np.voids - compare them field by field
+        if arr1.size != arr2.size:
+            return False
+        if len(arr1) != len(arr2):
+            return False
+        for i in range(len(arr1)):
+            if not ndarray_compare(arr1[i], arr2[i]):
+                return False
+        return True
+
+    if is_arr1 and not is_arr2:
+        # same only if arr1 is empty and arr2 is falsy (e.g. 0)
+        return arr1.size == 0 and not arr2
+    if not is_arr1 and is_arr2:
+        # same only if arr1 is falsy (e.g. 0) and arr2 is empty
+        return not arr1 and arr2.size == 0
+
+    # two ndarrays...
+    if arr1.shape != arr2.shape:
+        return False
+    if arr1.dtype != arr2.dtype:
+        # compare arr1 against arr2 (not arr2 against itself)
+        return False
+    # flatten with reshape(-1) rather than np.prod(shape): np.prod(()) is the
+    # float 1.0, which reshape() rejects for zero-dimensional arrays
+    flat1 = arr1.reshape(-1)
+    flat2 = arr2.reshape(-1)
+    for i in range(flat1.size):
+        if not ndarray_compare(flat1[i], flat2[i]):
+            return False
+    return True
+
 """
 Convert list that may contain bytes type elements to list of string elements
 
@@ -803,27 +860,38 @@ def chunkWriteSelection(chunk_arr=None, slices=None, data=None):
     """
     Write data for requested chunk and selection
     """
-    log.info("chunkWriteSelection")
     dims = chunk_arr.shape
 
     rank = len(dims)
 
     if rank == 0:
         msg = "No dimension passed to chunkReadSelection"
+        log.error(msg)
         raise ValueError(msg)
     if len(slices) != rank:
         msg = "Selection rank does not match dataset rank"
+        log.error(msg)
         raise ValueError(msg)
     if len(data.shape) != rank:
         msg = "Input arr does not match dataset rank"
+        log.error(msg)
         raise ValueError(msg)
 
     updated = False
     # check if the new data modifies the array or not
-    if not np.array_equal(chunk_arr[slices], data):
-        # update chunk array
-        chunk_arr[slices] = data
-        updated = True
+    # TBD - is this worth the cost of comparing two arrays element by element?
+    try:
+        if not ndarray_compare(chunk_arr[slices], data):
+            # if not np.array_equal(chunk_arr[slices], data):
+            # update chunk array
+            chunk_arr[slices] = data
+            updated = True
+    except ValueError as ve:
+        msg = f"array_equal ValueError, chunk_arr[{slices}]: {chunk_arr[slices]} "
+        msg += f"data: {data}, data type: {type(data)} ve: {ve}"
+        log.error(msg)
+        raise
+
     return updated
 
 

diff --git a/hsds/util/storUtil.py b/hsds/util/storUtil.py
@@ -361,18 +361,18 @@ async def getStorBytes(app,
         # uncompress chunks within the fetched data and store to
         # chunk bytes
         if not h5_size:
-            log.error("getStoreBytes - h5_size not set")
+            log.error("getStorBytes - h5_size not set")
             raise HTTPInternalServerError()
         if not chunk_bytes:
-            log.error("getStoreBytes - chunk_bytes not set")
+            log.error("getStorBytes - chunk_bytes not set")
             raise HTTPInternalServerError()
         if len(chunk_locations) * h5_size < len(chunk_bytes):
-            log.error(f"getStoreBytes - invalid chunk_bytes length: {len(chunk_bytes)}")
+            log.error(f"getStorBytes - invalid chunk_bytes length: {len(chunk_bytes)}")
         for chunk_location in chunk_locations:
             log.debug(f"getStoreBytes - processing chunk_location: {chunk_location}")
             n = chunk_location.offset - offset
             if n < 0:
-                log.warn(f"getStoreBytes - unexpected offset for chunk_location: {chunk_location}")
+                log.warn(f"getStorBytes - unexpected offset for chunk_location: {chunk_location}")
                 continue
             m = n + chunk_location.length
             log.debug(f"getStorBytes - extracting chunk from data[{n}:{m}]")

diff --git a/tests/integ/query_test.py b/tests/integ/query_test.py
@@ -502,9 +502,8 @@ def testPutQuery(self):
         params["Limit"] = 1
         update_value = {"open": 999}
         payload = {"value": update_value}
-        rsp = self.session.put(
-            req, params=params, data=json.dumps(update_value), headers=headers
-        )
+
+        rsp = self.session.put(req, params=params, data=json.dumps(update_value), headers=headers)
         self.assertEqual(rsp.status_code, 200)
         rspJson = json.loads(rsp.text)
         self.assertTrue("hrefs" in rspJson)
@@ -537,9 +536,7 @@ def testPutQuery(self):
         params = {"query": "open == 0"}
         update_value = {"open": -999}
         payload = {"value": update_value}
-        rsp = self.session.put(
-            req, params=params, data=json.dumps(update_value), headers=headers
-        )
+        rsp = self.session.put(req, params=params, data=json.dumps(update_value), headers=headers)
         self.assertEqual(rsp.status_code, 200)
         rspJson = json.loads(rsp.text)
         self.assertTrue("value" in rspJson)

diff --git a/tests/integ/vlen_test.py b/tests/integ/vlen_test.py
@@ -17,13 +17,15 @@
 
 sys.path.append("../..")
 from hsds.util.arrayUtil import arrayToBytes, bytesToArray
+from hsds.util.chunkUtil import ndarray_compare
 from hsds.util.hdf5dtype import createDataType
 
 
 class VlenTest(unittest.TestCase):
     def __init__(self, *args, **kwargs):
         super(VlenTest, self).__init__(*args, **kwargs)
         self.base_domain = helper.getTestDomainName(self.__class__.__name__)
+        print(self.base_domain)
         helper.setupDomain(self.base_domain)
         self.endpoint = helper.getEndpoint()
 
@@ -77,9 +79,7 @@ def testPutVLenInt(self):
 
         # write values to dataset
         data = [
-            [
-                1,
-            ],
+            [1,],
             [1, 2],
             [1, 2, 3],
             [1, 2, 3, 4],
@@ -97,6 +97,8 @@ def testPutVLenInt(self):
         self.assertTrue("value" in rspJson)
         value = rspJson["value"]
         self.assertEqual(len(value), 4)
+        print("value:", value)
+        print("data:", data)
         for i in range(4):
             self.assertEqual(value[i], data[i])
 
@@ -221,6 +223,9 @@ def testPutVLenIntBinary(self):
         for i in range(count):
             self.assertEqual(value[i], test_values[i])
 
+        print("data:", data)
+        print("arr:", arr)
+
         # read back a selection
         params = {"select": "[2:3]"}
         rsp = self.session.get(req, headers=headers, params=params)
@@ -291,6 +296,8 @@ def testPutVLen2DInt(self):
         self.assertTrue("value" in rspJson)
         value = rspJson["value"]
         self.assertEqual(len(value), nrow)
+        print("value:", value)
+        print("data:", data)
 
         for i in range(nrow):
             for j in range(ncol):

diff --git a/tests/unit/array_util_test.py b/tests/unit/array_util_test.py
@@ -29,59 +29,7 @@
 from hsds.util.hdf5dtype import special_dtype
 from hsds.util.hdf5dtype import check_dtype
 from hsds.util.hdf5dtype import createDataType
-
-# compare two numpy arrays.
-# return true if the same (exclusive of null vs. empty array)
-# false otherwise
-
-
-def ndarray_compare(arr1, arr2):
-    if not isinstance(arr1, np.ndarray) and not isinstance(arr2, np.ndarray):
-        if not isinstance(arr1, np.void) and not isinstance(arr2, np.void):
-            return arr1 == arr2
-        if isinstance(arr1, np.void) and not isinstance(arr2, np.void):
-            if arr1.size == 0 and not arr2:
-                return True
-            else:
-                return False
-        if not isinstance(arr1, np.void) and isinstance(arr2, np.void):
-            if not arr1 and arr2.size == 0:
-                return True
-            else:
-                return False
-        # both np.voids
-        if arr1.size != arr2.size:
-            return False
-        for i in range(arr1.size):
-            if not ndarray_compare(arr1[i], arr2[i]):
-                return False
-        return True
-
-    if isinstance(arr1, np.ndarray) and not isinstance(arr2, np.ndarray):
-        # same only if arr1 is empty and arr2 is 0
-        if arr1.size == 0 and not arr2:
-            return True
-        else:
-            return False
-    if not isinstance(arr1, np.ndarray) and isinstance(arr2, np.ndarray):
-        # same only if arr1 is empty and arr2 is 0
-        if not arr1 and not arr2.size == 0:
-            return True
-        else:
-            return False
-
-    # two ndarrays...
-    if arr1.shape != arr2.shape:
-        return False
-    if arr2.dtype != arr2.dtype:
-        return False
-    nElements = np.prod(arr1.shape)
-    arr1 = arr1.reshape((nElements,))
-    arr2 = arr2.reshape((nElements,))
-    for i in range(nElements):
-        if not ndarray_compare(arr1[i], arr2[i]):
-            return False
-    return True
+from hsds.util.chunkUtil import ndarray_compare
 
 
 class ArrayUtilTest(unittest.TestCase):