diff --git a/hsds/util/arrayUtil.py b/hsds/util/arrayUtil.py index 432cf8cd..e040df48 100644 --- a/hsds/util/arrayUtil.py +++ b/hsds/util/arrayUtil.py @@ -607,3 +607,69 @@ def __next__(self): self._stop = True return tuple(ret_index) + + +# compare two numpy arrays. +# return true if the same (exclusive of null vs. empty array) +# false otherwise +# TBD: this is slow for multi-megabyte vlen arrays, needs to be optimized + + +def ndarray_compare(arr1, arr2): + if not isinstance(arr1, np.ndarray) and not isinstance(arr2, np.ndarray): + if not isinstance(arr1, np.void) and not isinstance(arr2, np.void): + return arr1 == arr2 + if isinstance(arr1, np.void) and not isinstance(arr2, np.void): + if arr1.size == 0 and not arr2: + return True + else: + return False + if not isinstance(arr1, np.void) and isinstance(arr2, np.void): + if not arr1 and arr2.size == 0: + return True + else: + return False + # both np.voids + if arr1.size != arr2.size: + return False + + if len(arr1) != len(arr2): + return False + + for i in range(len(arr1)): + if not ndarray_compare(arr1[i], arr2[i]): + return False + return True + + if isinstance(arr1, np.ndarray) and not isinstance(arr2, np.ndarray): + # same only if arr1 is empty and arr2 is 0 + if arr1.size == 0 and not arr2: + return True + else: + return False + if not isinstance(arr1, np.ndarray) and isinstance(arr2, np.ndarray): + # same only if arr1 is empty and arr2 size is 0 + if not arr1 and arr2.size == 0: + return True + else: + return False + + # two ndarrays... 
+ if arr1.shape != arr2.shape: + return False + if arr1.dtype != arr2.dtype: + return False + + if isVlen(arr1.dtype): + # need to compare element by element + + nElements = np.prod(arr1.shape) + arr1 = arr1.reshape((nElements,)) + arr2 = arr2.reshape((nElements,)) + for i in range(nElements): + if not ndarray_compare(arr1[i], arr2[i]): + return False + return True + else: + # can just use np.array_equal + return np.array_equal(arr1, arr2) diff --git a/hsds/util/chunkUtil.py b/hsds/util/chunkUtil.py index fff7a803..2031eaa0 100644 --- a/hsds/util/chunkUtil.py +++ b/hsds/util/chunkUtil.py @@ -1,5 +1,6 @@ import numpy as np from .. import hsds_logger as log +from .arrayUtil import ndarray_compare CHUNK_BASE = 16 * 1024 # Multiplier by which chunks are adjusted CHUNK_MIN = 512 * 1024 # Soft lower limit (512k) @@ -8,64 +9,6 @@ PRIMES = [29, 31, 37, 41, 43, 47, 53, 59, 61, 67] # for chunk partitioning -# compare two numpy arrays. -# return true if the same (exclusive of null vs. empty array) -# false otherwise - - -def ndarray_compare(arr1, arr2): - if not isinstance(arr1, np.ndarray) and not isinstance(arr2, np.ndarray): - if not isinstance(arr1, np.void) and not isinstance(arr2, np.void): - return arr1 == arr2 - if isinstance(arr1, np.void) and not isinstance(arr2, np.void): - if arr1.size == 0 and not arr2: - return True - else: - return False - if not isinstance(arr1, np.void) and isinstance(arr2, np.void): - if not arr1 and arr2.size == 0: - return True - else: - return False - # both np.voids - if arr1.size != arr2.size: - return False - - if len(arr1) != len(arr2): - return False - - for i in range(len(arr1)): - if not ndarray_compare(arr1[i], arr2[i]): - return False - return True - - if isinstance(arr1, np.ndarray) and not isinstance(arr2, np.ndarray): - # same only if arr1 is empty and arr2 is 0 - if arr1.size == 0 and not arr2: - return True - else: - return False - if not isinstance(arr1, np.ndarray) and isinstance(arr2, np.ndarray): - # same only if arr1 
is empty and arr2 size is 0 - if not arr1 and arr2.size == 0: - return True - else: - return False - - # two ndarrays... - if arr1.shape != arr2.shape: - return False - if arr2.dtype != arr2.dtype: - return False - nElements = np.prod(arr1.shape) - arr1 = arr1.reshape((nElements,)) - arr2 = arr2.reshape((nElements,)) - for i in range(nElements): - if not ndarray_compare(arr1[i], arr2[i]): - return False - return True - - """ Convert list that may contain bytes type elements to list of string elements diff --git a/tests/integ/vlen_test.py b/tests/integ/vlen_test.py index ad3a37d9..99682013 100755 --- a/tests/integ/vlen_test.py +++ b/tests/integ/vlen_test.py @@ -96,8 +96,7 @@ def testPutVLenInt(self): self.assertTrue("value" in rspJson) value = rspJson["value"] self.assertEqual(len(value), 4) - print("value:", value) - print("data:", data) + for i in range(4): self.assertEqual(value[i], data[i]) @@ -222,9 +221,6 @@ def testPutVLenIntBinary(self): for i in range(count): self.assertEqual(value[i], test_values[i]) - print("data:", data) - print("arr:", arr) - # read back a selection params = {"select": "[2:3]"} rsp = self.session.get(req, headers=headers, params=params) @@ -295,8 +291,6 @@ def testPutVLen2DInt(self): self.assertTrue("value" in rspJson) value = rspJson["value"] self.assertEqual(len(value), nrow) - print("value:", value) - print("data:", data) for i in range(nrow): for j in range(ncol): diff --git a/tests/unit/array_util_test.py b/tests/unit/array_util_test.py index 77125673..0e36c9de 100644 --- a/tests/unit/array_util_test.py +++ b/tests/unit/array_util_test.py @@ -24,12 +24,12 @@ arrayToBytes, bytesToArray, getByteArraySize, - IndexIterator + IndexIterator, + ndarray_compare ) from hsds.util.hdf5dtype import special_dtype from hsds.util.hdf5dtype import check_dtype from hsds.util.hdf5dtype import createDataType -from hsds.util.chunkUtil import ndarray_compare class ArrayUtilTest(unittest.TestCase): @@ -308,7 +308,7 @@ def testToBytes(self): buffer = 
arrayToBytes(arr) self.assertEqual(buffer, arr.tobytes()) - # convert back to arry + # convert back to array arr_copy = bytesToArray(buffer, dt, (3,)) self.assertTrue(ndarray_compare(arr, arr_copy)) @@ -483,6 +483,35 @@ def testToBytes(self): arr_copy = bytesToArray(buffer, dt, (4,)) self.assertTrue(ndarray_compare(arr, arr_copy)) + def testArrayCompareInt(self): + # Simple array + dt = np.dtype("