Skip to content

Commit

Permalink
fix for numpy deprecation of truth value of empty array
Browse files Browse the repository at this point in the history
  • Loading branch information
jreadey committed Jul 10, 2023
1 parent 36829ff commit 115f088
Show file tree
Hide file tree
Showing 6 changed files with 91 additions and 82 deletions.
11 changes: 0 additions & 11 deletions hsds/datanode_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -956,17 +956,6 @@ async def get_chunk(

# validate arguments
if s3path:
"""
if s3size == 0 and s3offset == 0:
# uninitialized chunk ref
msg = f"reference chunk not set for id: {chunk_id}, returning 404"
log.info(msg)
raise HTTPNotFound() # not found return 404
if s3size == 0:
msg = f"Unexpected get_chunk parameter - s3path: {s3path} with size 0"
log.error(msg)
raise HTTPInternalServerError()
"""
if bucket:
msg = "get_chunk - bucket arg should not be used with s3path"
log.error(msg)
Expand Down
78 changes: 73 additions & 5 deletions hsds/util/chunkUtil.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,63 @@
PRIMES = [29, 31, 37, 41, 43, 47, 53, 59, 61, 67] # for chunk partitioning


# compare two numpy arrays.
# return true if the same (exclusive of null vs. empty array)
# false otherwise


def ndarray_compare(arr1, arr2):
    """Return True if arr1 and arr2 hold the same values, False otherwise.

    Arguments may be numpy arrays, numpy void scalars (structured-array
    records) or plain values.  A falsy non-array value (e.g. 0 or None)
    and a zero-size array are treated as the same, so a "null" element
    matches an empty array.

    Two ndarrays are the same only if shape, dtype and every element
    match.  Elements of object or structured arrays are compared
    recursively; numeric arrays are compared in one vectorized call.

    When neither argument is an ndarray or np.void, the result of
    ``arr1 == arr2`` is returned unchanged (it may be a numpy bool).
    """
    arr1_is_array = isinstance(arr1, np.ndarray)
    arr2_is_array = isinstance(arr2, np.ndarray)

    if arr1_is_array and arr2_is_array:
        if arr1.shape != arr2.shape:
            return False
        # compare the dtypes of both operands (previously arr2's dtype
        # was compared with itself, so this check could never fail)
        if arr1.dtype != arr2.dtype:
            return False
        if arr1.dtype.kind in "biufc":
            # plain numeric data holds no nested arrays or records, so
            # the element-wise Python loop is not needed
            return np.array_equal(arr1, arr2)
        # use .size rather than np.prod(shape): np.prod(()) is the float
        # 1.0, which made reshape/range fail for 0-d arrays
        flat1 = arr1.reshape((arr1.size,))
        flat2 = arr2.reshape((arr2.size,))
        return all(ndarray_compare(a, b) for a, b in zip(flat1, flat2))

    if arr1_is_array:
        # same only if arr1 is empty and arr2 is falsy (e.g. 0)
        return arr1.size == 0 and not arr2
    if arr2_is_array:
        # same only if arr1 is falsy (e.g. 0) and arr2 is empty
        return not arr1 and arr2.size == 0

    # neither argument is an ndarray
    arr1_is_void = isinstance(arr1, np.void)
    arr2_is_void = isinstance(arr2, np.void)

    if not arr1_is_void and not arr2_is_void:
        return arr1 == arr2
    if arr1_is_void and not arr2_is_void:
        return arr1.size == 0 and not arr2
    if arr2_is_void and not arr1_is_void:
        return not arr1 and arr2.size == 0

    # both np.voids: compare field by field
    if arr1.size != arr2.size:
        return False
    if len(arr1) != len(arr2):
        return False
    return all(ndarray_compare(arr1[i], arr2[i]) for i in range(len(arr1)))

"""
Convert list that may contain bytes type elements to list of string elements
Expand Down Expand Up @@ -803,27 +860,38 @@ def chunkWriteSelection(chunk_arr=None, slices=None, data=None):
"""
Write data for requested chunk and selection
"""
log.info("chunkWriteSelection")
dims = chunk_arr.shape

rank = len(dims)

if rank == 0:
msg = "No dimension passed to chunkReadSelection"
log.error(msg)
raise ValueError(msg)
if len(slices) != rank:
msg = "Selection rank does not match dataset rank"
log.error(msg)
raise ValueError(msg)
if len(data.shape) != rank:
msg = "Input arr does not match dataset rank"
log.error(msg)
raise ValueError(msg)

updated = False
# check if the new data modifies the array or not
if not np.array_equal(chunk_arr[slices], data):
# update chunk array
chunk_arr[slices] = data
updated = True
# TBD - is this worth the cost of comparing two arrays element by element?
try:
if not ndarray_compare(chunk_arr[slices], data):
# if not np.array_equal(chunk_arr[slices], data):
# update chunk array
chunk_arr[slices] = data
updated = True
except ValueError as ve:
msg = f"array_equal ValueError, chunk_arr[{slices}]: {chunk_arr[slices]} "
msg += f"data: {data}, data type: {type(data)} ve: {ve}"
log.error(msg)
raise

return updated


Expand Down
8 changes: 4 additions & 4 deletions hsds/util/storUtil.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,18 +361,18 @@ async def getStorBytes(app,
# uncompress chunks within the fetched data and store to
# chunk bytes
if not h5_size:
log.error("getStoreBytes - h5_size not set")
log.error("getStorBytes - h5_size not set")
raise HTTPInternalServerError()
if not chunk_bytes:
log.error("getStoreBytes - chunk_bytes not set")
log.error("getStorBytes - chunk_bytes not set")
raise HTTPInternalServerError()
if len(chunk_locations) * h5_size < len(chunk_bytes):
log.error(f"getStoreBytes - invalid chunk_bytes length: {len(chunk_bytes)}")
log.error(f"getStorBytes - invalid chunk_bytes length: {len(chunk_bytes)}")
for chunk_location in chunk_locations:
log.debug(f"getStoreBytes - processing chunk_location: {chunk_location}")
n = chunk_location.offset - offset
if n < 0:
log.warn(f"getStoreBytes - unexpected offset for chunk_location: {chunk_location}")
log.warn(f"getStorBytes - unexpected offset for chunk_location: {chunk_location}")
continue
m = n + chunk_location.length
log.debug(f"getStorBytes - extracting chunk from data[{n}:{m}]")
Expand Down
9 changes: 3 additions & 6 deletions tests/integ/query_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,9 +502,8 @@ def testPutQuery(self):
params["Limit"] = 1
update_value = {"open": 999}
payload = {"value": update_value}
rsp = self.session.put(
req, params=params, data=json.dumps(update_value), headers=headers
)

rsp = self.session.put(req, params=params, data=json.dumps(update_value), headers=headers)
self.assertEqual(rsp.status_code, 200)
rspJson = json.loads(rsp.text)
self.assertTrue("hrefs" in rspJson)
Expand Down Expand Up @@ -537,9 +536,7 @@ def testPutQuery(self):
params = {"query": "open == 0"}
update_value = {"open": -999}
payload = {"value": update_value}
rsp = self.session.put(
req, params=params, data=json.dumps(update_value), headers=headers
)
rsp = self.session.put(req, params=params, data=json.dumps(update_value), headers=headers)
self.assertEqual(rsp.status_code, 200)
rspJson = json.loads(rsp.text)
self.assertTrue("value" in rspJson)
Expand Down
13 changes: 10 additions & 3 deletions tests/integ/vlen_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,15 @@

sys.path.append("../..")
from hsds.util.arrayUtil import arrayToBytes, bytesToArray
from hsds.util.chunkUtil import ndarray_compare
from hsds.util.hdf5dtype import createDataType


class VlenTest(unittest.TestCase):
def __init__(self, *args, **kwargs):
super(VlenTest, self).__init__(*args, **kwargs)
self.base_domain = helper.getTestDomainName(self.__class__.__name__)
print(self.base_domain)
helper.setupDomain(self.base_domain)
self.endpoint = helper.getEndpoint()

Expand Down Expand Up @@ -77,9 +79,7 @@ def testPutVLenInt(self):

# write values to dataset
data = [
[
1,
],
[1,],
[1, 2],
[1, 2, 3],
[1, 2, 3, 4],
Expand All @@ -97,6 +97,8 @@ def testPutVLenInt(self):
self.assertTrue("value" in rspJson)
value = rspJson["value"]
self.assertEqual(len(value), 4)
print("value:", value)
print("data:", data)
for i in range(4):
self.assertEqual(value[i], data[i])

Expand Down Expand Up @@ -221,6 +223,9 @@ def testPutVLenIntBinary(self):
for i in range(count):
self.assertEqual(value[i], test_values[i])

print("data:", data)
print("arr:", arr)

# read back a selection
params = {"select": "[2:3]"}
rsp = self.session.get(req, headers=headers, params=params)
Expand Down Expand Up @@ -291,6 +296,8 @@ def testPutVLen2DInt(self):
self.assertTrue("value" in rspJson)
value = rspJson["value"]
self.assertEqual(len(value), nrow)
print("value:", value)
print("data:", data)

for i in range(nrow):
for j in range(ncol):
Expand Down
54 changes: 1 addition & 53 deletions tests/unit/array_util_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,59 +29,7 @@
from hsds.util.hdf5dtype import special_dtype
from hsds.util.hdf5dtype import check_dtype
from hsds.util.hdf5dtype import createDataType

# compare two numpy arrays.
# return true if the same (exclusive of null vs. empty array)
# false otherwise


def ndarray_compare(arr1, arr2):
    """Return True if arr1 and arr2 hold the same values, False otherwise.

    Arguments may be numpy arrays, numpy void scalars (structured-array
    records) or plain values.  A falsy non-array value (e.g. 0 or None)
    and a zero-size array are treated as the same, so a "null" element
    matches an empty array.

    Two ndarrays are the same only if shape, dtype and every element
    match.  Elements of object or structured arrays are compared
    recursively; numeric arrays are compared in one vectorized call.

    When neither argument is an ndarray or np.void, the result of
    ``arr1 == arr2`` is returned unchanged (it may be a numpy bool).
    """
    arr1_is_array = isinstance(arr1, np.ndarray)
    arr2_is_array = isinstance(arr2, np.ndarray)

    if arr1_is_array and arr2_is_array:
        if arr1.shape != arr2.shape:
            return False
        # compare the dtypes of both operands (previously arr2's dtype
        # was compared with itself, so this check could never fail)
        if arr1.dtype != arr2.dtype:
            return False
        if arr1.dtype.kind in "biufc":
            # plain numeric data holds no nested arrays or records, so
            # the element-wise Python loop is not needed
            return np.array_equal(arr1, arr2)
        # use .size rather than np.prod(shape): np.prod(()) is the float
        # 1.0, which made reshape/range fail for 0-d arrays
        flat1 = arr1.reshape((arr1.size,))
        flat2 = arr2.reshape((arr2.size,))
        return all(ndarray_compare(a, b) for a, b in zip(flat1, flat2))

    if arr1_is_array:
        # same only if arr1 is empty and arr2 is falsy (e.g. 0)
        return arr1.size == 0 and not arr2
    if arr2_is_array:
        # same only if arr1 is falsy (e.g. 0) and arr2 is empty
        # (the test on arr2.size was previously negated, which matched
        # a falsy value against any NON-empty array)
        return not arr1 and arr2.size == 0

    # neither argument is an ndarray
    arr1_is_void = isinstance(arr1, np.void)
    arr2_is_void = isinstance(arr2, np.void)

    if not arr1_is_void and not arr2_is_void:
        return arr1 == arr2
    if arr1_is_void and not arr2_is_void:
        return arr1.size == 0 and not arr2
    if arr2_is_void and not arr1_is_void:
        return not arr1 and arr2.size == 0

    # both np.voids: compare field by field.  Iterate over len() (the
    # number of fields), not .size, which is 1 for a record and would
    # leave every field after the first unchecked.
    if arr1.size != arr2.size:
        return False
    if len(arr1) != len(arr2):
        return False
    return all(ndarray_compare(arr1[i], arr2[i]) for i in range(len(arr1)))
from hsds.util.chunkUtil import ndarray_compare


class ArrayUtilTest(unittest.TestCase):
Expand Down

0 comments on commit 115f088

Please sign in to comment.