Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NumPy 2.0 support #395

Merged
merged 5 commits into from
Sep 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest]
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
python-version: ["3.9", "3.10", "3.11", "3.12"]
build-method: ["manual", "docker"]

runs-on: ${{ matrix.os }}
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.10 AS hsds-base
FROM python:3.12 AS hsds-base
# FROM hdfgroup/hdf5lib:1.14.0 as hsds-base

# Install Curl
Expand Down
2 changes: 1 addition & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ aiobotocore = "==2.5.0"
aiohttp-cors = "*"
aiofiles = "*"
azure-storage-blob = "*"
bitshuffle = "*"
bitshuffle = "git+https://github.com/kiyo-masui/bitshuffle"
botocore = "*"
cryptography = "*"
h5py = ">=3.6.0"
Expand Down
12 changes: 8 additions & 4 deletions hsds/chunk_crawl.py
Original file line number Diff line number Diff line change
Expand Up @@ -688,9 +688,7 @@ def get_status(self):
raise KeyError(msg)
chunk_status = self._status_map[chunk_id]
if chunk_status not in (200, 201):
log.info(
f"returning chunk_status: {chunk_status} for chunk: {chunk_id}"
)
log.info(f"returning chunk_status: {chunk_status} for chunk: {chunk_id}")
return chunk_status

return 200 # all good
Expand Down Expand Up @@ -870,7 +868,13 @@ async def do_work(self, chunk_id, client=None):
log.warn(f"CancelledError for {self._action}({chunk_id}): {cle}")
except HTTPBadRequest as hbr:
status_code = 400
log.error(f"HTTPBadRequest for {self._action}({chunk_id}): {hbr}")
msg = f"HTTPBadRequest for {self._action}({chunk_id}): {hbr}"
if self._action.startswith("write_"):
# treat a 400 on write as a warning
log.warn(msg)
else:
log.error(msg)
break # no retry on 400's
except HTTPNotFound as nfe:
status_code = 404
log.info(f"HTTPNotFoundRequest for {self._action}({chunk_id}): {nfe}")
Expand Down
6 changes: 5 additions & 1 deletion hsds/chunk_dn.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#

import numpy as np
import traceback
from aiohttp.web_exceptions import HTTPBadRequest, HTTPInternalServerError
from aiohttp.web_exceptions import HTTPNotFound, HTTPServiceUnavailable
from aiohttp.web import json_response, StreamResponse
Expand Down Expand Up @@ -283,7 +284,10 @@ async def PUT_Chunk(request):
input_arr = bytesToArray(input_bytes, select_dt, [num_elements, ])
except ValueError as ve:
log.error(f"bytesToArray threw ValueError: {ve}")
raise HTTPInternalServerError()
tb = traceback.format_exc()
log.error(f"traceback: {tb}")

raise HTTPBadRequest(reason="unable to decode bytestring")

if bcshape:
input_arr = input_arr.reshape(bcshape)
Expand Down
5 changes: 3 additions & 2 deletions hsds/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import os
import sys
import yaml
from pkg_resources import resource_filename
from importlib_resources import files

cfg = {}

Expand Down Expand Up @@ -99,10 +99,11 @@ def _load_cfg():
break
if not yml_file:
# use yaml file embedded in package
yml_file = resource_filename("admin", "config/config.yml")
yml_file = files('admin.config').joinpath('config.yml')

if not yml_file:
raise FileNotFoundError("unable to load config.yml")
#
debug(f"_load_cfg with '{yml_file}'")
try:
with open(yml_file, "r") as f:
Expand Down
2 changes: 1 addition & 1 deletion hsds/util/arrayUtil.py
Original file line number Diff line number Diff line change
Expand Up @@ -549,7 +549,7 @@ def getNumpyValue(value, dt=None, encoding=None):
# convert to tuple
value = tuple(value)
elif dt.kind == "f" and isinstance(value, str) and value == "nan":
value = np.NaN
value = np.nan
else:
# use as is
pass
Expand Down
6 changes: 5 additions & 1 deletion hsds/util/httpUtil.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@ async def http_post(app, url, data=None, params=None, client=None):
elif rsp.status == 204: # no data
return None
elif rsp.status == 400:
msg = f"POST request HTTPBadRequest error for url: {url}"
msg = f"POST request HTTPBadRequest error for url: {url}"
log.warn(msg)
raise HTTPBadRequest(reason="Bad Request")
elif rsp.status == 404:
Expand Down Expand Up @@ -445,6 +445,10 @@ async def http_put(app, url, data=None, params=None, client=None):
log.info(f"http_put status: {rsp.status}")
if rsp.status in (200, 201):
pass # expected
elif rsp.status == 400:
msg = f"PUT request HTTPBadRequest error for url: {url}"
log.warn(msg)
raise HTTPBadRequest(reason="Bad Request")
elif rsp.status == 404:
# can come up for replace ops
log.info(f"HTTPNotFound for: {url}")
Expand Down
5 changes: 4 additions & 1 deletion hsds/util/storUtil.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def getCompressors():
def getSupportedFilters(include_compressors=True):
"""return list of other supported filters"""
filters = [
"bitshuffle",
# "bitshuffle",
"shuffle",
"fletcher32",
"nbit", # No-op
Expand Down Expand Up @@ -172,6 +172,9 @@ def _unshuffle(codec, data, dtype=None, chunk_shape=None):
except Exception as e:
log.error(f"except using bitshuffle.decompress_lz4: {e}")
raise HTTPInternalServerError()
else:
log.error(f"Unexpected codec: {codec} for _shuffle")
raise ValueError()

return arr.tobytes()

Expand Down
5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,12 @@ dependencies = [
"aiohttp_cors",
"aiofiles",
"azure-storage-blob",
"bitshuffle",
"bitshuffle@git+https://github.com/kiyo-masui/bitshuffle",
"cryptography",
"h5py >= 3.6.0",
"importlib_resources",
"numcodecs",
"numpy < 2.0.0",
"numpy",
"psutil",
"pyjwt",
"pytz",
Expand Down
3 changes: 1 addition & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@ aiobotocore==2.13.0
aiohttp_cors
aiofiles
azure-storage-blob
bitshuffle
cryptography
h5py>=3.6.0
numcodecs
numpy<2.0.0
numpy
psutil
pyjwt
pytz
Expand Down
2 changes: 1 addition & 1 deletion tests/integ/attr_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1572,7 +1572,7 @@ def testNaNAttributeValue(self):
helper.validateId(root_uuid)

# create attr
value = [np.NaN, ] * 6
value = [np.nan, ] * 6
data = {"type": "H5T_IEEE_F32LE", "shape": 6, "value": value}
attr_name = "nan_arr_attr"
req = self.endpoint + "/groups/" + root_uuid + "/attributes/" + attr_name
Expand Down
6 changes: 3 additions & 3 deletions tests/integ/dataset_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1381,12 +1381,12 @@ def get_payload(dset_type, fillValue=None):
# create the dataset
req = self.endpoint + "/datasets"

payload = get_payload("H5T_STD_I32LE", fillValue=np.NaN)
payload = get_payload("H5T_STD_I32LE", fillValue=np.nan)
req = self.endpoint + "/datasets"
rsp = self.session.post(req, data=json.dumps(payload), headers=headers)
self.assertEqual(rsp.status_code, 400) # NaN not compatible with integer type

payload = get_payload("H5T_IEEE_F32LE", fillValue=np.NaN)
payload = get_payload("H5T_IEEE_F32LE", fillValue=np.nan)
req = self.endpoint + "/datasets"
rsp = self.session.post(req, data=json.dumps(payload), headers=headers)
self.assertEqual(rsp.status_code, 201) # Dataset created
Expand All @@ -1409,7 +1409,7 @@ def get_payload(dset_type, fillValue=None):
self.assertTrue("fillValue" in creationProps)
self.assertTrue(np.isnan(creationProps["fillValue"]))

# get data json returning "nan" for fillValue rather than np.Nan
# get data json returning "nan" for fillValue rather than np.nan
# the latter works with the Python JSON package, but is not part
# of the formal JSON standard
params = {"ignore_nan": 1}
Expand Down
2 changes: 1 addition & 1 deletion tests/integ/value_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1442,7 +1442,7 @@ def testNaNFillValue(self):
# create the dataset
req = self.endpoint + "/datasets"
payload = {"type": "H5T_IEEE_F32LE", "shape": 10}
creation_props = {"fillValue": np.NaN}
creation_props = {"fillValue": np.nan}
payload["creationProperties"] = creation_props

req = self.endpoint + "/datasets"
Expand Down
2 changes: 2 additions & 0 deletions tests/unit/compression_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ def testCompression(self):
self.assertEqual(data, data_copy)

def testBitShuffle(self):
print("skipping bitshuffle test")
return
shape = (1_000_000, )
dt = np.dtype("<i4")
arr = np.random.randint(0, 200, shape, dtype=dt)
Expand Down
Loading