diff --git a/Makefile b/Makefile
index 0b9b1a6..b8a715f 100644
--- a/Makefile
+++ b/Makefile
@@ -32,5 +32,5 @@
 update-requirements: build
 	# pip-compile gets confused if there's already a requirements.txt file, and
 	# it can't be deleted without breaking the docker mount. So instead do the
 	# compiling in /tmp. Should run test suite afterwards.
-	docker run --rm -v $(shell pwd)/requirements.txt:/app/requirements.txt -w /tmp opentopodata:$(VERSION) /bin/bash -c "cp /app/requirements.in .; pip-compile requirements.in; cp requirements.txt /app/requirements.txt"
+	docker run --rm -v $(shell pwd)/requirements.txt:/app/requirements.txt -w /tmp opentopodata:$(VERSION) /bin/bash -c "cp /app/requirements.in .; pip-compile requirements.in --resolver backtracking; cp requirements.txt /app/requirements.txt"
diff --git a/docs/changelog.md b/docs/changelog.md
index 44902a7..b0d8cf9 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -3,6 +3,8 @@
 This is a list of changes to Open Topo Data between each release.
 
+## Version 1.8.4 (18 Aug 2023)
+
 ## Version 1.8.3 (7 Feb 2023)
 
 * Fix memory leak ([#68](https://github.com/ajnisbet/opentopodata/issues/68))
 
diff --git a/example-config.yaml b/example-config.yaml
index 4df114c..4262c25 100644
--- a/example-config.yaml
+++ b/example-config.yaml
@@ -5,10 +5,16 @@
 # 400 error will be thrown above this limit.
 max_locations_per_request: 100
 
+
 # CORS header. Should be null for no CORS, '*' for all domains, or a url with
 # protocol, domain, and port ('https://api.example.com/'). Default is null.
 access_control_allow_origin: "*"
 
+
+# Use mmap to cache files for faster repeated reads on slow networked filesystems.
+# See https://github.com/ajnisbet/opentopodata/pull/74
+read_with_mmap: false
+
 datasets:
 
   # A small testing dataset is included in the repo.
diff --git a/opentopodata/backend.py b/opentopodata/backend.py
index afce035..04b2782 100644
--- a/opentopodata/backend.py
+++ b/opentopodata/backend.py
@@ -70,7 +70,7 @@ def _validate_points_lie_within_raster(xs, ys, lats, lons, bounds, res):
     return sorted(oob_indices)
 
 
-def _get_elevation_from_path(lats, lons, path, interpolation):
+def _get_elevation_from_path(lats, lons, path, interpolation, use_mmap=False):
     """Read values at locations in a raster.
 
     Args:
@@ -87,71 +87,70 @@ def _get_elevation_from_path(lats, lons, path, interpolation):
     lats = np.asarray(lats)
 
     try:
-        with open(path, "rb") as bf:
-            with mmap.mmap(bf.fileno(), length=0, access=mmap.ACCESS_READ) as mmap_obj:
-                with rasterio.open(mmap_obj) as f:
-                    if f.crs is None:
-                        msg = "Dataset has no coordinate reference system."
-                        msg += f" Check the file '{path}' is a geo raster."
-                        msg += " Otherwise you'll have to add the crs manually with a tool like gdaltranslate."
-                        raise InputError(msg)
-
-                    try:
-                        if f.crs.is_epsg_code:
-                            xs, ys = utils.reproject_latlons(
-                                lats, lons, epsg=f.crs.to_epsg()
-                            )
-                        else:
-                            xs, ys = utils.reproject_latlons(
-                                lats, lons, wkt=f.crs.to_wkt()
-                            )
-                    except ValueError:
-                        raise InputError(
-                            "Unable to transform latlons to dataset projection."
+        with open(path, "rb") as dataset:
+            if use_mmap:
+                dataset = mmap.mmap(dataset.fileno(), length=0, access=mmap.ACCESS_READ)
+            with rasterio.open(dataset) as f:
+                if f.crs is None:
+                    msg = "Dataset has no coordinate reference system."
+                    msg += f" Check the file '{path}' is a geo raster."
+                    msg += " Otherwise you'll have to add the crs manually with a tool like gdaltranslate."
+                    raise InputError(msg)
+
+                try:
+                    if f.crs.is_epsg_code:
+                        xs, ys = utils.reproject_latlons(
+                            lats, lons, epsg=f.crs.to_epsg()
                         )
+                    else:
+                        xs, ys = utils.reproject_latlons(lats, lons, wkt=f.crs.to_wkt())
+                except ValueError:
+                    raise InputError(
+                        "Unable to transform latlons to dataset projection."
+                    )
 
-                    # Check bounds.
-                    oob_indices = _validate_points_lie_within_raster(
-                        xs, ys, lats, lons, f.bounds, f.res
+                # Check bounds.
+                oob_indices = _validate_points_lie_within_raster(
+                    xs, ys, lats, lons, f.bounds, f.res
+                )
+                rows, cols = tuple(f.index(xs, ys, op=_noop))
+
+                # Different versions of rasterio may or may not collapse single
+                # f.index() lookups into scalars. We want to always have an
+                # array.
+                rows = np.atleast_1d(rows)
+                cols = np.atleast_1d(cols)
+
+                # Offset by 0.5 to convert from center coords (provided by
+                # f.index) to ul coords (expected by f.read).
+                rows = rows - 0.5
+                cols = cols - 0.5
+
+                # Because of floating point precision, indices may slightly exceed
+                # array bounds. Because we've checked the locations are within the
+                # file bounds, it's safe to clip to the array shape.
+                rows = rows.clip(0, f.height - 1)
+                cols = cols.clip(0, f.width - 1)
+
+                # Read the locations, using a 1x1 window. The `masked` kwarg makes
+                # rasterio replace NODATA values with np.nan. The `boundless` kwarg
+                # forces the windowed elevation to be a 1x1 array, even when all
+                # values are NODATA.
+                for i, (row, col) in enumerate(zip(rows, cols)):
+                    if i in oob_indices:
+                        z_all.append(None)
+                        continue
+                    window = rasterio.windows.Window(col, row, 1, 1)
+                    z_array = f.read(
+                        indexes=1,
+                        window=window,
+                        resampling=interpolation,
+                        out_dtype=float,
+                        boundless=True,
+                        masked=True,
                     )
-                    rows, cols = tuple(f.index(xs, ys, op=_noop))
-
-                    # Different versions of rasterio may or may not collapse single
-                    # f.index() lookups into scalars. We want to always have an
-                    # array.
-                    rows = np.atleast_1d(rows)
-                    cols = np.atleast_1d(cols)
-
-                    # Offset by 0.5 to convert from center coords (provided by
-                    # f.index) to ul coords (expected by f.read).
-                    rows = rows - 0.5
-                    cols = cols - 0.5
-
-                    # Because of floating point precision, indices may slightly exceed
-                    # array bounds. Because we've checked the locations are within the
-                    # file bounds, it's safe to clip to the array shape.
-                    rows = rows.clip(0, f.height - 1)
-                    cols = cols.clip(0, f.width - 1)
-
-                    # Read the locations, using a 1x1 window. The `masked` kwarg makes
-                    # rasterio replace NODATA values with np.nan. The `boundless` kwarg
-                    # forces the windowed elevation to be a 1x1 array, even when it all
-                    # values are NODATA.
-                    for i, (row, col) in enumerate(zip(rows, cols)):
-                        if i in oob_indices:
-                            z_all.append(None)
-                            continue
-                        window = rasterio.windows.Window(col, row, 1, 1)
-                        z_array = f.read(
-                            indexes=1,
-                            window=window,
-                            resampling=interpolation,
-                            out_dtype=float,
-                            boundless=True,
-                            masked=True,
-                        )
-                        z = np.ma.filled(z_array, np.nan)[0][0]
-                        z_all.append(z)
+                    z = np.ma.filled(z_array, np.nan)[0][0]
+                    z_all.append(z)
 
     # Depending on the file format, when rasterio finds an invalid projection
     # of file, it might load it with a None crs, or it might throw an error.
@@ -162,6 +161,9 @@ def _get_elevation_from_path(lats, lons, path, interpolation):
             msg += " and that the file is not corrupt."
             raise InputError(msg)
         raise e
+    finally:
+        if isinstance(dataset, mmap.mmap):
+            dataset.close()
 
     return z_all
 
diff --git a/opentopodata/config.py b/opentopodata/config.py
index 9ccf99d..ac94fad 100644
--- a/opentopodata/config.py
+++ b/opentopodata/config.py
@@ -21,6 +21,7 @@
     "dataset.filename_tile_size": 1,
     "dataset.filename_epsg": utils.WGS84_LATLON_EPSG,
     "access_control_allow_origin": None,
+    "read_with_mmap": False,
 }
 
 
@@ -147,6 +148,7 @@ def load_config():
     config["access_control_allow_origin"] = config.get(
         "access_control_allow_origin", DEFAULTS["access_control_allow_origin"]
    )
+    config["read_with_mmap"] = config.get("read_with_mmap", DEFAULTS["read_with_mmap"])
 
     # Validate CORS. Must have protocol, domain, and optionally port.
     _validate_cors(config["access_control_allow_origin"])
diff --git a/requirements.in b/requirements.in
index c0312fc..8cf5e93 100644
--- a/requirements.in
+++ b/requirements.in
@@ -10,5 +10,5 @@ pyproj
 pytest
 pytest-cov
 PyYAML
-rasterio==1.2.10
+rasterio<1.3.0
 requests
diff --git a/requirements.txt b/requirements.txt
index 7b614ae..153eedd 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,24 +6,24 @@
 #
 affine==2.4.0
     # via rasterio
-attrs==22.2.0
-    # via
-    #   pytest
-    #   rasterio
-black==23.1.0
+attrs==23.1.0
+    # via rasterio
+black==23.7.0
     # via -r requirements.in
+blinker==1.6.2
+    # via flask
 build==0.10.0
     # via pip-tools
 cachelib==0.9.0
     # via flask-caching
-certifi==2022.12.7
+certifi==2023.7.22
     # via
     #   pyproj
     #   rasterio
     #   requests
-charset-normalizer==3.0.1
+charset-normalizer==3.2.0
     # via requests
-click==8.1.3
+click==8.1.7
     # via
     #   black
     #   click-plugins
@@ -35,11 +35,11 @@ click-plugins==1.1.1
     # via rasterio
 cligj==0.7.2
     # via rasterio
-coverage[toml]==7.1.0
+coverage[toml]==7.3.0
     # via pytest-cov
-exceptiongroup==1.1.0
+exceptiongroup==1.1.3
     # via pytest
-flask==2.2.2
+flask==2.3.2
     # via
     #   -r requirements.in
     #   flask-caching
@@ -49,7 +49,7 @@ geographiclib==2.0
     # via -r requirements.in
 idna==3.4
     # via requests
-importlib-metadata==6.0.0
+importlib-metadata==6.8.0
     # via flask
 iniconfig==2.0.0
     # via pytest
@@ -57,51 +57,51 @@ itsdangerous==2.1.2
     # via flask
 jinja2==3.1.2
     # via flask
-markupsafe==2.1.2
+markupsafe==2.1.3
     # via
     #   jinja2
     #   werkzeug
 mypy-extensions==1.0.0
     # via black
-numpy==1.24.2
+numpy==1.25.2
     # via
     #   -r requirements.in
     #   rasterio
     #   snuggs
-packaging==23.0
+packaging==23.1
     # via
     #   black
     #   build
     #   pytest
-pathspec==0.11.0
+pathspec==0.11.2
     # via black
-pip-tools==6.12.2
+pip-tools==7.3.0
     # via -r requirements.in
-platformdirs==3.0.0
+platformdirs==3.10.0
     # via black
-pluggy==1.0.0
+pluggy==1.2.0
     # via pytest
 polyline==2.0.0
     # via -r requirements.in
 pylibmc==1.6.3
     # via -r requirements.in
-pyparsing==3.0.9
+pyparsing==3.1.1
     # via snuggs
-pyproj==3.4.1
+pyproj==3.6.0
     # via -r requirements.in
 pyproject-hooks==1.0.0
     # via build
-pytest==7.2.1
+pytest==7.4.0
     # via
     #   -r requirements.in
     #   pytest-cov
-pytest-cov==4.0.0
+pytest-cov==4.1.0
     # via -r requirements.in
-pyyaml==6.0
+pyyaml==6.0.1
     # via -r requirements.in
 rasterio==1.2.10
     # via -r requirements.in
-requests==2.28.2
+requests==2.31.0
     # via -r requirements.in
 snuggs==1.4.7
     # via rasterio
@@ -110,16 +110,18 @@ tomli==2.0.1
     #   black
     #   build
     #   coverage
+    #   pip-tools
+    #   pyproject-hooks
     #   pytest
-typing-extensions==4.4.0
+typing-extensions==4.7.1
     # via black
-urllib3==1.26.14
+urllib3==2.0.4
     # via requests
-werkzeug==2.2.2
+werkzeug==2.3.7
     # via flask
-wheel==0.38.4
+wheel==0.41.1
     # via pip-tools
-zipp==3.12.1
+zipp==3.16.2
     # via importlib-metadata
 
 # The following packages are considered to be unsafe in a requirements file:
diff --git a/tests/test_backend.py b/tests/test_backend.py
index b6e39d1..db6129a 100644
--- a/tests/test_backend.py
+++ b/tests/test_backend.py
@@ -154,6 +154,16 @@ def test_bilinear_interpolation(self):
             0.4, 0.3, self.geotiff_z[:2, :2]
         )
 
+    def test_mmap(self):
+        lats = [89.6]
+        lons = [-179.7]
+        z = backend._get_elevation_from_path(
+            lats, lons, ETOPO1_GEOTIFF_PATH, "bilinear", use_mmap=True
+        )
+        assert pytest.approx(z[0]) == self._interp_bilinear(
+            0.4, 0.3, self.geotiff_z[:2, :2]
+        )
+
     def test_none_outside_dataset(self):
         lats = [0, 0, -90.1, 90.1]
         lons = [-180.1, 180.1, 0, 0]
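
Note on the backend.py change above (an illustration, not part of the patch): when read_with_mmap is enabled, the raster file is memory-mapped read-only and the mmap object is handed to rasterio.open() in place of the plain file handle, then released in the new finally clause. The sketch below isolates that pattern on its own; the file path and the pixel being read are placeholder assumptions, not values from the patch.

    import mmap

    import rasterio
    import rasterio.windows


    def read_first_pixel(path, use_mmap=False):
        """Read band 1, pixel (0, 0), optionally through a read-only mmap."""
        with open(path, "rb") as dataset:
            if use_mmap:
                # length=0 maps the whole file; ACCESS_READ matches the "rb" handle.
                dataset = mmap.mmap(dataset.fileno(), length=0, access=mmap.ACCESS_READ)
            try:
                # The mmap object is passed to rasterio in place of the plain
                # binary file object, as the patched backend does.
                with rasterio.open(dataset) as f:
                    window = rasterio.windows.Window(0, 0, 1, 1)
                    return f.read(indexes=1, window=window, masked=True)[0][0]
            finally:
                # The with-block closes the file; the mmap that now shadows it
                # needs an explicit close, mirroring the patch's finally clause.
                if isinstance(dataset, mmap.mmap):
                    dataset.close()


    print(read_first_pixel("path/to/raster.tif", use_mmap=True))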
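
A second illustrative note (also not part of the patch): end to end, the option is switched on by setting read_with_mmap: true in config.yaml (the key is documented in the example-config.yaml hunk and defaults to false), load_config() backfills the default as shown in the config.py hunk, and the value is presumably threaded through to the use_mmap argument by a caller this diff does not touch. The snippet below imitates that flow with a plain dict standing in for the parsed YAML; the dict contents and the commented-out call are assumptions for illustration.

    # A dict standing in for the YAML parsed by load_config().
    DEFAULTS = {"read_with_mmap": False}
    config = {"max_locations_per_request": 100, "read_with_mmap": True}

    # Same default-backfill pattern as the config.py hunk above.
    config["read_with_mmap"] = config.get("read_with_mmap", DEFAULTS["read_with_mmap"])

    # The backend read then opts into mmap based on the config value, e.g.:
    # backend._get_elevation_from_path(lats, lons, path, "bilinear",
    #                                  use_mmap=config["read_with_mmap"])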