Skip to content

Commit

Permalink
Add mmap (but some gdal stuff is broken now...)
Browse files Browse the repository at this point in the history
  • Loading branch information
ajnisbet committed Aug 18, 2023
1 parent 6528826 commit 8303210
Show file tree
Hide file tree
Showing 8 changed files with 119 additions and 95 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,5 @@ update-requirements: build
# pip-compile gets confused if there's already a requirements.txt file, and
# it can't be deleted without breaking the docker mount. So instead do the
# compiling in /tmp. Should run test suite afterwards.
docker run --rm -v $(shell pwd)/requirements.txt:/app/requirements.txt -w /tmp opentopodata:$(VERSION) /bin/bash -c "cp /app/requirements.in .; pip-compile requirements.in; cp requirements.txt /app/requirements.txt"
docker run --rm -v $(shell pwd)/requirements.txt:/app/requirements.txt -w /tmp opentopodata:$(VERSION) /bin/bash -c "cp /app/requirements.in .; pip-compile requirements.in --resolver backtracking; cp requirements.txt /app/requirements.txt"

2 changes: 2 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
This is a list of changes to Open Topo Data between each release.


## Version 1.8.4 (18 Aug 2023)

## Version 1.8.3 (7 Feb 2023)

* Fix memory leak ([#68](https://github.com/ajnisbet/opentopodata/issues/68))
Expand Down
6 changes: 6 additions & 0 deletions example-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,16 @@
# 400 error will be thrown above this limit.
max_locations_per_request: 100


# CORS header. Should be null for no CORS, '*' for all domains, or a url with
# protocol, domain, and port ('https://api.example.com/'). Default is null.
access_control_allow_origin: "*"


# Use mmap to cache files for faster repeated reads on slow networked filesystems.
# See https://github.com/ajnisbet/opentopodata/pull/74
read_with_mmap: false

datasets:

# A small testing dataset is included in the repo.
Expand Down
128 changes: 65 additions & 63 deletions opentopodata/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def _validate_points_lie_within_raster(xs, ys, lats, lons, bounds, res):
return sorted(oob_indices)


def _get_elevation_from_path(lats, lons, path, interpolation):
def _get_elevation_from_path(lats, lons, path, interpolation, use_mmap=False):
"""Read values at locations in a raster.
Args:
Expand All @@ -87,71 +87,70 @@ def _get_elevation_from_path(lats, lons, path, interpolation):
lats = np.asarray(lats)

try:
with open(path, "rb") as bf:
with mmap.mmap(bf.fileno(), length=0, access=mmap.ACCESS_READ) as mmap_obj:
with rasterio.open(mmap_obj) as f:
if f.crs is None:
msg = "Dataset has no coordinate reference system."
msg += f" Check the file '{path}' is a geo raster."
msg += " Otherwise you'll have to add the crs manually with a tool like gdaltranslate."
raise InputError(msg)

try:
if f.crs.is_epsg_code:
xs, ys = utils.reproject_latlons(
lats, lons, epsg=f.crs.to_epsg()
)
else:
xs, ys = utils.reproject_latlons(
lats, lons, wkt=f.crs.to_wkt()
)
except ValueError:
raise InputError(
"Unable to transform latlons to dataset projection."
with open(path, "rb") as dataset:
if use_mmap:
dataset = mmap.mmap(dataset.fileno())
with rasterio.open(dataset) as f:
if f.crs is None:
msg = "Dataset has no coordinate reference system."
msg += f" Check the file '{path}' is a geo raster."
msg += " Otherwise you'll have to add the crs manually with a tool like gdaltranslate."
raise InputError(msg)

try:
if f.crs.is_epsg_code:
xs, ys = utils.reproject_latlons(
lats, lons, epsg=f.crs.to_epsg()
)
else:
xs, ys = utils.reproject_latlons(lats, lons, wkt=f.crs.to_wkt())
except ValueError:
raise InputError(
"Unable to transform latlons to dataset projection."
)

# Check bounds.
oob_indices = _validate_points_lie_within_raster(
xs, ys, lats, lons, f.bounds, f.res
# Check bounds.
oob_indices = _validate_points_lie_within_raster(
xs, ys, lats, lons, f.bounds, f.res
)
rows, cols = tuple(f.index(xs, ys, op=_noop))

# Different versions of rasterio may or may not collapse single
# f.index() lookups into scalars. We want to always have an
# array.
rows = np.atleast_1d(rows)
cols = np.atleast_1d(cols)

# Offset by 0.5 to convert from center coords (provided by
# f.index) to ul coords (expected by f.read).
rows = rows - 0.5
cols = cols - 0.5

# Because of floating point precision, indices may slightly exceed
# array bounds. Because we've checked the locations are within the
# file bounds, it's safe to clip to the array shape.
rows = rows.clip(0, f.height - 1)
cols = cols.clip(0, f.width - 1)

# Read the locations, using a 1x1 window. The `masked` kwarg makes
# rasterio replace NODATA values with np.nan. The `boundless` kwarg
# forces the windowed elevation to be a 1x1 array, even when all
# values are NODATA.
for i, (row, col) in enumerate(zip(rows, cols)):
if i in oob_indices:
z_all.append(None)
continue
window = rasterio.windows.Window(col, row, 1, 1)
z_array = f.read(
indexes=1,
window=window,
resampling=interpolation,
out_dtype=float,
boundless=True,
masked=True,
)
rows, cols = tuple(f.index(xs, ys, op=_noop))

# Different versions of rasterio may or may not collapse single
# f.index() lookups into scalars. We want to always have an
# array.
rows = np.atleast_1d(rows)
cols = np.atleast_1d(cols)

# Offset by 0.5 to convert from center coords (provided by
# f.index) to ul coords (expected by f.read).
rows = rows - 0.5
cols = cols - 0.5

# Because of floating point precision, indices may slightly exceed
# array bounds. Because we've checked the locations are within the
# file bounds, it's safe to clip to the array shape.
rows = rows.clip(0, f.height - 1)
cols = cols.clip(0, f.width - 1)

# Read the locations, using a 1x1 window. The `masked` kwarg makes
# rasterio replace NODATA values with np.nan. The `boundless` kwarg
# forces the windowed elevation to be a 1x1 array, even when all
# values are NODATA.
for i, (row, col) in enumerate(zip(rows, cols)):
if i in oob_indices:
z_all.append(None)
continue
window = rasterio.windows.Window(col, row, 1, 1)
z_array = f.read(
indexes=1,
window=window,
resampling=interpolation,
out_dtype=float,
boundless=True,
masked=True,
)
z = np.ma.filled(z_array, np.nan)[0][0]
z_all.append(z)
z = np.ma.filled(z_array, np.nan)[0][0]
z_all.append(z)

# Depending on the file format, when rasterio finds an invalid projection
# or file, it might load it with a None crs, or it might throw an error.
Expand All @@ -162,6 +161,9 @@ def _get_elevation_from_path(lats, lons, path, interpolation):
msg += " and that the file is not corrupt."
raise InputError(msg)
raise e
finally:
if isinstance(dataset, mmap.mmap):
dataset.close()

return z_all

Expand Down
2 changes: 2 additions & 0 deletions opentopodata/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
"dataset.filename_tile_size": 1,
"dataset.filename_epsg": utils.WGS84_LATLON_EPSG,
"access_control_allow_origin": None,
"read_with_mmap": False,
}


Expand Down Expand Up @@ -147,6 +148,7 @@ def load_config():
config["access_control_allow_origin"] = config.get(
"access_control_allow_origin", DEFAULTS["access_control_allow_origin"]
)
config["read_with_mmap"] = config.get("read_with_mmap", DEFAULTS["read_with_mmap"])

# Validate CORS. Must have protocol, domain, and optionally port.
_validate_cors(config["access_control_allow_origin"])
Expand Down
2 changes: 1 addition & 1 deletion requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ pyproj
pytest
pytest-cov
PyYAML
rasterio==1.2.10
rasterio<1.3.0
requests
62 changes: 32 additions & 30 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,24 @@
#
affine==2.4.0
# via rasterio
attrs==22.2.0
# via
# pytest
# rasterio
black==23.1.0
attrs==23.1.0
# via rasterio
black==23.7.0
# via -r requirements.in
blinker==1.6.2
# via flask
build==0.10.0
# via pip-tools
cachelib==0.9.0
# via flask-caching
certifi==2022.12.7
certifi==2023.7.22
# via
# pyproj
# rasterio
# requests
charset-normalizer==3.0.1
charset-normalizer==3.2.0
# via requests
click==8.1.3
click==8.1.7
# via
# black
# click-plugins
Expand All @@ -35,11 +35,11 @@ click-plugins==1.1.1
# via rasterio
cligj==0.7.2
# via rasterio
coverage[toml]==7.1.0
coverage[toml]==7.3.0
# via pytest-cov
exceptiongroup==1.1.0
exceptiongroup==1.1.3
# via pytest
flask==2.2.2
flask==2.3.2
# via
# -r requirements.in
# flask-caching
Expand All @@ -49,59 +49,59 @@ geographiclib==2.0
# via -r requirements.in
idna==3.4
# via requests
importlib-metadata==6.0.0
importlib-metadata==6.8.0
# via flask
iniconfig==2.0.0
# via pytest
itsdangerous==2.1.2
# via flask
jinja2==3.1.2
# via flask
markupsafe==2.1.2
markupsafe==2.1.3
# via
# jinja2
# werkzeug
mypy-extensions==1.0.0
# via black
numpy==1.24.2
numpy==1.25.2
# via
# -r requirements.in
# rasterio
# snuggs
packaging==23.0
packaging==23.1
# via
# black
# build
# pytest
pathspec==0.11.0
pathspec==0.11.2
# via black
pip-tools==6.12.2
pip-tools==7.3.0
# via -r requirements.in
platformdirs==3.0.0
platformdirs==3.10.0
# via black
pluggy==1.0.0
pluggy==1.2.0
# via pytest
polyline==2.0.0
# via -r requirements.in
pylibmc==1.6.3
# via -r requirements.in
pyparsing==3.0.9
pyparsing==3.1.1
# via snuggs
pyproj==3.4.1
pyproj==3.6.0
# via -r requirements.in
pyproject-hooks==1.0.0
# via build
pytest==7.2.1
pytest==7.4.0
# via
# -r requirements.in
# pytest-cov
pytest-cov==4.0.0
pytest-cov==4.1.0
# via -r requirements.in
pyyaml==6.0
pyyaml==6.0.1
# via -r requirements.in
rasterio==1.2.10
# via -r requirements.in
requests==2.28.2
requests==2.31.0
# via -r requirements.in
snuggs==1.4.7
# via rasterio
Expand All @@ -110,16 +110,18 @@ tomli==2.0.1
# black
# build
# coverage
# pip-tools
# pyproject-hooks
# pytest
typing-extensions==4.4.0
typing-extensions==4.7.1
# via black
urllib3==1.26.14
urllib3==2.0.4
# via requests
werkzeug==2.2.2
werkzeug==2.3.7
# via flask
wheel==0.38.4
wheel==0.41.1
# via pip-tools
zipp==3.12.1
zipp==3.16.2
# via importlib-metadata

# The following packages are considered to be unsafe in a requirements file:
Expand Down
10 changes: 10 additions & 0 deletions tests/test_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,16 @@ def test_bilinear_interpolation(self):
0.4, 0.3, self.geotiff_z[:2, :2]
)

def test_mmap(self):
    """Check a bilinear lookup made with ``use_mmap=True``.

    The elevation read for a single point is compared against the value
    interpolated by hand from the top-left 2x2 corner of the reference
    array.
    """
    latitudes, longitudes = [89.6], [-179.7]
    elevations = backend._get_elevation_from_path(
        latitudes,
        longitudes,
        ETOPO1_GEOTIFF_PATH,
        "bilinear",
        use_mmap=True,
    )
    approx_elevation = pytest.approx(elevations[0])
    corner = self.geotiff_z[:2, :2]
    expected = self._interp_bilinear(0.4, 0.3, corner)
    assert approx_elevation == expected

def test_none_outside_dataset(self):
lats = [0, 0, -90.1, 90.1]
lons = [-180.1, 180.1, 0, 0]
Expand Down

0 comments on commit 8303210

Please sign in to comment.