diff --git a/notebooks/Catalog Tutorial.ipynb b/notebooks/Catalog Tutorial.ipynb index 61ab3cba..4115be29 100644 --- a/notebooks/Catalog Tutorial.ipynb +++ b/notebooks/Catalog Tutorial.ipynb @@ -884,7 +884,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.10" + "version": "3.10.14" } }, "nbformat": 4, diff --git a/notebooks/Continuous Comparison Tutorial.ipynb b/notebooks/Continuous Comparison Tutorial.ipynb index 51b5c92b..b3eec520 100644 --- a/notebooks/Continuous Comparison Tutorial.ipynb +++ b/notebooks/Continuous Comparison Tutorial.ipynb @@ -710,7 +710,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.10" + "version": "3.10.14" } }, "nbformat": 4, diff --git a/notebooks/Tutorial.ipynb b/notebooks/Tutorial.ipynb index 7a61e360..8e04e7dd 100644 --- a/notebooks/Tutorial.ipynb +++ b/notebooks/Tutorial.ipynb @@ -1158,7 +1158,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.10" + "version": "3.10.14" } }, "nbformat": 4, diff --git a/pyproject.toml b/pyproject.toml index 40a909b1..f110f7e4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ authors = [ requires-python = ">=3.8" keywords = ["geospatial", "evaluations"] license = {text = "MIT"} -version = "0.2.6" +version = "0.2.7" dynamic = ["readme", "dependencies"] [project.optional-dependencies] diff --git a/requirements.txt b/requirements.txt index ac3ed7ac..7536bdf0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,17 +1,16 @@ rioxarray>=0.13.4 -dask==2023.5.0 -xarray-spatial==0.3.5 +dask>=2023.5.0,<2025 pandera==0.15.1 shapely==2.0.1 geocube>=0.3.3 pandas==2.0.2 odc-geo==0.4.1 -pydantic==1.10.10 +pydantic>=1.10.13 rio-cogeo==4.0.0 matplotlib==3.7.1 contextily==1.3.0 flox==0.7.2 xskillscore==0.0.24 -pyogrio==0.7.2 +pyogrio>=0.7.2,<=0.8.0 pystac-client==0.7.5 s3fs<=2023.12.1 diff --git a/src/gval/catalogs/catalogs.py b/src/gval/catalogs/catalogs.py index c40b5759..d6c7a68c 100644 --- a/src/gval/catalogs/catalogs.py +++ b/src/gval/catalogs/catalogs.py @@ -6,6 +6,7 @@ # __all__ = ['*'] __author__ = "Fernando Aristizabal" +import gc from typing import Iterable, Optional, Callable, Tuple import os @@ -172,7 +173,7 @@ def loadxr(map, open_kwargs): else: raise ValueError("compare_type must be str or Callable") - # write agreement map to file + # Write agreement map to file if (agreement_map_field is not None) & isinstance( agreement_map, (xr.DataArray, xr.Dataset) ): @@ -181,6 +182,10 @@ def loadxr(map, open_kwargs): row[agreement_map_field], **agreement_map_write_kwargs ) + # Unfortunately necessary until a fix is found in xarray/rioxarray io + del candidate_map, benchmark_map, agreement_map + gc.collect() + return metrics_df # make kwargs for dask apply diff --git a/src/gval/comparison/pairing_functions.py b/src/gval/comparison/pairing_functions.py index 2452137d..1edf453a 100644 --- a/src/gval/comparison/pairing_functions.py +++ b/src/gval/comparison/pairing_functions.py @@ -17,10 +17,34 @@ from numbers import Number import numpy as np -import numba as nb - - -@nb.vectorize(nopython=True) +from numba import vectorize, uint8, int32, int64, float32, float64, boolean + + +# Numba Type Definitions +one_param_function_types = [ + uint8(uint8), + int32(int32), + int64(int64), + float32(float32), + float64(float64), +] +two_param_function_types = [ + uint8(uint8, uint8), + int32(int32, int32), + int64(int64, int64), + float32(float32, float32), + float64(float64, float64), +] +not_natural_number_types = [ + boolean(uint8, boolean), + boolean(int32, boolean), + int64(int64, boolean), + float32(float32, boolean), + float64(float64, boolean), +] + + +@vectorize(not_natural_number_types, nopython=True) def _is_not_natural_number( x: Number, raise_exception: bool ) -> bool: # pragma: no cover @@ -49,7 +73,7 @@ def _is_not_natural_number( return False # treated as natural for this use case # checks for non-negative and whole number - elif (x < 0) | ((x - nb.int64(x)) != 0): + elif (x < 0) | ((x - int64(x)) != 0): if raise_exception: raise ValueError( "Non natural number found (non-negative integers, excluding Inf) [0, 1, 2, 3, 4, ...)" @@ -62,7 +86,7 @@ def _is_not_natural_number( return False -@nb.vectorize(nopython=True) +@vectorize(two_param_function_types, nopython=True) def cantor_pair(c: Number, b: Number) -> Number: # pragma: no cover """ Produces unique natural number for two non-negative natural numbers (0,1,2,...) @@ -92,7 +116,7 @@ def cantor_pair(c: Number, b: Number) -> Number: # pragma: no cover return 0.5 * (c**2 + c + 2 * c * b + 3 * b + b**2) -@nb.vectorize(nopython=True) +@vectorize(two_param_function_types, nopython=True) def szudzik_pair(c: Number, b: Number) -> Number: # pragma: no cover """ Produces unique natural number for two non-negative natural numbers (0,1,2,3,...). @@ -122,7 +146,7 @@ def szudzik_pair(c: Number, b: Number) -> Number: # pragma: no cover return c**2 + c + b if c >= b else b**2 + c -@nb.vectorize(nopython=True) +@vectorize(one_param_function_types, nopython=True) def _negative_value_transformation(x: Number) -> Number: # pragma: no cover """ Transforms negative values for use with pairing functions that only accept non-negative integers. @@ -147,7 +171,7 @@ def _negative_value_transformation(x: Number) -> Number: # pragma: no cover return 2 * x if x >= 0 else -2 * x - 1 -@nb.vectorize(nopython=True) +@vectorize(two_param_function_types, nopython=True) def cantor_pair_signed(c: Number, b: Number) -> Number: # pragma: no cover """ Output unique natural number for each unique combination of whole numbers using Cantor signed method. @@ -177,7 +201,12 @@ def cantor_pair_signed(c: Number, b: Number) -> Number: # pragma: no cover return cantor_pair(ct, bt) -@nb.vectorize(nopython=True) +# from typing import TypeVar +# +# T = TypeVar("T") + + +@vectorize(two_param_function_types, nopython=True) def szudzik_pair_signed(c: Number, b: Number) -> Number: # pragma: no cover """ Output unique natural number for each unique combination of whole numbers using Szudzik signed method._summary_ @@ -386,10 +415,10 @@ def pairing_dict_fn( "Value combination found not accounted for in pairing dictionary" ) - return nb.vectorize(nopython=True)(pairing_dict_fn) + return vectorize(two_param_function_types, nopython=True)(pairing_dict_fn) -@nb.vectorize(nopython=True) +@vectorize(two_param_function_types, nopython=True) def difference(c: Number, b: Number) -> Number: # pragma: no cover """ Calculates the difference between candidate and benchmark. diff --git a/src/gval/comparison/tabulation.py b/src/gval/comparison/tabulation.py index 34df9a59..1e8b1686 100644 --- a/src/gval/comparison/tabulation.py +++ b/src/gval/comparison/tabulation.py @@ -91,16 +91,20 @@ def _crosstab_2d_DataArrays( is_dsk = True agreement_map.name = "group" + ag_dtype = agreement_map.dtype if is_dsk: agreement_counts = xarray_reduce( agreement_map, agreement_map, + engine="numba", expected_groups=dask.array.unique(agreement_map.data), func="count", ) else: - agreement_counts = xarray_reduce(agreement_map, agreement_map, func="count") + agreement_counts = xarray_reduce( + agreement_map, agreement_map, engine="numba", func="count" + ) def not_nan(number): return not np.isnan(number) @@ -129,13 +133,15 @@ def not_nan(number): for x in filter(not_nan, agreement_counts.coords["group"].values) ], "agreement_values": list( - filter(not_nan, agreement_counts.coords["group"].values.astype(float)) + filter( + not_nan, agreement_counts.coords["group"].values.astype(ag_dtype) + ) ), "counts": [ x for x, y in zip( - agreement_counts.values.astype(float), - agreement_counts.coords["group"].values.astype(float), + agreement_counts.values.astype(ag_dtype), + agreement_counts.coords["group"].values.astype(ag_dtype), ) if not np.isnan(y) ], diff --git a/tests/cases_catalogs.py b/tests/cases_catalogs.py index 1848bad3..8e93304e 100644 --- a/tests/cases_catalogs.py +++ b/tests/cases_catalogs.py @@ -79,15 +79,15 @@ pd.DataFrame( { "map_id_candidate": [ - "s3://gval-test/candidate_continuous_0.tif", - "s3://gval-test/candidate_continuous_1.tif", - "s3://gval-test/candidate_continuous_1.tif", + f"{TEST_DATA_DIR}/candidate_continuous_0.tif", + f"{TEST_DATA_DIR}/candidate_continuous_1.tif", + f"{TEST_DATA_DIR}/candidate_continuous_1.tif", ], "compare_id": ["compare1", "compare2", "compare2"], "map_id_benchmark": [ - "s3://gval-test/benchmark_continuous_0.tif", - "s3://gval-test/benchmark_continuous_1.tif", - "s3://gval-test/benchmark_continuous_1.tif", + f"{TEST_DATA_DIR}/benchmark_continuous_0.tif", + f"{TEST_DATA_DIR}/benchmark_continuous_1.tif", + f"{TEST_DATA_DIR}/benchmark_continuous_1.tif", ], "value1_candidate": [1, 2, 2], "value2_candidate": [5, 6, 6], diff --git a/tests/cases_compare.py b/tests/cases_compare.py index 6d65c0cb..724b40d2 100644 --- a/tests/cases_compare.py +++ b/tests/cases_compare.py @@ -225,7 +225,7 @@ def case_make_pairing_dict( pairing_dict_fn_inputs = [ (1, 2, {(1, 2): 3}, 3), (9, 10, {(9, 10.0): 1}, 1), - (-1, 10, {(-1, 10): np.nan}, np.nan), + (-1.0, 10.0, {(-1.0, 10.0): np.nan}, np.nan), ] diff --git a/tests/conftest.py b/tests/conftest.py index ab32f553..bb42f7df 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -18,7 +18,7 @@ from gval.comparison.pairing_functions import PairingDict # name of S3 for test data -TEST_DATA_S3_NAME = "gval-test" +TEST_DATA_S3_NAME = "gval" TEST_DATA_DIR = f"s3://{TEST_DATA_S3_NAME}" diff --git a/tests/test_homogenize.py b/tests/test_homogenize.py index 2eb8b23e..fb47d2bc 100644 --- a/tests/test_homogenize.py +++ b/tests/test_homogenize.py @@ -10,6 +10,7 @@ import xarray as xr import numpy as np import geopandas as gpd +from geopandas.testing import assert_geodataframe_equal from gval.homogenize.spatial_alignment import ( _matching_crs, @@ -191,7 +192,12 @@ def test_vectorize_raster_success(raster_map, expected): vector_df = _vectorize_data(raster_data=raster_map) assert isinstance(vector_df, gpd.GeoDataFrame) - assert vector_df.equals(expected) + assert_geodataframe_equal( + vector_df.sort_values("geometry", ignore_index=True), + expected.sort_values("geometry", ignore_index=True), + check_index_type=False, + check_dtype=False, + ) @parametrize_with_cases(