diff --git a/test/core/test_update.py b/test/core/test_update.py index e1c9ce5e7..16b2b62e8 100644 --- a/test/core/test_update.py +++ b/test/core/test_update.py @@ -3,9 +3,11 @@ import numpy as np import pandas as pd import xarray as xr +import rioxarray from test.sampledata import create_highroc_dataset -from xcube.core.update import update_dataset_var_attrs, update_dataset_attrs +from xcube.core.update import update_dataset_attrs +from xcube.core.update import update_dataset_var_attrs class UpdateVariablePropsTest(unittest.TestCase): @@ -20,7 +22,8 @@ def test_no_change(self): def test_change_all_or_none(self): ds1 = create_highroc_dataset() ds2 = update_dataset_var_attrs(ds1, - [(var_name, {'marker': True}) for var_name in ds1.data_vars]) + [(var_name, {'marker': True}) for + var_name in ds1.data_vars]) self.assertEqual(len(ds1.data_vars), len(ds2.data_vars)) self.assertTrue(all(['marker' in ds2[n].attrs for n in ds2.variables])) @@ -31,7 +34,8 @@ def test_change_some(self): ds1 = create_highroc_dataset() ds2 = update_dataset_var_attrs(ds1, [('conc_chl', {'name': 'chl_c2rcc'}), - ('c2rcc_flags', {'name': 'flags', 'marker': True}), + ('c2rcc_flags', + {'name': 'flags', 'marker': True}), ('rtoa_10', None)]) self.assertEqual(len(ds1.data_vars), len(ds2.data_vars)) @@ -57,8 +61,9 @@ def test_change_some(self): ('rtoa_1', {'name': 'refl_toa'}), ('rtoa_2', {'name': 'refl_toa'}), ('rtoa_3', {'name': 'refl_toa'})]) - self.assertEqual("variable 'rtoa_2' cannot be renamed into 'refl_toa' because the name is already in use", - f'{cm.exception}') + self.assertEqual( + "variable 'rtoa_2' cannot be renamed into 'refl_toa' because the name is already in use", + f'{cm.exception}') class UpdateGlobalAttributesTest(unittest.TestCase): @@ -73,13 +78,17 @@ def test_update_global_attributes(self): res = 0.25 res05 = res / 2 - lon = np.linspace(lon_min + res05, lon_min + num_lons * res - res05, num_lons) - lat = np.linspace(lat_min + res05, lat_min + num_lats * res - res05, num_lats) 
+ lon = np.linspace(lon_min + res05, lon_min + num_lons * res - res05, + num_lons) + lat = np.linspace(lat_min + res05, lat_min + num_lats * res - res05, + num_lats) lon_bnds = np.array([[v - res05, v + res05] for v in lon]) lat_bnds = np.array([[v - res05, v + res05] for v in lat]) - time = [pd.to_datetime(f'2018-06-0{i}T12:00:00') for i in range(1, num_times + 1)] + time = [pd.to_datetime(f'2018-06-0{i}T12:00:00') for i in + range(1, num_times + 1)] time_bnds = [(pd.to_datetime(f'2018-06-0{i}T00:00:00'), - pd.to_datetime(f'2018-06-0{i}T23:00:59')) for i in range(1, num_times + 1)] + pd.to_datetime(f'2018-06-0{i}T23:00:59')) for i in + range(1, num_times + 1)] coords = dict(time=(['time'], time), lat=(['lat'], lat), @@ -90,7 +99,8 @@ def test_update_global_attributes(self): lon_bnds=(['lon', 'bnds'], lon_bnds), **coords) - output_metadata = dict(history='pipo', license='MIT', Conventions='CF-1.7') + output_metadata = dict(history='pipo', license='MIT', + Conventions='CF-1.7') ds1 = xr.Dataset(coords=coords) ds2 = update_dataset_attrs(ds1, global_attrs=output_metadata) @@ -107,8 +117,10 @@ def test_update_global_attributes(self): self.assertEqual(13.5, ds2.attrs.get('geospatial_lat_max')) self.assertEqual(0.25, ds2.attrs.get('geospatial_lat_resolution')) self.assertEqual('degrees_north', ds2.attrs.get('geospatial_lat_units')) - self.assertEqual('2018-06-01T00:00:00.000000000', ds2.attrs.get('time_coverage_start')) - self.assertEqual('2018-06-06T00:00:00.000000000', ds2.attrs.get('time_coverage_end')) + self.assertEqual('2018-06-01T00:00:00.000000000', + ds2.attrs.get('time_coverage_start')) + self.assertEqual('2018-06-06T00:00:00.000000000', + ds2.attrs.get('time_coverage_end')) self.assertIn('date_modified', ds2.attrs) ds1 = xr.Dataset(coords=coords_with_bnds) @@ -126,6 +138,84 @@ def test_update_global_attributes(self): self.assertEqual(13.5, ds2.attrs.get('geospatial_lat_max')) self.assertEqual(0.25, ds2.attrs.get('geospatial_lat_resolution')) 
self.assertEqual('degrees_north', ds2.attrs.get('geospatial_lat_units')) - self.assertEqual('2018-06-01T00:00:00.000000000', ds2.attrs.get('time_coverage_start')) - self.assertEqual('2018-06-05T23:00:59.000000000', ds2.attrs.get('time_coverage_end')) + self.assertEqual('2018-06-01T00:00:00.000000000', + ds2.attrs.get('time_coverage_start')) + self.assertEqual('2018-06-05T23:00:59.000000000', + ds2.attrs.get('time_coverage_end')) + self.assertIn('date_modified', ds2.attrs) + + def test_update_global_attributes_crs(self): + num_x = 8 + num_y = 6 + num_times = 5 + + x_min = -20. + y_min = 12. + + res = 0.25 + res05 = res / 2 + x = np.linspace(x_min + res05, x_min + num_x * res - res05, num_x) + y = np.linspace(y_min + res05, y_min + num_y * res - res05, num_y) + x_bnds = np.array([[v - res05, v + res05] for v in x]) + y_bnds = np.array([[v - res05, v + res05] for v in y]) + time = [pd.to_datetime(f'2018-06-0{i}T12:00:00') for i in + range(1, num_times + 1)] + time_bnds = [(pd.to_datetime(f'2018-06-0{i}T00:00:00'), + pd.to_datetime(f'2018-06-0{i}T23:00:59')) for i in + range(1, num_times + 1)] + + coords = dict(time=(['time'], time), + y=(['y'], y), + x=(['x'], x)) + + coords_with_bnds = dict(time_bnds=(['time', 'bnds'], time_bnds), + y_bnds=(['y', 'bnds'], y_bnds), + x_bnds=(['x', 'bnds'], x_bnds), + **coords) + + output_metadata = dict(history='pipo', license='MIT', + Conventions='CF-1.7') + + ds1 = xr.Dataset(coords=coords) + ds1.rio.write_crs("epsg:4326", inplace=True, + grid_mapping_name="crs").reset_coords() + ds2 = update_dataset_attrs(ds1, global_attrs=output_metadata) + + self.assertIsNot(ds2, ds1) + self.assertEqual('CF-1.7', ds2.attrs.get('Conventions')) + self.assertEqual('MIT', ds2.attrs.get('license')) + self.assertEqual('pipo', ds2.attrs.get('history')) + self.assertEqual(-20.0, ds2.attrs.get('geospatial_x_min')) + self.assertEqual(-18.0, ds2.attrs.get('geospatial_x_max')) + self.assertEqual(0.25, ds2.attrs.get('geospatial_x_resolution')) + 
self.assertEqual('degrees_east', ds2.attrs.get('geospatial_x_units')) + self.assertEqual(12.0, ds2.attrs.get('geospatial_y_min')) + self.assertEqual(13.5, ds2.attrs.get('geospatial_y_max')) + self.assertEqual(0.25, ds2.attrs.get('geospatial_y_resolution')) + self.assertEqual('degrees_north', ds2.attrs.get('geospatial_y_units')) + self.assertEqual('2018-06-01T00:00:00.000000000', + ds2.attrs.get('time_coverage_start')) + self.assertEqual('2018-06-06T00:00:00.000000000', + ds2.attrs.get('time_coverage_end')) + self.assertIn('date_modified', ds2.attrs) + + ds1 = xr.Dataset(coords=coords_with_bnds) + ds2 = update_dataset_attrs(ds1, global_attrs=output_metadata) + + self.assertIsNot(ds2, ds1) + self.assertEqual('CF-1.7', ds2.attrs.get('Conventions')) + self.assertEqual('MIT', ds2.attrs.get('license')) + self.assertEqual('pipo', ds2.attrs.get('history')) + self.assertEqual(-20.0, ds2.attrs.get('geospatial_x_min')) + self.assertEqual(-18.0, ds2.attrs.get('geospatial_x_max')) + self.assertEqual(0.25, ds2.attrs.get('geospatial_x_resolution')) + self.assertEqual('degrees_east', ds2.attrs.get('geospatial_x_units')) + self.assertEqual(12.0, ds2.attrs.get('geospatial_y_min')) + self.assertEqual(13.5, ds2.attrs.get('geospatial_y_max')) + self.assertEqual(0.25, ds2.attrs.get('geospatial_y_resolution')) + self.assertEqual('degrees_north', ds2.attrs.get('geospatial_y_units')) + self.assertEqual('2018-06-01T00:00:00.000000000', + ds2.attrs.get('time_coverage_start')) + self.assertEqual('2018-06-05T23:00:59.000000000', + ds2.attrs.get('time_coverage_end')) self.assertIn('date_modified', ds2.attrs) diff --git a/xcube/core/gen2/local/mdadjuster.py b/xcube/core/gen2/local/mdadjuster.py index 598068664..4ab7341be 100644 --- a/xcube/core/gen2/local/mdadjuster.py +++ b/xcube/core/gen2/local/mdadjuster.py @@ -81,42 +81,6 @@ def _check_for_self_destruction(metadata: Dict[str, Any]): def get_geospatial_attrs(gm: GridMapping) -> Dict[str, Any]: - if gm.crs.is_geographic: - lon_min, lat_min, 
lon_max, lat_max = gm.xy_bbox - lon_res, lat_res = gm.xy_res - else: - x1, y1, x2, y2 = gm.xy_bbox - x_res, y_res = gm.xy_res - # center position - xm1 = (x1 + x2) / 2 - ym1 = (y1 + y2) / 2 - # center position + delta - xm2 = xm1 + x_res - ym2 = ym1 + y_res - transformer = pyproj.Transformer.from_crs(crs_from=gm.crs, - crs_to=CRS_CRS84) - xx, yy = transformer.transform((x1, x2, xm1, xm2), - (y1, y2, ym1, ym2)) - lon_min, lon_max, lon_m1, lon_m2 = xx - lat_min, lat_max, lat_m1, lat_m2 = yy - # Estimate resolution (note, this may be VERY wrong) - lon_res = abs(lon_m2 - lon_m1) - lat_res = abs(lat_m2 - lat_m1) - return dict( - geospatial_lon_units='degrees_east', - geospatial_lon_min=lon_min, - geospatial_lon_max=lon_max, - geospatial_lon_resolution=lon_res, - geospatial_lat_units='degrees_north', - geospatial_lat_min=lat_min, - geospatial_lat_max=lat_max, - geospatial_lat_resolution=lat_res, - geospatial_bounds_crs='CRS84', - geospatial_bounds=f'POLYGON((' - f'{lon_min} {lat_min}, ' - f'{lon_min} {lat_max}, ' - f'{lon_max} {lat_max}, ' - f'{lon_max} {lat_min}, ' - f'{lon_min} {lat_min}' - f'))', - ) + # Note: check if force_geographic is wanted, here it is set for backwards + # compatibility + return dict(gm.to_attrs(force_geographic=True)) diff --git a/xcube/core/gridmapping/base.py b/xcube/core/gridmapping/base.py index 239195ebc..572deb977 100644 --- a/xcube/core/gridmapping/base.py +++ b/xcube/core/gridmapping/base.py @@ -23,7 +23,12 @@ import copy import math import threading -from typing import Any, Tuple, Optional, Union, Mapping, Callable +from typing import Any +from typing import Callable +from typing import Mapping +from typing import Optional +from typing import Tuple +from typing import Union import numpy as np import pyproj @@ -607,6 +612,75 @@ def ij_bboxes_from_xy_bboxes(self, ij_bboxes) return ij_bboxes + def to_attrs(self, + force_geographic: bool = False) -> Mapping[str, Any]: + """ + Get CF-compliant attributes of a dataset. 
+ https://wiki.esipfed.org/Attribute_Convention_for_Data_Discovery_1-3#Recommended + + Defined only for grid mappings with regular x,y coordinates. + + :param force_geographic: Whether to force geographic CRS. + Defaults to false. + + :return: dictionary with dataset coordinate attributes. + """ + + x1, y1, x2, y2 = self.xy_bbox + + if self.crs.is_geographic: + lon_min, lat_min, lon_max, lat_max = self.xy_bbox + lon_res, lat_res = self.xy_res + else: + x_res, y_res = self.xy_res + # center position + xm1 = (x1 + x2) / 2 + ym1 = (y1 + y2) / 2 + # center position + delta + xm2 = xm1 + x_res + ym2 = ym1 + y_res + transformer = pyproj.Transformer.from_crs(crs_from=self.crs, + crs_to=CRS_CRS84) + xx, yy = transformer.transform((x1, x2, xm1, xm2), + (y1, y2, ym1, ym2)) + lon_min, lon_max, lon_m1, lon_m2 = xx + lat_min, lat_max, lat_m1, lat_m2 = yy + # Estimate resolution (note, this may be VERY wrong) + lon_res = abs(lon_m2 - lon_m1) + lat_res = abs(lat_m2 - lat_m1) + + if force_geographic: + geospatial_bounds_crs = 'CRS84' + geospatial_bounds = (f'POLYGON((' + f'{lon_min} {lat_min}, ' + f'{lon_min} {lat_max}, ' + f'{lon_max} {lat_max}, ' + f'{lon_max} {lat_min}, ' + f'{lon_min} {lat_min}' + f'))') + else: + geospatial_bounds_crs = self.crs.srs + geospatial_bounds = (f'POLYGON((' + f'{x1} {y1}, ' + f'{x1} {y2}, ' + f'{x2} {y2}, ' + f'{x2} {y1}, ' + f'{x1} {y1}' + f'))') + + return dict( + geospatial_lon_units='degrees_east', + geospatial_lon_min=lon_min, + geospatial_lon_max=lon_max, + geospatial_lon_resolution=lon_res, + geospatial_lat_units='degrees_north', + geospatial_lat_min=lat_min, + geospatial_lat_max=lat_max, + geospatial_lat_resolution=lat_res, + geospatial_bounds_crs=geospatial_bounds_crs, + geospatial_bounds=geospatial_bounds, + ) + def to_coords(self, xy_var_names: Tuple[str, str] = None, xy_dim_names: Tuple[str, str] = None, @@ -806,9 +880,9 @@ def is_close(self, sx1, sy1, sx2, sy2 = self.xy_bbox ox1, oy1, ox2, oy2 = other.xy_bbox return math.isclose(sx1, 
ox1, abs_tol=tolerance) \ - and math.isclose(sy1, oy1, abs_tol=tolerance) \ - and math.isclose(sx2, ox2, abs_tol=tolerance) \ - and math.isclose(sy2, oy2, abs_tol=tolerance) + and math.isclose(sy1, oy1, abs_tol=tolerance) \ + and math.isclose(sx2, ox2, abs_tol=tolerance) \ + and math.isclose(sy2, oy2, abs_tol=tolerance) return False @classmethod diff --git a/xcube/core/update.py b/xcube/core/update.py index bdf91e584..94faaf683 100644 --- a/xcube/core/update.py +++ b/xcube/core/update.py @@ -20,19 +20,23 @@ # SOFTWARE. import datetime -from typing import Any, Dict +from typing import Any +from typing import Dict import xarray as xr from xcube.constants import FORMAT_NAME_NETCDF4 from xcube.constants import FORMAT_NAME_ZARR +from xcube.core.gridmapping import GridMapping from xcube.util.config import NameDictPairList _LON_ATTRS_DATA = ('lon', 'lon_bnds', 'degrees_east', - ('geospatial_lon_min', 'geospatial_lon_max', 'geospatial_lon_units', 'geospatial_lon_resolution'), + ('geospatial_lon_min', 'geospatial_lon_max', + 'geospatial_lon_units', 'geospatial_lon_resolution'), float) _LAT_ATTRS_DATA = ('lat', 'lat_bnds', 'degrees_north', - ('geospatial_lat_min', 'geospatial_lat_max', 'geospatial_lat_units', 'geospatial_lat_resolution'), + ('geospatial_lat_min', 'geospatial_lat_max', + 'geospatial_lat_units', 'geospatial_lat_resolution'), float) _TIME_ATTRS_DATA = ('time', 'time_bnds', None, ('time_coverage_start', 'time_coverage_end', None, None), @@ -59,8 +63,13 @@ def update_dataset_attrs(dataset: xr.Dataset, if global_attrs: dataset.attrs.update(global_attrs) - return _update_dataset_attrs(dataset, [_LON_ATTRS_DATA, _LAT_ATTRS_DATA, _TIME_ATTRS_DATA], - update_existing=update_existing, in_place=True) + dataset = update_dataset_spatial_attrs(dataset, + update_existing=update_existing, + in_place=True) + dataset = update_dataset_temporal_attrs(dataset, + update_existing=update_existing, + in_place=True) + return dataset def update_dataset_spatial_attrs(dataset: 
xr.Dataset, @@ -74,8 +83,18 @@ def update_dataset_spatial_attrs(dataset: xr.Dataset, :param in_place: If ``True``, *dataset* will be modified in place and returned. :return: A new dataset, if *in_place* if ``False`` (default), else the passed and modified *dataset*. """ - return _update_dataset_attrs(dataset, [_LON_ATTRS_DATA, _LAT_ATTRS_DATA], - update_existing=update_existing, in_place=in_place) + if not in_place: + dataset = dataset.copy() + gm = GridMapping.from_dataset(dataset) + required_attrs = ['geospatial_lon_min', + 'geospatial_lon_max', + 'geospatial_lat_min', + 'geospatial_lat_max'] + if update_existing or not all(coord_attr in dataset.attrs for coord_attr + in required_attrs): + # Update dataset with newly retrieved attributes + dataset.attrs.update(gm.to_attrs()) + return dataset def update_dataset_temporal_attrs(dataset: xr.Dataset, @@ -89,14 +108,7 @@ def update_dataset_temporal_attrs(dataset: xr.Dataset, :param in_place: If ``True``, *dataset* will be modified in place and returned. :return: A new dataset, if *in_place* is ``False`` (default), else the passed and modified *dataset*.
""" - return _update_dataset_attrs(dataset, [_TIME_ATTRS_DATA], - update_existing=update_existing, in_place=in_place) - - -def _update_dataset_attrs(dataset: xr.Dataset, - coord_data, - update_existing: bool = False, - in_place: bool = False) -> xr.Dataset: + coord_data = [_TIME_ATTRS_DATA] if not in_place: dataset = dataset.copy() @@ -121,7 +133,9 @@ def _update_dataset_attrs(dataset: xr.Dataset, coord_v2 = coord_bnds[-1][1] coord_res = (coord_v2 - coord_v1) / coord_bnds.shape[0] coord_res = float(coord_res.values) - coord_min, coord_max = (coord_v1, coord_v2) if coord_res > 0 else (coord_v2, coord_v1) + coord_min, coord_max = ( + coord_v1, coord_v2) if coord_res > 0 else ( + coord_v2, coord_v1) dataset.attrs[coord_min_attr_name] = cast(coord_min.values) dataset.attrs[coord_max_attr_name] = cast(coord_max.values) elif coord is not None \ @@ -134,7 +148,9 @@ def _update_dataset_attrs(dataset: xr.Dataset, coord_v1 -= coord_res / 2 coord_v2 += coord_res / 2 coord_res = float(coord_res.values) - coord_min, coord_max = (coord_v1, coord_v2) if coord_res > 0 else (coord_v2, coord_v1) + coord_min, coord_max = ( + coord_v1, coord_v2) if coord_res > 0 else ( + coord_v2, coord_v1) else: coord_min, coord_max = coord_v1, coord_v2 dataset.attrs[coord_min_attr_name] = cast(coord_min.values) @@ -142,10 +158,10 @@ def _update_dataset_attrs(dataset: xr.Dataset, if coord_units_attr_name is not None and coord_units is not None: dataset.attrs[coord_units_attr_name] = coord_units if coord_res_attr_name is not None and coord_res is not None: - dataset.attrs[coord_res_attr_name] = coord_res if coord_res > 0 else -coord_res + dataset.attrs[ + coord_res_attr_name] = coord_res if coord_res > 0 else -coord_res dataset.attrs['date_modified'] = datetime.datetime.now().isoformat() - return dataset @@ -180,8 +196,9 @@ def update_dataset_var_attrs(dataset: xr.Dataset, if 'name' in var_attrs: new_var_name = var_attrs.pop('name') if new_var_name in new_var_names: - raise ValueError(f'variable 
{var_name!r} cannot be renamed into {new_var_name!r} ' - 'because the name is already in use') + raise ValueError( + f'variable {var_name!r} cannot be renamed into {new_var_name!r} ' + 'because the name is already in use') new_var_names.add(new_var_name) var_attrs['original_name'] = var_name var_renamings[var_name] = new_var_name @@ -239,7 +256,8 @@ def get_size(i): return size[0] return var.shape[i] - var.encoding.update({chunk_sizes_attr_name: tuple(map(get_size, range(var.ndim)))}) + var.encoding.update( + {chunk_sizes_attr_name: tuple(map(get_size, range(var.ndim)))}) elif chunk_sizes_attr_name in var.encoding: # Remove any explicit and possibly unintended specification del var.encoding[chunk_sizes_attr_name]