diff --git a/.pylintrc b/.pylintrc index ec0a5d9..ebef638 100644 --- a/.pylintrc +++ b/.pylintrc @@ -127,6 +127,7 @@ disable=C0111,missing-docstring, # note: (C0412, ungrouped-imports) is managed via isort tool, ignore false positives per-file-ignores = + notebooks/*:C0103, tests/*:R1729 # Enable the message, report, category or checker with the given id(s). You can diff --git a/CHANGES.md b/CHANGES.md index 608935a..dd2bb66 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,7 +4,8 @@ Changes [Unreleased](https://github.com/crim-ca/ncml2stac/tree/master) (latest) ------------------------------------------------------------------------------------------------------------------ - +- Update STAC Item generation to transfer most NCML attribute parsing logic to `STACpopulator` implementation. +- Use changes in https://github.com/crim-ca/stac-populator/pull/23 to evaluate `STACpopulator` new conversion logic. [0.1.0](https://github.com/crim-ca/ncml2stac/tree/0.1.0) (2023-09-29) ------------------------------------------------------------------------------------------------------------------ diff --git a/notebooks/ncml2stac.ipynb b/notebooks/ncml2stac.ipynb index 554a292..f491898 100644 --- a/notebooks/ncml2stac.ipynb +++ b/notebooks/ncml2stac.ipynb @@ -38,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 85, "outputs": [], "source": [ "# NOTE:\n", @@ -48,21 +48,31 @@ "# which can cause syntax/indent errors.\n", "try:\n", " # to make optional inputs, define types like so: 'Optional[CWLInput]'\n", - " from typing import Optional # noqa # pylint: disable=W0611\n", + " from typing import Any, MutableSequence, MutableMapping, Optional, TypeAlias, Union\n", " from ipython2cwl.iotypes import CWLFilePathInput, CWLFilePathOutput\n", + "\n", + " JsonLike: TypeAlias = MutableMapping[\n", + " str,\n", + " Optional[Union[str, float, int, bool, MutableSequence[Any], MutableMapping[str, Any]]]\n", + " ]\n", "except ImportError:\n", " pass # ignore explicit typing definitions if modules were not installed (CWL conversion still works)\n", "\n", + "# NOTE: important part for CWL conversion is to apply the typing definition\n", + "# NOTE: application-specific detail: supports NCML URL directly or through a THREDDS catalog URL\n", "input_ncml: \"CWLFilePathInput\" = (\n", - " \"https://raw.githubusercontent.com/crim-ca/stac-populator/ce268cdcde6030b3813f858ab1342b7cafa463e3/\"\n", - " \"tests/data/o3_Amon_GFDL-ESM4_historical_r1i1p1f1_gr1_185001-194912.xml\"\n", + " \"https://pavics.ouranos.ca/twitcher/ows/proxy/\"\n", + " \"thredds/ncml/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc\"\n", + " \"?catalog=https%3A%2F%2Fpavics.ouranos.ca%2Ftwitcher%2Fows%2Fproxy%2F\"\n", + " \"thredds%2Fcatalog%2Fbirdhouse%2Ftestdata%2Fxclim%2Fcmip6%2Fcatalog.html\"\n", + " \"&dataset=birdhouse%2Ftestdata%2Fxclim%2Fcmip6%2Fsic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc\"\n", ")" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-09-28T22:12:35.383018674Z", - "start_time": "2023-09-28T22:12:35.132491964Z" + "end_time": "2023-09-29T22:45:33.099193431Z", + "start_time": "2023-09-29T22:45:33.014184569Z" } }, "id": "61f43c81dc3aa6c2" @@ -87,7 +97,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 86, "outputs": [ { "name": "stdout", @@ -98,7 +108,7 @@ "remote: Counting objects: 100% (1557/1557), done.\u001B[K\r\n", "remote: Compressing objects: 100% (476/476), done.\u001B[K\r\n", "remote: Total 63068 (delta 1258), reused 1327 (delta 1070), pack-reused 61511\u001B[K\r\n", - "Receiving objects: 100% (63068/63068), 6.06 MiB | 2.94 MiB/s, done.\r\n", + "Receiving objects: 100% (63068/63068), 6.06 MiB | 5.05 MiB/s, done.\r\n", "Resolving deltas: 100% (60270/60270), done.\r\n", "\r\n", "Local identity for pyessv-archive set to \"Francis Charette Migneault \"\r\n" @@ -113,56 +123,44 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-09-28T22:12:40.467006089Z", - "start_time": "2023-09-28T22:12:35.228660929Z" + "end_time": "2023-09-29T22:45:37.397132140Z", + "start_time": "2023-09-29T22:45:33.063477776Z" } }, "id": "f10d85e12b47da43" }, { "cell_type": "code", - "execution_count": 30, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2023-09-28 22:12:40.483392 [INFO] :: PYESSV :: Loading vocabularies from /home/francis/.esdoc/pyessv-archive ... please wait\n" - ] - } - ], + "execution_count": 87, + "outputs": [], "source": [ + "import hashlib\n", "import json\n", "import tempfile\n", "from datetime import datetime, date\n", "from enum import Enum\n", "\n", "import numpy as np\n", + "import pystac\n", "import requests\n", "import xncml\n", - "import pystac\n", - "from dateutil.parser import parse as dt_parse\n", - "from pystac.extensions import datacube\n", "from pydantic.networks import Url\n", - "from shapely.geometry.polygon import Polygon\n", - "\n", - "import pyessv\n", - "pyessv.init()\n", "\n", - "from STACpopulator.extensions import cmip6 # pylint: disable=C0413 # requires other module init to avoid ImportError" + "from STACpopulator.extensions import cmip6\n", + "from STACpopulator.stac_utils import CFJsonItem, DatacubeExt" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-09-28T22:12:41.260193681Z", - "start_time": "2023-09-28T22:12:40.469074021Z" + "end_time": "2023-09-29T22:45:37.444764964Z", + "start_time": "2023-09-29T22:45:37.400457899Z" } }, "id": "f68ea4339c5e4a9d" }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 88, "outputs": [ { "name": "stdout", @@ -249,7 +247,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -375,95 +373,85 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-09-28T22:12:41.417055537Z", - "start_time": "2023-09-28T22:12:41.181965479Z" + "end_time": "2023-09-29T22:45:37.691589341Z", + "start_time": "2023-09-29T22:45:37.443416749Z" } }, "id": "4fc2f66493dc56c5" }, { "cell_type": "code", - "execution_count": 32, - "outputs": [], + "execution_count": 89, + "outputs": [ + { + "data": { + "text/plain": "" + }, + "execution_count": 89, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "ds = xncml.Dataset(input_ncml)\n", - "attrs = ds.to_cf_dict()\n", - "cfmeta = attrs[\"groups\"][\"CFMetadata\"][\"attributes\"]\n", - "bbox = [\n", - " cfmeta[\"geospatial_lon_min\"][0],\n", - " cfmeta[\"geospatial_lon_max\"][0],\n", - " cfmeta[\"geospatial_lat_min\"][0],\n", - " cfmeta[\"geospatial_lat_max\"][0],\n", - "]\n", - "geom = Polygon.from_bounds(*bbox)\n", + "# FIXME: duplicate code\n", + "# this is defined in:\n", + "# https://github.com/crim-ca/stac-populator/blob/arch-changes/implementations/CMIP6-UofT/add_CMIP6.py#L102-L116\n", + "# but we cannot import it since outside of installed 'STACpopulator' module\n", + "def make_cmip6_item_id(_attrs: \"JsonLike\") -> str:\n", + " \"\"\"Return a unique ID for CMIP6 data item.\"\"\"\n", + " keys = [\n", + " \"activity_id\",\n", + " \"institution_id\",\n", + " \"source_id\",\n", + " \"experiment_id\",\n", + " \"variant_label\",\n", + " \"table_id\",\n", + " \"variable_id\",\n", + " \"grid_label\",\n", + " ]\n", + " name = \"_\".join(_attrs[k] for k in keys)\n", + " return hashlib.md5(name.encode(\"utf-8\")).hexdigest()\n", "\n", - "assets = {\n", - " svc.rsplit(\"_service\", 1)[0]: pystac.Asset(href=svc_link)\n", - " for svc, svc_link in\n", - " attrs[\"groups\"][\"THREDDSMetadata\"][\"groups\"][\"services\"][\"attributes\"].items()\n", - "}\n", "\n", - "item = pystac.Item(\n", - " id=\"test\",\n", - " start_datetime=dt_parse(cfmeta[\"time_coverage_start\"]),\n", - " end_datetime=dt_parse(cfmeta[\"time_coverage_end\"]),\n", - " datetime=None, # uses start/end instead\n", - " bbox=bbox,\n", - " geometry=geom.__geo_interface__, # GeoJSON\n", - " properties={}, # filled by extension after\n", - " assets=assets,\n", - ")\n", + "# FIXME: temporary patch of URL/Media-Type\n", + "# https://github.com/crim-ca/stac-populator/pull/23#discussion_r1341819744\n", + "class CFJsonItemNetCDF(CFJsonItem):\n", + " def item_link(self) -> pystac.Link:\n", + " url = self.attrs[\"@location\"] # NetCDF URL\n", + " name = self.attrs[\"groups\"][\"THREDDSMetadata\"][\"attributes\"][\"id\"]\n", + " path = url.split(name, 1)[0]\n", + " parts = list(filter(lambda _: bool(_), path.rsplit(\"/\", 3)))\n", + " service = parts[-2] # always 1 path part for the service\n", + " link = pystac.Link(\n", + " rel=\"source\",\n", + " target=url,\n", + " media_type=\"application/x-netcdf\",\n", + " title=f\"{service}:{name}\"\n", + " )\n", + " return link\n", "\n", - "dc_dims = {}\n", - "dim_spatial_axis = {\n", - " \"i\": \"x\",\n", - " \"j\": \"y\",\n", - " \"k\": \"z\",\n", - " \"x\": \"x\",\n", - " \"y\": \"y\",\n", - " \"z\": \"z\",\n", - "}\n", - "for dim, val in attrs[\"dimensions\"].items():\n", - " dim_props = {\n", - " \"type\": (\n", - " datacube.DimensionType.SPATIAL if dim in list(dim_spatial_axis) else\n", - " datacube.DimensionType.TEMPORAL if dim in [\"date\", \"time\", \"date-time\", \"datetime\"] else\n", - " \"other\"\n", - " ),\n", - " \"length\": val\n", - " }\n", - " if dim_props[\"type\"] == datacube.DimensionType.SPATIAL.value:\n", - " dim_props[\"axis\"] = dim_spatial_axis[dim]\n", - " dc_dims[dim] = datacube.Dimension(dim_props)\n", "\n", - "dc_vars = {}\n", - "for var, props in attrs[\"variables\"].items():\n", - " var_dims = props[\"shape\"]\n", - " var_props = {\n", - " \"type\": (\n", - " datacube.VariableType.DATA\n", - " if all(_dim in dc_dims for _dim in var_dims)\n", - " else datacube.VariableType.AUXILIARY\n", - " ),\n", - " \"dimensions\": var_dims,\n", - " }\n", - " var_unit = props.get(\"attributes\", {}).get(\"units\")\n", - " if var_unit:\n", - " var_props[\"unit\"] = var_unit\n", - " dc_vars[var] = datacube.Variable(var_props)\n", + "# FIXME: partial duplicate code\n", + "# https://github.com/crim-ca/stac-populator/blob/arch-changes/implementations/CMIP6-UofT/add_CMIP6.py#L138-L165\n", + "# should be combined into a single callable function that doesn't depend on the rest of the THREDDS crawling iterator\n", + "ds = xncml.Dataset(input_ncml)\n", + "attrs = ds.to_cf_dict()\n", "\n", - "datacube_ext = datacube.DatacubeExtension.ext(item, add_if_missing=True)\n", - "datacube_ext.apply(dimensions=dc_dims, variables=dc_vars)\n", + "# FIXME: AttributeError\n", + "nc_services = getattr(ds, \"access_urls\", None)\n", + "if nc_services:\n", + " attrs[\"access_urls\"] = nc_services\n", "\n", - "cmip6_attrs = attrs[\"attributes\"]\n", - "cmip6_ext = cmip6.CMIP6Extension.ext(item, add_if_missing=True)\n", - "cmip6_ext.apply(cmip6_attrs)" + "stac_item_id = make_cmip6_item_id(attrs[\"attributes\"])\n", + "attrs[\"id\"] = stac_item_id\n", + "stac_item = CFJsonItemNetCDF(stac_item_id, attrs, cmip6.Properties)\n", + "DatacubeExt(stac_item)" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-09-28T22:12:41.474936053Z", - "start_time": "2023-09-28T22:12:41.427458731Z" + "end_time": "2023-09-29T22:45:37.711546409Z", + "start_time": "2023-09-29T22:45:37.646200547Z" } }, "id": "299946ccd58e2efc" @@ -480,7 +468,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 90, "outputs": [ { "name": "stdout", @@ -489,159 +477,158 @@ "{\n", " \"type\": \"Feature\",\n", " \"stac_version\": \"1.0.0\",\n", - " \"id\": \"test\",\n", + " \"id\": \"36c83a8bb9d382ff2ffed7b9ba422cd3\",\n", " \"properties\": {\n", - " \"start_datetime\": null,\n", - " \"end_datetime\": null,\n", + " \"start_datetime\": \"2019-12-06T12:00:00Z\",\n", + " \"end_datetime\": \"2020-11-04T12:00:00Z\",\n", + " \"datetime\": null,\n", + " \"Conventions\": \"CF-1.7 CMIP-6.2\",\n", + " \"activity_id\": \"ScenarioMIP\",\n", + " \"creation_date\": \"2019-09-25T23:01:33Z\",\n", + " \"data_specs_version\": \"01.00.30\",\n", + " \"experiment\": \"update of RCP4.5 based on SSP2\",\n", + " \"experiment_id\": \"ssp245\",\n", + " \"frequency\": \"mon\",\n", + " \"further_info_url\": \"https://furtherinfo.es-doc.org/CMIP6.CCCma.CanESM5.ssp245.none.r13i1p2f1\",\n", + " \"grid_label\": \"gn\",\n", + " \"institution\": \"Canadian Centre for Climate Modelling and Analysis, Environment and Climate Change Canada, Victoria, BC V8P 5C2, Canada\",\n", + " \"institution_id\": \"CCCma\",\n", + " \"nominal_resolution\": \"100 km\",\n", + " \"realm\": [\n", + " \"seaIce\"\n", + " ],\n", + " \"source\": \"CanESM5 (2019): \\naerosol: interactive\\natmos: CanAM5 (T63L49 native atmosphere, T63 Linear Gaussian Grid; 128 x 64 longitude/latitude; 49 levels; top level 1 hPa)\\natmosChem: specified oxidants for aerosols\\nland: CLASS3.6/CTEM1.2\\nlandIce: specified ice sheets\\nocean: NEMO3.4.1 (ORCA1 tripolar grid, 1 deg with refinement to 1/3 deg within 20 degrees of the equator; 361 x 290 longitude/latitude; 45 vertical levels; top grid cell 0-6.19 m)\\nocnBgchem: Canadian Model of Ocean Carbon (CMOC); NPZD ecosystem with OMIP prescribed carbonate chemistry\\nseaIce: LIM2\",\n", + " \"source_id\": \"CanESM5\",\n", + " \"source_type\": [\n", + " \"AOGCM\"\n", + " ],\n", + " \"sub_experiment\": \"none\",\n", + " \"sub_experiment_id\": \"none\",\n", + " \"table_id\": \"SImon\",\n", + " \"variable_id\": \"siconc\",\n", + " \"variant_label\": \"r13i1p2f1\",\n", + " \"initialization_index\": 1,\n", + " \"physics_index\": 2,\n", + " \"realization_index\": 13,\n", + " \"forcing_index\": 1,\n", + " \"tracking_id\": \"hdl:21.14100/9e4f804b-c161-44fa-acd1-c2e94e220c95\",\n", + " \"version\": \"v20190429\",\n", + " \"product\": \"model-output\",\n", + " \"license\": \"CMIP6 model data produced by The Government of Canada (Canadian Centre for Climate Modelling and Analysis, Environment and Climate Change Canada) is licensed under a Creative Commons Attribution ShareAlike 4.0 International License (https://creativecommons.org/licenses). Consult https://pcmdi.llnl.gov/CMIP6/TermsOfUse for terms of use governing CMIP6 output, including citation requirements and proper acknowledgment. Further information about this data, including some limitations, can be found via the further_info_url (recorded as a global attribute in this file) and at https:///pcmdi.llnl.gov/. The data producers and data providers make no warranty, either express or implied, including, but not limited to, warranties of merchantability and fitness for a particular purpose. All liabilities arising from the supply of the information (including any liability arising in negligence) are excluded to the fullest extent permitted by law.\",\n", + " \"grid\": \"ORCA1 tripolar grid, 1 deg with refinement to 1/3 deg within 20 degrees of the equator; 361 x 290 longitude/latitude; 45 vertical levels; top grid cell 0-6.19 m\",\n", + " \"mip_era\": \"CMIP6\",\n", " \"cube:dimensions\": {\n", " \"time\": {\n", + " \"axis\": \"t\",\n", " \"type\": \"temporal\",\n", - " \"length\": 12\n", - " },\n", - " \"bnds\": {\n", - " \"type\": \"other\",\n", - " \"length\": 2\n", - " },\n", - " \"vertices\": {\n", - " \"type\": \"other\",\n", - " \"length\": 4\n", - " },\n", - " \"maxStrlen64\": {\n", - " \"type\": \"other\",\n", - " \"length\": 64\n", + " \"extent\": null,\n", + " \"description\": [\n", + " \"time\"\n", + " ]\n", " },\n", " \"j\": {\n", + " \"axis\": \"y\",\n", " \"type\": \"spatial\",\n", - " \"length\": 291,\n", - " \"axis\": \"y\"\n", + " \"extent\": [\n", + " 0,\n", + " 291\n", + " ],\n", + " \"description\": [\n", + " \"projection_y_coordinate\",\n", + " \"grid_latitude\",\n", + " \"projection_y_angular_coordinate\"\n", + " ]\n", " },\n", " \"i\": {\n", + " \"axis\": \"x\",\n", " \"type\": \"spatial\",\n", - " \"length\": 360,\n", - " \"axis\": \"x\"\n", + " \"extent\": [\n", + " 0,\n", + " 360\n", + " ],\n", + " \"description\": [\n", + " \"projection_x_coordinate\",\n", + " \"grid_longitude\",\n", + " \"projection_x_angular_coordinate\"\n", + " ]\n", " }\n", " },\n", " \"cube:variables\": {\n", - " \"time\": {\n", - " \"type\": \"data\",\n", - " \"dimensions\": [\n", - " \"time\"\n", - " ],\n", - " \"unit\": \"days since 1850-01-01\"\n", - " },\n", - " \"j\": {\n", - " \"type\": \"data\",\n", - " \"dimensions\": [\n", - " \"j\"\n", - " ],\n", - " \"unit\": \"1\"\n", - " },\n", - " \"i\": {\n", - " \"type\": \"data\",\n", - " \"dimensions\": [\n", - " \"i\"\n", - " ],\n", - " \"unit\": \"1\"\n", - " },\n", " \"time_bnds\": {\n", - " \"type\": \"data\",\n", " \"dimensions\": [\n", " \"time\",\n", " \"bnds\"\n", - " ]\n", + " ],\n", + " \"type\": \"data\",\n", + " \"description\": null,\n", + " \"unit\": null\n", " },\n", " \"vertices_latitude\": {\n", - " \"type\": \"data\",\n", " \"dimensions\": [\n", " \"j\",\n", " \"i\",\n", " \"vertices\"\n", - " ]\n", + " ],\n", + " \"type\": \"data\",\n", + " \"description\": null,\n", + " \"unit\": null\n", " },\n", " \"vertices_longitude\": {\n", - " \"type\": \"data\",\n", " \"dimensions\": [\n", " \"j\",\n", " \"i\",\n", " \"vertices\"\n", - " ]\n", + " ],\n", + " \"type\": \"data\",\n", + " \"description\": null,\n", + " \"unit\": null\n", " },\n", " \"siconc\": {\n", - " \"type\": \"data\",\n", " \"dimensions\": [\n", " \"time\",\n", " \"j\",\n", " \"i\"\n", " ],\n", + " \"type\": \"data\",\n", + " \"description\": \"Sea-Ice Area Percentage (Ocean Grid)\",\n", " \"unit\": \"%\"\n", " },\n", " \"areacello\": {\n", - " \"type\": \"data\",\n", " \"dimensions\": [\n", " \"j\",\n", " \"i\"\n", " ],\n", + " \"type\": \"data\",\n", + " \"description\": \"Grid-Cell Area for Ocean Variables\",\n", " \"unit\": \"m2\"\n", " },\n", " \"type\": {\n", - " \"type\": \"data\",\n", " \"dimensions\": [\n", " \"maxStrlen64\"\n", - " ]\n", + " ],\n", + " \"type\": \"data\",\n", + " \"description\": \"Sea Ice area type\",\n", + " \"unit\": null\n", " },\n", " \"latitude\": {\n", - " \"type\": \"data\",\n", " \"dimensions\": [\n", " \"j\",\n", " \"i\"\n", " ],\n", + " \"type\": \"auxiliary\",\n", + " \"description\": \"latitude\",\n", " \"unit\": \"degrees_north\"\n", " },\n", " \"longitude\": {\n", - " \"type\": \"data\",\n", " \"dimensions\": [\n", " \"j\",\n", " \"i\"\n", " ],\n", + " \"type\": \"auxiliary\",\n", + " \"description\": \"longitude\",\n", " \"unit\": \"degrees_east\"\n", " }\n", - " },\n", - " \"datetime\": null,\n", - " \"Conventions\": \"CF-1.7 CMIP-6.2\",\n", - " \"activity_id\": \"ScenarioMIP\",\n", - " \"creation_date\": \"2019-09-25T23:01:33+00:00\",\n", - " \"data_specs_version\": \"01.00.30\",\n", - " \"experiment\": \"update of RCP4.5 based on SSP2\",\n", - " \"experiment_id\": \"ssp245\",\n", - " \"frequency\": \"mon\",\n", - " \"further_info_url\": \"https://furtherinfo.es-doc.org/CMIP6.CCCma.CanESM5.ssp245.none.r13i1p2f1\",\n", - " \"grid_label\": \"gn\",\n", - " \"institution\": \"Canadian Centre for Climate Modelling and Analysis, Environment and Climate Change Canada, Victoria, BC V8P 5C2, Canada\",\n", - " \"institution_id\": \"CCCma\",\n", - " \"nominal_resolution\": \"100 km\",\n", - " \"realm\": [\n", - " \"seaIce\"\n", - " ],\n", - " \"source\": \"CanESM5 (2019): \\naerosol: interactive\\natmos: CanAM5 (T63L49 native atmosphere, T63 Linear Gaussian Grid; 128 x 64 longitude/latitude; 49 levels; top level 1 hPa)\\natmosChem: specified oxidants for aerosols\\nland: CLASS3.6/CTEM1.2\\nlandIce: specified ice sheets\\nocean: NEMO3.4.1 (ORCA1 tripolar grid, 1 deg with refinement to 1/3 deg within 20 degrees of the equator; 361 x 290 longitude/latitude; 45 vertical levels; top grid cell 0-6.19 m)\\nocnBgchem: Canadian Model of Ocean Carbon (CMOC); NPZD ecosystem with OMIP prescribed carbonate chemistry\\nseaIce: LIM2\",\n", - " \"source_id\": \"CanESM5\",\n", - " \"source_type\": [\n", - " \"AOGCM\"\n", - " ],\n", - " \"sub_experiment\": \"none\",\n", - " \"sub_experiment_id\": \"none\",\n", - " \"table_id\": \"SImon\",\n", - " \"variable_id\": \"siconc\",\n", - " \"variant_label\": \"r13i1p2f1\",\n", - " \"initialization_index\": 1,\n", - " \"physics_index\": 2,\n", - " \"realization_index\": 13,\n", - " \"forcing_index\": 1,\n", - " \"tracking_id\": \"hdl:21.14100/9e4f804b-c161-44fa-acd1-c2e94e220c95\",\n", - " \"version\": \"v20190429\",\n", - " \"product\": \"model-output\",\n", - " \"license\": \"CMIP6 model data produced by The Government of Canada (Canadian Centre for Climate Modelling and Analysis, Environment and Climate Change Canada) is licensed under a Creative Commons Attribution ShareAlike 4.0 International License (https://creativecommons.org/licenses). Consult https://pcmdi.llnl.gov/CMIP6/TermsOfUse for terms of use governing CMIP6 output, including citation requirements and proper acknowledgment. Further information about this data, including some limitations, can be found via the further_info_url (recorded as a global attribute in this file) and at https:///pcmdi.llnl.gov/. The data producers and data providers make no warranty, either express or implied, including, but not limited to, warranties of merchantability and fitness for a particular purpose. All liabilities arising from the supply of the information (including any liability arising in negligence) are excluded to the fullest extent permitted by law.\",\n", - " \"grid\": \"ORCA1 tripolar grid, 1 deg with refinement to 1/3 deg within 20 degrees of the equator; 361 x 290 longitude/latitude; 45 vertical levels; top grid cell 0-6.19 m\",\n", - " \"mip_era\": \"CMIP6\"\n", + " }\n", " },\n", " \"geometry\": {\n", " \"type\": \"Polygon\",\n", @@ -649,63 +636,89 @@ " [\n", " [\n", " 0.049800001084804535,\n", - " 359.99493408203125\n", + " -78.39350128173828\n", " ],\n", " [\n", " 0.049800001084804535,\n", " 89.74176788330078\n", " ],\n", " [\n", - " -78.39350128173828,\n", + " 359.99493408203125,\n", " 89.74176788330078\n", " ],\n", " [\n", - " -78.39350128173828,\n", - " 359.99493408203125\n", + " 359.99493408203125,\n", + " -78.39350128173828\n", " ],\n", " [\n", " 0.049800001084804535,\n", - " 359.99493408203125\n", + " -78.39350128173828\n", " ]\n", " ]\n", " ]\n", " },\n", - " \"links\": [],\n", + " \"links\": [\n", + " {\n", + " \"rel\": \"source\",\n", + " \"href\": \"https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc\",\n", + " \"type\": \"application/x-netcdf\",\n", + " \"title\": \"thredds:birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc\"\n", + " }\n", + " ],\n", " \"assets\": {\n", - " \"httpserver\": {\n", - " \"href\": \"https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/fileServer/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc\"\n", + " \"httpserver_service\": {\n", + " \"href\": \"https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/fileServer/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc\",\n", + " \"type\": \"application/x-netcdf\",\n", + " \"roles\": [\n", + " \"data\"\n", + " ]\n", " },\n", - " \"opendap\": {\n", - " \"href\": \"https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc\"\n", + " \"opendap_service\": {\n", + " \"href\": \"https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc\",\n", + " \"type\": \"text/html\",\n", + " \"roles\": [\n", + " \"data\"\n", + " ]\n", " },\n", - " \"wcs\": {\n", - " \"href\": \"https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/wcs/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc?service=WCS&version=1.0.0&request=GetCapabilities\"\n", + " \"wcs_service\": {\n", + " \"href\": \"https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/wcs/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc?service=WCS&version=1.0.0&request=GetCapabilities\",\n", + " \"type\": \"application/xml\",\n", + " \"roles\": [\n", + " \"data\"\n", + " ]\n", " },\n", - " \"wms\": {\n", - " \"href\": \"https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/wms/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc?service=WMS&version=1.3.0&request=GetCapabilities\"\n", + " \"wms_service\": {\n", + " \"href\": \"https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/wms/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc?service=WMS&version=1.3.0&request=GetCapabilities\",\n", + " \"type\": \"application/xml\",\n", + " \"roles\": [\n", + " \"visual\"\n", + " ]\n", " },\n", - " \"nccs\": {\n", - " \"href\": \"https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/ncss/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc/dataset.html\"\n", + " \"nccs_service\": {\n", + " \"href\": \"https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/ncss/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc/dataset.html\",\n", + " \"type\": \"application/x-netcdf\",\n", + " \"roles\": [\n", + " \"data\"\n", + " ]\n", " }\n", " },\n", " \"bbox\": [\n", " 0.049800001084804535,\n", - " 359.99493408203125,\n", " -78.39350128173828,\n", + " 359.99493408203125,\n", " 89.74176788330078\n", " ],\n", " \"stac_extensions\": [\n", - " \"https://stac-extensions.github.io/datacube/v2.0.0/schema.json\",\n", - " \"https://stac-extensions.github.io/cmip6/v1.0.0/schema.json\"\n", + " \"https://stac-extensions.github.io/datacube/v2.0.0/schema.json\"\n", " ]\n", "}\n" ] } ], "source": [ - "stac_item = item.to_dict()\n", + "stac_item_data = stac_item.item.to_dict()\n", "\n", - "def json_encode(obj):\n", + "def json_encode(obj: \"pystac.Item\") -> Union[\"JsonLike\", str]:\n", " if isinstance(obj, (np.ndarray, np.number)):\n", " return obj.tolist()\n", " if isinstance(obj, (Url, Enum)):\n", @@ -714,14 +727,14 @@ " return obj.isoformat()\n", " raise TypeError(f\"Type {type(obj)} not serializable\")\n", "\n", - "stac_item_json = json.dumps(stac_item, default=json_encode, indent=2)\n", + "stac_item_json = json.dumps(stac_item_data, default=json_encode, indent=2)\n", "print(stac_item_json)" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-09-28T22:12:41.484610088Z", - "start_time": "2023-09-28T22:12:41.456950960Z" + "end_time": "2023-09-29T22:45:37.741924891Z", + "start_time": "2023-09-29T22:45:37.672054869Z" } }, "id": "4eeb52c23edccb31" @@ -738,7 +751,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 91, "outputs": [], "source": [ "# NOTE:\n", @@ -746,13 +759,13 @@ "# This is to generate the corresponding glob pattern that will collect the output from the CWL execution.\n", "output: \"CWLFilePathOutput\" = \"ncml2stac.json\"\n", "with open(output, mode=\"w\", encoding=\"utf-8\") as out_file:\n", - " json.dump(stac_item, out_file, default=json_encode)\n" + " json.dump(stac_item_data, out_file, default=json_encode)\n" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-09-28T22:12:41.544969400Z", - "start_time": "2023-09-28T22:12:41.477862056Z" + "end_time": "2023-09-29T22:45:37.742086738Z", + "start_time": "2023-09-29T22:45:37.715603867Z" } }, "id": "e4fa98fcad8b5556" @@ -760,7 +773,7 @@ ], "metadata": { "kernelspec": { - "name": "python3", + "name": "ncml2stac", "language": "python", "display_name": "ncml2stac" }, diff --git a/requirements.txt b/requirements.txt index 27ad137..0920de8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,8 +2,4 @@ # Following does not work # STACpopulator @ https://github.com/crim-ca/stac-populator/archive/refs/heads/weaver-repo2cwl-ncml2stac.zip # Also, editable '-e' required, otherwise module still not found... --e git+https://github.com/crim-ca/stac-populator@weaver-repo2cwl-ncml2stac#egg=STACpopulator -xncml - -# extra requirements for notebook -shapely +-e git+https://github.com/Ouranosinc/stac-populator@collection_link#egg=STACpopulator diff --git a/setup.cfg b/setup.cfg index faba0a0..4a4a05e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -45,8 +45,9 @@ filterwarnings = nb_diff_ignore = /metadata /cells/*/execution_count + /cells/*/outputs/*/execution_count /cells/4/outputs/0/text - /cells/5/outputs/0/text + /cells/5/outputs/ /cells/7/outputs/ [isort]