From 8b262c0956d0006a89b3f7ad19754e62c8829040 Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Thu, 12 Dec 2024 17:12:03 +0100 Subject: [PATCH] Add ESMValTool example metric --- Makefile | 1 + packages/ref-metrics-esmvaltool/README.md | 8 + .../ref-metrics-esmvaltool/pyproject.toml | 39 ++++ .../src/ref_metrics_esmvaltool/__init__.py | 16 ++ .../src/ref_metrics_esmvaltool/example.py | 108 ++++++++++ .../src/ref_metrics_esmvaltool/py.typed | 0 .../src/ref_metrics_esmvaltool/recipe.py | 197 ++++++++++++++++++ .../src/ref_metrics_esmvaltool/recipes.txt | 1 + .../tests/unit/test_metrics.py | 47 +++++ .../tests/unit/test_provider.py | 15 ++ 10 files changed, 432 insertions(+) create mode 100644 packages/ref-metrics-esmvaltool/README.md create mode 100644 packages/ref-metrics-esmvaltool/pyproject.toml create mode 100644 packages/ref-metrics-esmvaltool/src/ref_metrics_esmvaltool/__init__.py create mode 100644 packages/ref-metrics-esmvaltool/src/ref_metrics_esmvaltool/example.py create mode 100644 packages/ref-metrics-esmvaltool/src/ref_metrics_esmvaltool/py.typed create mode 100644 packages/ref-metrics-esmvaltool/src/ref_metrics_esmvaltool/recipe.py create mode 100644 packages/ref-metrics-esmvaltool/src/ref_metrics_esmvaltool/recipes.txt create mode 100644 packages/ref-metrics-esmvaltool/tests/unit/test_metrics.py create mode 100644 packages/ref-metrics-esmvaltool/tests/unit/test_provider.py diff --git a/Makefile b/Makefile index 0a1c023..dfddb6f 100644 --- a/Makefile +++ b/Makefile @@ -32,6 +32,7 @@ mypy: ## run mypy on the codebase MYPYPATH=stubs uv run --package ref-core mypy packages/ref-core MYPYPATH=stubs uv run --package ref mypy packages/ref MYPYPATH=stubs uv run --package ref-metrics-example mypy packages/ref-metrics-example + MYPYPATH=stubs uv run --package ref-metrics-esmvaltool mypy packages/ref-metrics-esmvaltool .PHONY: ruff-fixes ruff-fixes: ## fix the code using ruff diff --git a/packages/ref-metrics-esmvaltool/README.md 
b/packages/ref-metrics-esmvaltool/README.md new file mode 100644 index 0000000..34e7f1d --- /dev/null +++ b/packages/ref-metrics-esmvaltool/README.md @@ -0,0 +1,8 @@ +# ref-metrics-esmvaltool + +Use [ESMValTool](https://esmvaltool.org/) as a REF metrics provider. + +To use this, install ESMValTool and then install the REF into the same conda +environment. + +See [running-metrics-locally](https://cmip-ref.readthedocs.io/en/latest/how-to-guides/running-metrics-locally/) for usage instructions. diff --git a/packages/ref-metrics-esmvaltool/pyproject.toml b/packages/ref-metrics-esmvaltool/pyproject.toml new file mode 100644 index 0000000..0ea27b6 --- /dev/null +++ b/packages/ref-metrics-esmvaltool/pyproject.toml @@ -0,0 +1,39 @@ +[project] +name = "ref-metrics-esmvaltool" +version = "0.1.0" +description = "ESMValTool metrics provider for the CMIP Rapid Evaluation Framework" +readme = "README.md" +authors = [ + { name = "ESMValTool development team", email = "esmvaltool-dev@listserv.dfn.de " } +] +requires-python = ">=3.10" +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Operating System :: OS Independent", + "Intended Audience :: Science/Research", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Topic :: Scientific/Engineering", +] +dependencies = [ + "pooch", + "ref-core", + "ruamel.yaml", + "xarray", +] + +[project.license] +text = "Apache-2.0" + +[tool.uv] +dev-dependencies = [ +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" diff --git a/packages/ref-metrics-esmvaltool/src/ref_metrics_esmvaltool/__init__.py b/packages/ref-metrics-esmvaltool/src/ref_metrics_esmvaltool/__init__.py new file mode 100644 index 0000000..6d56daf --- /dev/null +++ 
def format_cmec_output_bundle(dataset: "xr.Dataset") -> dict[str, Any]:
    """
    Create a simple CMEC output bundle for the dataset.

    Only the ``source_id`` entry of ``dataset.attrs`` is read. The region
    ("global"), the variable ("tas") and the reported value (0) are fixed
    placeholders.

    Parameters
    ----------
    dataset
        Processed dataset; its ``attrs`` must contain ``source_id``.

    Returns
    -------
    A CMEC output bundle ready to be written to disk

    Raises
    ------
    KeyError
        If ``dataset.attrs`` has no ``source_id`` entry.
    """
    source_id = dataset.attrs["source_id"]

    # The keys are listed in the same order in which RESULTS is nested below
    # (source_id -> region -> variable).
    dimensions = {
        "source_id": {source_id: {}},
        "region": {"global": {}},
        "variable": {"tas": {}},
    }

    # TODO: Check how timeseries data are generally serialised
    cmec_output = {
        "DIMENSIONS": {
            "dimensions": dimensions,
            # Derived from the dimension keys so that the declared structure
            # cannot drift away from the dimensions that are actually
            # described (it previously named "model" and "statistic", which
            # are not keys of "dimensions").
            "json_structure": list(dimensions),
        },
        # Is the schema tracked?
        "SCHEMA": {
            "name": "CMEC-REF",
            "package": "example",
            "version": "v1",
        },
        "RESULTS": {
            source_id: {"global": {"tas": 0}},
        },
    }

    return cmec_output
def as_isodate(timestamp: pd._libs.tslibs.timestamps.Timestamp) -> str:
    """Render a timestamp in compact ISO 8601 notation.

    The string form of the timestamp is taken, the blank between date and
    time becomes a "T", and every "-" and ":" is dropped. For example,
    '2014-12-16 12:00:00' is turned into '20141216T120000'.

    Parameters
    ----------
    timestamp
        The timestamp to format.

    Returns
    -------
    The compact representation of `timestamp`.
    """
    # A single translation pass: " " -> "T", "-" and ":" are deleted.
    compact = str.maketrans({" ": "T", "-": None, ":": None})
    return str(timestamp).translate(compact)
def dataframe_to_recipe(datasets: pd.DataFrame) -> dict[str, Any]:
    """Build the "variables" section of a recipe from the datasets dataframe.

    Every row is converted to ESMValTool facets with `as_facets`. The
    ``short_name`` facet is taken out of the facets and used as the key of
    the variable; the remaining facets are appended to that variable's
    ``additional_datasets`` list.

    Parameters
    ----------
    datasets
        The pandas dataframe describing the input datasets.

    Returns
    -------
    A "variables" section that can be used in an ESMValTool recipe.
    """
    recipe_variables: dict[str, Any] = {}
    for record in datasets.itertuples():
        dataset_facets = as_facets(record)
        name = dataset_facets.pop("short_name")
        # The first dataset of a variable creates its entry, later ones reuse it.
        entry = recipe_variables.setdefault(name, {"additional_datasets": []})
        entry["additional_datasets"].append(dataset_facets)
    return recipe_variables
def run_recipe(recipe: dict[str, Any], definition: MetricExecutionDefinition) -> Path:
    """Run an ESMValTool recipe.

    The recipe, a directory tree with symlinks to the input data and an
    ESMValTool configuration file are written below
    ``definition.output_fragment``. The ``esmvaltool run`` command is then
    executed as a subprocess.

    Parameters
    ----------
    recipe
        The ESMValTool recipe.
    definition
        A description of the information needed for this execution of the metric.

    Returns
    -------
    The directory inside the ``results`` directory that was found after the
    run.

    Raises
    ------
    subprocess.CalledProcessError
        If the ``esmvaltool`` command exits with a non-zero status.
    FileNotFoundError
        If the ``results`` directory is empty after the run.
    """
    output_dir = definition.output_fragment

    recipe_path = output_dir / "recipe_test.yml"
    with recipe_path.open("w", encoding="utf-8") as file:
        yaml.dump(recipe, file)

    climate_data = output_dir / "climate_data"

    prepare_climate_data(
        definition.metric_dataset[SourceDatasetType.CMIP6].datasets,
        climate_data_dir=climate_data,
    )

    results_dir = output_dir / "results"
    config = {
        "output_dir": str(results_dir),
        "rootpath": {
            "default": str(climate_data),
        },
        "drs": {
            "CMIP6": "ESGF",
        },
    }
    config_dir = output_dir / "config"
    # Do not fail merely because the configuration directory is already there.
    config_dir.mkdir(parents=True, exist_ok=True)
    with (config_dir / "config.yml").open("w", encoding="utf-8") as file:
        yaml.dump(config, file)

    subprocess.check_call(["esmvaltool", "run", f"--config-dir={config_dir}", f"{recipe_path}"])  # noqa: S603, S607

    # A bare next() would leak StopIteration to the caller when nothing was
    # written; report the missing output explicitly instead.
    result = next(results_dir.glob("*"), None)
    if result is None:
        msg = f"ESMValTool did not write any output to '{results_dir}'"
        raise FileNotFoundError(msg)
    return result
def test_annual_mean(esgf_data_dir, metric_dataset):
    # NOTE(review): `calculate_annual_mean_timeseries` is imported by this test module from
    # `ref_metrics_esmvaltool.example`, but the `example.py` added in the same patch only
    # defines `format_cmec_output_bundle` and `GlobalMeanTimeseries`. As written, that import
    # looks like it fails at collection time and takes every test in this module down with
    # it -- confirm, then either add the helper or drop this test together with the import.
    annual_mean = calculate_annual_mean_timeseries(metric_dataset["cmip6"].path.to_list())

    assert annual_mean.time.size == 286