Add ESMValTool example metric

CMIP-REF · Dec 12, 2024 · 8b262c0 · 8b262c0
1 parent c3ea68b
commit 8b262c0
Show file tree

Hide file tree

Showing 10 changed files with 432 additions and 0 deletions.
diff --git a/Makefile b/Makefile
@@ -32,6 +32,7 @@ mypy:  ## run mypy on the codebase
 	MYPYPATH=stubs uv run --package ref-core mypy packages/ref-core
 	MYPYPATH=stubs uv run --package ref mypy packages/ref
 	MYPYPATH=stubs uv run --package ref-metrics-example mypy packages/ref-metrics-example
+	MYPYPATH=stubs uv run --package ref-metrics-esmvaltool mypy packages/ref-metrics-esmvaltool
 
 .PHONY: ruff-fixes
 ruff-fixes:  ## fix the code using ruff

diff --git a/packages/ref-metrics-esmvaltool/README.md b/packages/ref-metrics-esmvaltool/README.md
@@ -0,0 +1,8 @@
+# ref-metrics-esmvaltool
+
+Use [ESMValTool](https://esmvaltool.org/) as a REF metrics provider.
+
+To use this, install ESMValTool and then install the REF into the same conda
+environment.
+
+See [running-metrics-locally](https://cmip-ref.readthedocs.io/en/latest/how-to-guides/running-metrics-locally/) for usage instructions.
diff --git a/packages/ref-metrics-esmvaltool/pyproject.toml b/packages/ref-metrics-esmvaltool/pyproject.toml
@@ -0,0 +1,39 @@
+[project]
+name = "ref-metrics-esmvaltool"
+version = "0.1.0"
+description = "ESMValTool metrics provider for the CMIP Rapid Evaluation Framework"
+readme = "README.md"
+authors = [
+    { name = "ESMValTool development team", email = "esmvaltool-dev@listserv.dfn.de" }
+]
+requires-python = ">=3.10"
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Intended Audience :: Developers",
+    "Operating System :: OS Independent",
+    "Intended Audience :: Science/Research",
+    "Programming Language :: Python",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
+    "Topic :: Scientific/Engineering",
+]
+dependencies = [
+    "pooch",
+    "ref-core",
+    "ruamel.yaml",
+    "xarray",
+]
+
+[project.license]
+text = "Apache-2.0"
+
+[tool.uv]
+dev-dependencies = [
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
diff --git a/packages/ref-metrics-esmvaltool/src/ref_metrics_esmvaltool/__init__.py b/packages/ref-metrics-esmvaltool/src/ref_metrics_esmvaltool/__init__.py
@@ -0,0 +1,16 @@
+"""
+Rapid evaluation of CMIP data with ESMValTool.
+"""
+
+import importlib.metadata
+
+from ref_core.providers import MetricsProvider
+
+from ref_metrics_esmvaltool.example import GlobalMeanTimeseries
+
+# Versions of the installed distributions, looked up from package metadata.
+__version__ = importlib.metadata.version("ref_metrics_esmvaltool")
+__core_version__ = importlib.metadata.version("ref_core")
+
+# Create the "ESMValTool" metrics provider and register the example metric.
+# This runs at import time, so importing the package is enough to obtain a
+# populated `provider`.
+provider = MetricsProvider("ESMValTool", __version__)
+provider.register(GlobalMeanTimeseries())
diff --git a/packages/ref-metrics-esmvaltool/src/ref_metrics_esmvaltool/example.py b/packages/ref-metrics-esmvaltool/src/ref_metrics_esmvaltool/example.py
@@ -0,0 +1,108 @@
+from typing import Any
+
+import xarray as xr
+from ref_core.datasets import FacetFilter, SourceDatasetType
+from ref_core.metrics import DataRequirement, Metric, MetricExecutionDefinition, MetricResult
+from ruamel.yaml import YAML
+
+from ref_metrics_esmvaltool.recipe import dataframe_to_recipe, load_recipe, run_recipe
+
+# Module-level YAML handler.
+# NOTE(review): nothing in this module appears to reference it - confirm whether it is needed.
+yaml = YAML()
+
+
+def format_cmec_output_bundle(dataset: xr.Dataset) -> dict[str, Any]:
+    """
+    Create a simple CMEC output bundle for the dataset.
+
+    Only ``dataset.attrs["source_id"]`` is read; the reported value for ``tas``
+    is a hard-coded placeholder (``0``), not something computed from the data.
+
+    Parameters
+    ----------
+    dataset
+        Processed dataset. Must carry a ``source_id`` attribute.
+
+    Returns
+    -------
+        A CMEC output bundle ready to be written to disk
+
+    Raises
+    ------
+    KeyError
+        If the dataset has no ``source_id`` attribute.
+    """
+    source_id = dataset.attrs["source_id"]
+
+    # One entry per level of nesting used in RESULTS, outermost first.
+    dimensions = {
+        "source_id": {source_id: {}},
+        "region": {"global": {}},
+        "variable": {"tas": {}},
+    }
+
+    # TODO: Check how timeseries data are generally serialised
+    cmec_output: dict[str, Any] = {
+        "DIMENSIONS": {
+            "dimensions": dimensions,
+            # json_structure has to name the keys of "dimensions" in the order
+            # they are nested in RESULTS, so derive it from the dimensions
+            # themselves instead of maintaining a second, divergent list.
+            "json_structure": list(dimensions),
+        },
+        # Is the schema tracked?
+        "SCHEMA": {
+            "name": "CMEC-REF",
+            "package": "example",
+            "version": "v1",
+        },
+        "RESULTS": {
+            source_id: {"global": {"tas": 0}},
+        },
+    }
+
+    return cmec_output
+
+
+class GlobalMeanTimeseries(Metric):
+    """
+    Calculate the annual mean global mean timeseries for a dataset
+
+    The metric reuses ESMValTool's ``examples/recipe_python.yml`` recipe with
+    the ``map`` diagnostic removed and the datasets/variables of the
+    ``timeseries`` diagnostic replaced by the datasets of the execution.
+    """
+
+    name = "Global Mean Timeseries"
+    slug = "global-mean-timeseries"
+
+    data_requirements = (
+        DataRequirement(
+            source_type=SourceDatasetType.CMIP6,
+            filters=(FacetFilter(facets={"variable_id": ("tas",)}),),
+            # Add cell areas to the groups
+            # constraints=(AddCellAreas(),),
+            # Run the metric on each unique combination of model, variable, experiment, and variant
+            group_by=("source_id", "variable_id", "experiment_id", "variant_label"),
+        ),
+    )
+
+    def run(self, definition: MetricExecutionDefinition) -> MetricResult:
+        """
+        Run a metric
+
+        Parameters
+        ----------
+        definition
+            A description of the information needed for this execution of the metric
+
+        Returns
+        -------
+        :
+            The result of running the metric.
+
+        Raises
+        ------
+        FileNotFoundError
+            If the recipe run did not produce a NetCDF file for the
+            ``timeseries`` diagnostic.
+        """
+        # Load recipe and clear unwanted elements
+        recipe = load_recipe("examples/recipe_python.yml")
+        recipe["datasets"].clear()
+        recipe["diagnostics"].pop("map")
+        variables = recipe["diagnostics"]["timeseries"]["variables"]
+        variables.clear()
+
+        # Prepare updated variables section in recipe.
+        recipe_variables = dataframe_to_recipe(definition.metric_dataset[SourceDatasetType.CMIP6].datasets)
+        for variable in recipe_variables.values():
+            variable["preprocessor"] = "annual_mean_global"
+            # Plain string, not an f-string: the {placeholders} are written to
+            # the recipe as-is (presumably filled in by ESMValTool - confirm).
+            variable["caption"] = "Annual global mean {long_name} according to {dataset}."
+
+        # Populate recipe with new variables/datasets.
+        variables.update(recipe_variables)
+
+        # Run recipe
+        result_dir = run_recipe(recipe, definition)
+        result = next(result_dir.glob("work/timeseries/script1/*.nc"), None)
+        if result is None:
+            # Fail with a descriptive error instead of a bare StopIteration.
+            msg = f"No NetCDF output found in {result_dir / 'work/timeseries/script1'}"
+            raise FileNotFoundError(msg)
+
+        # The bundle only needs the dataset's attributes, so build it while the
+        # file is open and release the file handle straight afterwards.
+        with xr.open_dataset(result) as annual_mean_global_mean_timeseries:
+            cmec_output = format_cmec_output_bundle(annual_mean_global_mean_timeseries)
+
+        return MetricResult.build(definition, cmec_output)
diff --git a/packages/ref-metrics-esmvaltool/src/ref_metrics_esmvaltool/py.typed b/packages/ref-metrics-esmvaltool/src/ref_metrics_esmvaltool/py.typed
diff --git a/packages/ref-metrics-esmvaltool/src/ref_metrics_esmvaltool/recipe.py b/packages/ref-metrics-esmvaltool/src/ref_metrics_esmvaltool/recipe.py
@@ -0,0 +1,197 @@
+from __future__ import annotations
+
+import subprocess
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+import pkg_resources
+import pooch  # type: ignore[import-untyped]
+from ref_core.datasets import SourceDatasetType
+from ref_core.metrics import MetricExecutionDefinition
+from ruamel.yaml import YAML
+
+if TYPE_CHECKING:
+    import pandas as pd
+
+# Module-level YAML handler used to load recipes and to write the recipe and
+# configuration files.
+yaml = YAML()
+
+# Per project: mapping from ESMValTool facet names (keys) to the names of the
+# columns in the REF datasets dataframe that hold the corresponding values.
+FACETS = {
+    "CMIP6": {
+        "dataset": "source_id",
+        "ensemble": "member_id",
+        "exp": "experiment_id",
+        "grid": "grid_label",
+        "mip": "table_id",
+        "short_name": "variable_id",
+    },
+}
+
+
+def as_isodate(timestamp: pd._libs.tslibs.timestamps.Timestamp) -> str:
+    """Render a timestamp as a compact ISO 8601 datetime string.
+
+    The string form of the timestamp is used, with the space between date and
+    time turned into ``T`` and every ``-`` and ``:`` removed. For example,
+    '2014-12-16 12:00:00' becomes '20141216T120000'.
+
+    Parameters
+    ----------
+    timestamp
+        The timestamp to format.
+
+    Returns
+    -------
+        The compact datetime string.
+    """
+    # Single pass over the characters: " " -> "T", "-" and ":" are dropped.
+    separators = str.maketrans({" ": "T", "-": None, ":": None})
+    return str(timestamp).translate(separators)
+
+
+def as_timerange(
+    start_time: pd._libs.tslibs.timestamps.Timestamp,
+    end_time: pd._libs.tslibs.timestamps.Timestamp,
+) -> str:
+    """Build an ESMValTool timerange from a start and an end time.
+
+    Both times are formatted with :func:`as_isodate` and joined by ``/``.
+
+    Parameters
+    ----------
+    start_time
+        A start time.
+    end_time
+        An end time.
+
+    Returns
+    -------
+        A timerange of the form ``<start>/<end>``.
+    """
+    endpoints = (as_isodate(start_time), as_isodate(end_time))
+    return "/".join(endpoints)
+
+
+def as_facets(
+    row: pd.core.frame.Pandas,  # type: ignore[name-defined]
+) -> dict[str, Any]:
+    """Translate one row of the datasets dataframe into ESMValTool facets.
+
+    The project is the part of ``row.instance_id`` before the first ``.``;
+    it selects which entry of ``FACETS`` is used to map columns to facets.
+
+    Parameters
+    ----------
+    row
+        A row of the datasets dataframe.
+
+    Returns
+    -------
+        A :obj:`dict` containing facet-value pairs.
+
+    """
+    project, _, _ = row.instance_id.partition(".")
+    facets: dict[str, Any] = {"project": project}
+    # Copy each mapped dataframe column under its ESMValTool facet name.
+    facets.update(
+        {esmvaltool_name: getattr(row, ref_name) for esmvaltool_name, ref_name in FACETS[project].items()}
+    )
+    facets["timerange"] = as_timerange(row.start_time, row.end_time)
+    return facets
+
+
+def dataframe_to_recipe(datasets: pd.DataFrame) -> dict[str, Any]:
+    """Build a recipe "variables" section from the datasets dataframe.
+
+    Every row becomes one entry in the ``additional_datasets`` list of the
+    variable named by the row's ``short_name`` facet.
+
+    Parameters
+    ----------
+    datasets
+        The pandas dataframe describing the input datasets.
+
+    Returns
+    -------
+        A "variables" section that can be used in an ESMValTool recipe.
+    """
+    variables: dict[str, Any] = {}
+    for row in datasets.itertuples():
+        facets = as_facets(row)
+        # The short name keys the variable, so it is not repeated per dataset.
+        variable = variables.setdefault(facets.pop("short_name"), {"additional_datasets": []})
+        variable["additional_datasets"].append(facets)
+    return variables
+
+
+# ESMValTool release whose recipes are downloaded; passed to pooch as `version`.
+_ESMVALTOOL_VERSION = "2.11.0"
+
+# Download manager for ESMValTool recipes. Files are cached in the OS cache
+# directory unless the REF_METRICS_ESMVALTOOL_DATA_DIR environment variable is set.
+_RECIPES = pooch.create(
+    path=pooch.os_cache("ref_metrics_esmvaltool"),
+    base_url="https://raw.githubusercontent.com/ESMValGroup/ESMValTool/refs/tags/v{version}/esmvaltool/recipes/",
+    version=_ESMVALTOOL_VERSION,
+    env="REF_METRICS_ESMVALTOOL_DATA_DIR",
+)
+# Read the packaged registry (recipe name and hash per line) and close the
+# stream afterwards instead of leaving it open for the life of the process.
+# NOTE(review): pkg_resources is deprecated upstream in favour of
+# importlib.resources - consider migrating.
+with pkg_resources.resource_stream("ref_metrics_esmvaltool", "recipes.txt") as _registry_file:
+    _RECIPES.load_registry(_registry_file)
+
+
+def load_recipe(recipe: str) -> dict[str, Any]:
+    """Fetch an ESMValTool recipe and parse it as YAML.
+
+    Parameters
+    ----------
+    recipe
+        The name of an ESMValTool recipe, as listed in the recipe registry.
+
+    Returns
+    -------
+        The loaded recipe.
+    """
+    recipe_path = Path(_RECIPES.fetch(recipe))
+    recipe_text = recipe_path.read_text(encoding="utf-8")
+    return yaml.load(recipe_text)  # type: ignore[no-any-return]
+
+
+def prepare_climate_data(datasets: pd.DataFrame, climate_data_dir: Path) -> None:
+    """Symlink the input files from the Pandas dataframe into a directory tree.
+
+    This ensures that ESMValTool can find the data and only uses the
+    requested data.
+
+    Each file is linked at ``climate_data_dir/<instance_id split on ".">/<file name>``.
+    Calling the function again for the same directory replaces the links
+    rather than failing because they already exist.
+
+    Parameters
+    ----------
+    datasets
+        The pandas dataframe describing the input datasets. The ``instance_id``
+        and ``path`` columns are used.
+    climate_data_dir
+        The directory where ESMValTool should look for input data.
+    """
+    for row in datasets.itertuples():
+        tgt = climate_data_dir.joinpath(*row.instance_id.split(".")) / Path(row.path).name
+        tgt.parent.mkdir(parents=True, exist_ok=True)
+        # Drop a link left over from a previous run (also when it is dangling)
+        # so that it always points at the currently requested file.
+        tgt.unlink(missing_ok=True)
+        tgt.symlink_to(row.path)
+
+
+def run_recipe(recipe: dict[str, Any], definition: MetricExecutionDefinition) -> Path:
+    """Run an ESMValTool recipe.
+
+    The recipe, the symlinked input data, the ESMValTool configuration and the
+    results are all written below ``definition.output_fragment``.
+
+    Parameters
+    ----------
+    recipe
+        The ESMValTool recipe.
+    definition
+        A description of the information needed for this execution of the metric.
+
+    Returns
+    -------
+        The directory containing the results of the recipe run.
+
+    Raises
+    ------
+    subprocess.CalledProcessError
+        If the ``esmvaltool`` command exits with a non-zero status.
+    FileNotFoundError
+        If the run did not create anything in the results directory.
+    """
+    output_dir = definition.output_fragment
+
+    recipe_path = output_dir / "recipe_test.yml"
+    with recipe_path.open("w", encoding="utf-8") as file:
+        yaml.dump(recipe, file)
+
+    climate_data = output_dir / "climate_data"
+
+    prepare_climate_data(
+        definition.metric_dataset[SourceDatasetType.CMIP6].datasets,
+        climate_data_dir=climate_data,
+    )
+
+    results_dir = output_dir / "results"
+    config = {
+        "output_dir": str(results_dir),
+        "rootpath": {
+            "default": str(climate_data),
+        },
+        "drs": {
+            "CMIP6": "ESGF",
+        },
+    }
+    config_dir = output_dir / "config"
+    # Tolerate a configuration directory left behind by an earlier run.
+    config_dir.mkdir(exist_ok=True)
+    with (config_dir / "config.yml").open("w", encoding="utf-8") as file:
+        yaml.dump(config, file)
+
+    subprocess.check_call(["esmvaltool", "run", f"--config-dir={config_dir}", f"{recipe_path}"])  # noqa: S603, S607
+
+    # Sort so the choice is deterministic when several entries exist.
+    # NOTE(review): presumably ESMValTool names run directories with a
+    # timestamp suffix, making the last entry the most recent run - confirm.
+    run_dirs = sorted(results_dir.glob("*"))
+    if not run_dirs:
+        msg = f"ESMValTool did not write any results to {results_dir}"
+        raise FileNotFoundError(msg)
+    return run_dirs[-1]
diff --git a/packages/ref-metrics-esmvaltool/src/ref_metrics_esmvaltool/recipes.txt b/packages/ref-metrics-esmvaltool/src/ref_metrics_esmvaltool/recipes.txt
@@ -0,0 +1 @@
+examples/recipe_python.yml ab3f06d269bb2c1368f4dc39da9bcb232fb2adb1fa556ba769e6c16294ffb4a3
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		examples/recipe_python.yml ab3f06d269bb2c1368f4dc39da9bcb232fb2adb1fa556ba769e6c16294ffb4a3