Skip to content

Commit

Permalink
Add ESMValTool example metric
Browse files Browse the repository at this point in the history
  • Loading branch information
bouweandela committed Dec 12, 2024
1 parent c3ea68b commit 8b262c0
Show file tree
Hide file tree
Showing 10 changed files with 432 additions and 0 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ mypy: ## run mypy on the codebase
MYPYPATH=stubs uv run --package ref-core mypy packages/ref-core
MYPYPATH=stubs uv run --package ref mypy packages/ref
MYPYPATH=stubs uv run --package ref-metrics-example mypy packages/ref-metrics-example
MYPYPATH=stubs uv run --package ref-metrics-esmvaltool mypy packages/ref-metrics-esmvaltool

.PHONY: ruff-fixes
ruff-fixes: ## fix the code using ruff
Expand Down
8 changes: 8 additions & 0 deletions packages/ref-metrics-esmvaltool/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# ref-metrics-esmvaltool

Use [ESMValTool](https://esmvaltool.org/) as a REF metrics provider.

To use this, install ESMValTool and then install the REF into the same conda
environment.

See [running-metrics-locally](https://cmip-ref.readthedocs.io/en/latest/how-to-guides/running-metrics-locally/) for usage instructions.
39 changes: 39 additions & 0 deletions packages/ref-metrics-esmvaltool/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
[project]
name = "ref-metrics-esmvaltool"
version = "0.1.0"
description = "ESMValTool metrics provider for the CMIP Rapid Evaluation Framework"
readme = "README.md"
authors = [
{ name = "ESMValTool development team", email = "esmvaltool-dev@listserv.dfn.de" }
]
requires-python = ">=3.10"
classifiers = [
"Development Status :: 4 - Beta",
"Intended Audience :: Developers",
"Operating System :: OS Independent",
"Intended Audience :: Science/Research",
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Topic :: Scientific/Engineering",
]
dependencies = [
"pooch",
"ref-core",
"ruamel.yaml",
"xarray",
]

[project.license]
text = "Apache-2.0"

[tool.uv]
dev-dependencies = [
]

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
"""
Rapid evaluation of CMIP data with ESMValTool.
"""

import importlib.metadata

from ref_core.providers import MetricsProvider

from ref_metrics_esmvaltool.example import GlobalMeanTimeseries

# Versions of this package and of ref-core, looked up from the installed package metadata
__version__ = importlib.metadata.version("ref_metrics_esmvaltool")
__core_version__ = importlib.metadata.version("ref_core")

# Initialise the metrics manager and register the example metric
provider = MetricsProvider("ESMValTool", __version__)
provider.register(GlobalMeanTimeseries())
108 changes: 108 additions & 0 deletions packages/ref-metrics-esmvaltool/src/ref_metrics_esmvaltool/example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
from typing import Any

import xarray as xr
from ref_core.datasets import FacetFilter, SourceDatasetType
from ref_core.metrics import DataRequirement, Metric, MetricExecutionDefinition, MetricResult
from ruamel.yaml import YAML

from ref_metrics_esmvaltool.recipe import dataframe_to_recipe, load_recipe, run_recipe

# Module-level YAML handler.
# NOTE(review): nothing else in this module, as shown, references `yaml` - confirm whether it is still needed.
yaml = YAML()


def format_cmec_output_bundle(dataset: xr.Dataset) -> dict[str, Any]:
    """
    Create a simple CMEC output bundle for the dataset.

    Parameters
    ----------
    dataset
        Processed dataset. Only its ``source_id`` global attribute is read.

    Returns
    -------
        A CMEC output bundle ready to be written to disk

    Raises
    ------
    KeyError
        If ``dataset.attrs`` does not contain ``source_id``.
    """
    source_id = dataset.attrs["source_id"]

    # TODO: Check how timeseries data are generally serialised
    cmec_output = {
        "DIMENSIONS": {
            "dimensions": {
                "source_id": {source_id: {}},
                "region": {"global": {}},
                "variable": {"tas": {}},
            },
            # Names the keys of "dimensions" above, in the order in which
            # "RESULTS" below is nested (source_id -> region -> variable).
            "json_structure": [
                "source_id",
                "region",
                "variable",
            ],
        },
        # Is the schema tracked?
        "SCHEMA": {
            "name": "CMEC-REF",
            "package": "example",
            "version": "v1",
        },
        "RESULTS": {
            # Placeholder value: no statistic is derived from the dataset yet.
            source_id: {"global": {"tas": 0}},
        },
    }

    return cmec_output


class GlobalMeanTimeseries(Metric):
    """
    Calculate the annual mean global mean timeseries for a dataset
    """

    name = "Global Mean Timeseries"
    slug = "global-mean-timeseries"

    data_requirements = (
        DataRequirement(
            source_type=SourceDatasetType.CMIP6,
            filters=(FacetFilter(facets={"variable_id": ("tas",)}),),
            # Add cell areas to the groups
            # constraints=(AddCellAreas(),),
            # Run the metric on each unique combination of model, variable, experiment, and variant
            group_by=("source_id", "variable_id", "experiment_id", "variant_label"),
        ),
    )

    def run(self, definition: MetricExecutionDefinition) -> MetricResult:
        """
        Run a metric

        Parameters
        ----------
        definition
            A description of the information needed for this execution of the metric

        Returns
        -------
        :
            The result of running the metric.

        Raises
        ------
        FileNotFoundError
            If the recipe run did not leave a netCDF file in the expected work directory.
        """
        # Load recipe and clear unwanted elements
        recipe = load_recipe("examples/recipe_python.yml")
        recipe["datasets"].clear()
        # Only the "timeseries" diagnostic is used; drop the "map" one.
        recipe["diagnostics"].pop("map")
        variables = recipe["diagnostics"]["timeseries"]["variables"]
        variables.clear()

        # Prepare updated variables section in recipe.
        recipe_variables = dataframe_to_recipe(definition.metric_dataset[SourceDatasetType.CMIP6].datasets)
        for variable in recipe_variables.values():
            variable["preprocessor"] = "annual_mean_global"
            # Deliberately not an f-string: the braces are written to the recipe verbatim.
            variable["caption"] = "Annual global mean {long_name} according to {dataset}."

        # Populate recipe with new variables/datasets.
        variables.update(recipe_variables)

        # Run recipe
        result_dir = run_recipe(recipe, definition)
        pattern = "work/timeseries/script1/*.nc"
        result = next(result_dir.glob(pattern), None)
        if result is None:
            # Fail with a descriptive error instead of leaking a bare StopIteration.
            msg = f"No ESMValTool output matching '{pattern}' found in {result_dir}"
            raise FileNotFoundError(msg)

        # Close the file handle as soon as the bundle has been built from the dataset.
        with xr.open_dataset(result) as annual_mean_global_mean_timeseries:
            cmec_output = format_cmec_output_bundle(annual_mean_global_mean_timeseries)

        return MetricResult.build(definition, cmec_output)
Empty file.
197 changes: 197 additions & 0 deletions packages/ref-metrics-esmvaltool/src/ref_metrics_esmvaltool/recipe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
from __future__ import annotations

import subprocess
from pathlib import Path
from typing import TYPE_CHECKING, Any

import pkg_resources
import pooch # type: ignore[import-untyped]
from ref_core.datasets import SourceDatasetType
from ref_core.metrics import MetricExecutionDefinition
from ruamel.yaml import YAML

if TYPE_CHECKING:
import pandas as pd

# YAML handler shared by the functions in this module.
yaml = YAML()

# Per project: ESMValTool facet name (key) -> name of the attribute that holds
# the corresponding value on a row of the REF datasets dataframe (value).
FACETS = {
    "CMIP6": {
        "dataset": "source_id",
        "ensemble": "member_id",
        "exp": "experiment_id",
        "grid": "grid_label",
        "mip": "table_id",
        "short_name": "variable_id",
    },
}


def as_isodate(timestamp: pd._libs.tslibs.timestamps.Timestamp) -> str:
    """Render a timestamp as a compact ISO 8601 datetime.

    For example, '2014-12-16 12:00:00' becomes '20141216T120000'.

    Parameters
    ----------
    timestamp
        The timestamp to format. Its ``str()`` representation is what gets rewritten.

    Returns
    -------
        The string form of `timestamp` with spaces turned into ``T`` and all
        ``-`` and ``:`` characters removed.
    """
    # One translation pass: " " -> "T", while "-" and ":" are dropped.
    compact_form = str.maketrans({" ": "T", "-": None, ":": None})
    return str(timestamp).translate(compact_form)


def as_timerange(
    start_time: pd._libs.tslibs.timestamps.Timestamp,
    end_time: pd._libs.tslibs.timestamps.Timestamp,
) -> str:
    """Build an ESMValTool timerange from a start and an end time.

    Parameters
    ----------
    start_time
        First instant of the range.
    end_time
        Last instant of the range.

    Returns
    -------
        Both instants formatted by :func:`as_isodate`, joined by ``/``.
    """
    endpoints = (start_time, end_time)
    return "/".join(as_isodate(instant) for instant in endpoints)


def as_facets(
    row: pd.core.frame.Pandas,  # type: ignore[name-defined]
) -> dict[str, Any]:
    """Translate one row of the datasets dataframe into ESMValTool facets.

    Parameters
    ----------
    row:
        A row of the datasets dataframe.

    Returns
    -------
        A :obj:`dict` of facet-value pairs: ``project`` first, then the facets
        listed in ``FACETS`` for that project, then ``timerange``.
    """
    # The project is the leading dot-separated component of the instance id.
    project = row.instance_id.split(".", 2)[0]
    renamed = {
        esmvaltool_name: getattr(row, ref_name)
        for esmvaltool_name, ref_name in FACETS[project].items()
    }
    return {
        "project": project,
        **renamed,
        "timerange": as_timerange(row.start_time, row.end_time),
    }


def dataframe_to_recipe(datasets: pd.DataFrame) -> dict[str, Any]:
    """Build a recipe "variables" section from the datasets dataframe.

    Parameters
    ----------
    datasets
        The pandas dataframe describing the input datasets.

    Returns
    -------
        A "variables" section that can be used in an ESMValTool recipe. Each
        short name maps to ``{"additional_datasets": [...]}``, holding the
        remaining facets of every row with that short name.
    """
    variables: dict[str, Any] = {}
    for row in datasets.itertuples():
        dataset_facets = as_facets(row)
        # The short name becomes the key, so it is removed from the facets.
        short_name = dataset_facets.pop("short_name")
        entry = variables.setdefault(short_name, {"additional_datasets": []})
        entry["additional_datasets"].append(dataset_facets)
    return variables


# ESMValTool release whose recipes are used; substituted into the download URL below.
_ESMVALTOOL_VERSION = "2.11.0"

_RECIPES = pooch.create(
    path=pooch.os_cache("ref_metrics_esmvaltool"),
    base_url="https://raw.githubusercontent.com/ESMValGroup/ESMValTool/refs/tags/v{version}/esmvaltool/recipes/",
    version=_ESMVALTOOL_VERSION,
    env="REF_METRICS_ESMVALTOOL_DATA_DIR",
)
# Close the packaged registry file once it has been read instead of leaving
# the stream open for the lifetime of the process.
# NOTE(review): `pkg_resources` comes from setuptools, which is not listed in
# this package's dependencies; consider `importlib.resources.files(...)` instead.
with pkg_resources.resource_stream("ref_metrics_esmvaltool", "recipes.txt") as _registry_file:
    _RECIPES.load_registry(_registry_file)


def load_recipe(recipe: str) -> dict[str, Any]:
    """Fetch an ESMValTool recipe and parse it.

    Parameters
    ----------
    recipe
        The name of an ESMValTool recipe, as listed in the recipe registry.

    Returns
    -------
        The loaded recipe.
    """
    recipe_file = Path(_RECIPES.fetch(recipe))
    recipe_text = recipe_file.read_text(encoding="utf-8")
    return yaml.load(recipe_text)  # type: ignore[no-any-return]


def prepare_climate_data(datasets: pd.DataFrame, climate_data_dir: Path) -> None:
    """Symlink the input files from the Pandas dataframe into a directory tree.

    This ensures that ESMValTool can find the data and only uses the
    requested data.

    Each file is linked at ``climate_data_dir/<instance_id split on ".">/<file name>``.
    Calling the function again with the same inputs is a no-op.

    Parameters
    ----------
    datasets
        The pandas dataframe describing the input datasets. The ``instance_id``
        and ``path`` attributes of each row are used.
    climate_data_dir
        The directory where ESMValTool should look for input data.

    Raises
    ------
    FileExistsError
        If the link location is already occupied by something other than a
        symlink to the same file.
    """
    for row in datasets.itertuples():
        # A relative symlink target is interpreted relative to the directory
        # containing the link, not the current working directory, so always
        # link to the absolute location of the source file.
        source = Path(row.path).absolute()
        link = climate_data_dir.joinpath(*row.instance_id.split(".")) / source.name
        link.parent.mkdir(parents=True, exist_ok=True)
        if link.is_symlink() and link.readlink() == source:
            # Already linked by an earlier call; nothing to do.
            continue
        link.symlink_to(source)


def run_recipe(recipe: dict[str, Any], definition: MetricExecutionDefinition) -> Path:
    """Run an ESMValTool recipe.

    The recipe, the symlinked input data, the ESMValTool configuration and the
    results are all written below ``definition.output_fragment``.

    Parameters
    ----------
    recipe
        The ESMValTool recipe.
    definition
        A description of the information needed for this execution of the metric.

    Returns
    -------
        The directory inside ``results`` that holds the output of the run.

    Raises
    ------
    subprocess.CalledProcessError
        If the ``esmvaltool`` command exits with a non-zero status.
    FileNotFoundError
        If the run finished without creating anything in the results directory.
    """
    output_dir = definition.output_fragment

    recipe_path = output_dir / "recipe_test.yml"
    with recipe_path.open("w", encoding="utf-8") as file:
        yaml.dump(recipe, file)

    climate_data = output_dir / "climate_data"

    prepare_climate_data(
        definition.metric_dataset[SourceDatasetType.CMIP6].datasets,
        climate_data_dir=climate_data,
    )

    results_dir = output_dir / "results"
    config = {
        "output_dir": str(results_dir),
        "rootpath": {
            "default": str(climate_data),
        },
        "drs": {
            "CMIP6": "ESGF",
        },
    }
    config_dir = output_dir / "config"
    # Tolerate a config directory left behind by an earlier run in the same location.
    config_dir.mkdir(exist_ok=True)
    with (config_dir / "config.yml").open("w", encoding="utf-8") as file:
        yaml.dump(config, file)

    subprocess.check_call(["esmvaltool", "run", f"--config-dir={config_dir}", f"{recipe_path}"])  # noqa: S603, S607

    # Sort so the choice is deterministic when several entries exist.
    # NOTE(review): presumably ESMValTool names each run directory with a
    # trailing timestamp, making the last entry the newest - confirm.
    run_dirs = sorted(results_dir.glob("*"))
    if not run_dirs:
        msg = f"ESMValTool did not write any results to {results_dir}"
        raise FileNotFoundError(msg)
    return run_dirs[-1]
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
examples/recipe_python.yml ab3f06d269bb2c1368f4dc39da9bcb232fb2adb1fa556ba769e6c16294ffb4a3
Loading

0 comments on commit 8b262c0

Please sign in to comment.