
Commit 699a236

Merge remote-tracking branch 'origin/main' into 271-consolidation-on-pathlib-module
marc-white committed Dec 11, 2024
2 parents d6d4261 + 3590e46 commit 699a236
Showing 29 changed files with 3,821 additions and 554 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/e2e.yaml
@@ -18,6 +18,6 @@ jobs:
           key: ${{secrets.DEPLOY_KEY}}
           script: |
             cd ${{secrets.GADI_REPO_PATH}}
-            qsub bin/build_all.sh
+            qsub bin/test_end_to_end.sh
2,390 changes: 2,390 additions & 0 deletions bin/new-build-checks.ipynb


11 changes: 10 additions & 1 deletion docs/contributing/sources.rst
@@ -8,4 +8,13 @@ Intake-ESM datastore for some climate data on Gadi (e.g. by following :ref:`data
 to be findable and useable by others in the community. Or you're aware of an ACCESS-related climate data product on
 Gadi that you think should be included in the catalog. Either way, we'd like to hear from you. Please open a
 catalog data request `here <https://github.com/ACCESS-NRI/access-nri-intake-catalog/issues/new/choose>`_ providing
-details of the data product to add.
+details of the data product to add.
+
+.. warning::
+    If you are providing an existing Intake-ESM datastore to be added to :code:`access-nri-intake-catalog`, the
+    datastore must be in its final form **before** you make a data request. If a datastore is changed
+    after we have verified that we are able to ingest it, it will break future catalog builds and may be
+    removed.
+
+    If you need to update a datastore that is already in :code:`access-nri-intake-catalog`, please contact us as
+    described above.
11 changes: 10 additions & 1 deletion docs/datastores/adding.rst
@@ -19,4 +19,13 @@ we're happy to help you through the process.
 .. note::
     Datastores don't have to have been created by access-nri-intake Builders in order to be added to the
     catalog. If you have an Intake-ESM datastore (or indeed another type of Intake source) that you think should be in the
-    catalog, please open a catalog data request.
+    catalog, please open a catalog data request.
+
+.. warning::
+    If you are providing an existing Intake-ESM datastore to be added to :code:`access-nri-intake-catalog`, the
+    datastore must be in its final form **before** you make a data request. If a datastore is changed
+    after we have verified that we are able to ingest it, it will break future catalog builds and may be
+    removed.
+
+    If you need to update a datastore that is already in :code:`access-nri-intake-catalog`, please contact us as
+    described above.
49 changes: 35 additions & 14 deletions docs/generate_includes.py
@@ -3,25 +3,46 @@

""" Generate includes for documentation """

-import os
+import re
+import warnings
+from pathlib import Path
 
 import yaml
 
+STORAGE_FLAG_REGEXP = r"^/g/data/(?P<proj>[a-z]{1,2}[0-9]{1,2})/.*?$"
+
 
-def storage_includes():
-    here = os.path.abspath(os.path.dirname(__file__))
-    with open(
-        os.path.join(here, "..", "src", "access_nri_intake", "data", "catalog.yaml")
-    ) as fobj:
-        contents = yaml.safe_load(fobj)
-    storage_flags = contents["sources"]["access_nri"]["metadata"]["storage"]
-    project_list = [
-        f"* :code:`{proj.removeprefix('gdata/')}`" for proj in storage_flags.split("+")
-    ]
-    with open("storage_flags.rst", "w") as fobj:
-        fobj.write(f".. code-block::\n\n   {storage_flags}")
+def storage_includes() -> None:
+    here = Path(__file__).parent.absolute()
+
+    project_list = set()
+    for source_yaml in (here.parent / "config").glob("*.yaml"):
+        print(source_yaml)
+        with open(source_yaml) as fobj:
+            contents = yaml.safe_load(fobj)
+
+        # Loop over the sources in the YAML, extract all storage flags
+        # Will ignore anything that doesn't look like /g/data/<flag>/....
+        try:
+            for source in contents["sources"]:
+                metadata_match = re.match(STORAGE_FLAG_REGEXP, source["metadata_yaml"])
+                if metadata_match:
+                    project_list.add(metadata_match.group("proj"))
+                for data_path in source["path"]:
+                    data_path_match = re.match(STORAGE_FLAG_REGEXP, data_path)
+                    if data_path_match:
+                        project_list.add(data_path_match.group("proj"))
+        except KeyError:
+            warnings.warn(f"Unable to parse config YAML file {source_yaml} - skipping")
+            continue
+
     with open("project_list.rst", "w") as fobj:
-        fobj.write("\n".join(project_list) + "\n")
+        [fobj.write(f"* :code:`{proj}`\n") for proj in project_list]
+    storage_string = "+".join([f"gdata/{proj}" for proj in project_list])
+    with open("storage_flags.rst", "w") as fobj:
+        fobj.write(f".. code-block::\n\n   {storage_string}")
 
     return None


if __name__ == "__main__":
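For reference, the new :code:`STORAGE_FLAG_REGEXP` extracts NCI project codes from :code:`/g/data/<project>/...` paths. A minimal sketch of its behaviour (the paths below are hypothetical):

    import re

    STORAGE_FLAG_REGEXP = r"^/g/data/(?P<proj>[a-z]{1,2}[0-9]{1,2})/.*?$"

    # Hypothetical example paths - project codes are 1-2 letters followed by 1-2 digits
    for path in ("/g/data/xp65/admin/metadata.yaml", "/g/data/fs38/published/some/data.nc"):
        match = re.match(STORAGE_FLAG_REGEXP, path)
        if match:
            print(match.group("proj"))  # -> xp65, then fs38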
25 changes: 25 additions & 0 deletions docs/management/release.rst
@@ -15,6 +15,13 @@ or may not include an update to the ACCESS-NRI catalog files on Gadi.

 #. Enter the new version (vX.X.X) as the tag and release title. Add a brief description of the release.
 
+.. note::
+
+    It is recommended to attempt a beta release before committing to a major code update.
+    In this case, the version number requires an ordinal after the :code:`b`, e.g., :code:`vYYYY-MM-DDb0`. If the
+    ordinal isn't provided, the GitHub PyPI build action will append one, which breaks the linkage
+    between the PyPI and Conda build actions.
+
 #. Click on "Publish release". This should create the release on GitHub and trigger the workflow that builds and uploads
    the new version to PyPI and Conda.

@@ -35,9 +42,27 @@ Generating a new catalog version
       $ cd bin
       $ qsub -v version=${RELEASE} build_all.sh
 
+.. note::
+    Running the build script requires access to an up-to-date checkout of the :code:`access-nri-intake-catalog`
+    repository. The default location for this is :code:`/g/data/xp65/admin/access-nri-intake-catalog`. If you do
+    not have the ability to update this checkout, you may use a local one; however, you will need to update
+    the :code:`CONFIG_DIR` variable in :code:`bin/build_all.sh` to point at your checkout location.
+
 .. note::
     If :code:`version` is not provided, the default used is the current date, in the format :code:`vYYYY-MM-DD`. This should
     be acceptable in most cases.
 
 #. Updating :code:`access_nri_intake_catalog` is no longer necessary - the new catalog will be available immediately as
    :code:`intake.cat.access_nri`.


+New release with new catalog
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In the case of a linked release of a new major version of :code:`access-nri-intake-catalog` and a new
+catalog build, the recommended process is (sketched below):
+
+#. Create a beta release of :code:`access-nri-intake-catalog`;
+#. Use the beta release to build a new catalog;
+#. Iterate over the above steps until the desired result is achieved;
+#. Make a definitive code release.
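As a concrete sketch of that loop, using a hypothetical beta tag (note the :code:`b0` ordinal, per the note above):

    $ RELEASE=v2025-01-01b0   # hypothetical beta tag
    $ cd bin
    $ qsub -v version=${RELEASE} build_all.sh

Once the build completes, the result can be spot-checked by loading the catalog directly (assuming a session with the catalog packages installed):

    import intake

    cat = intake.cat.access_nri  # the newly built catalog version is picked up automatically
    print(list(cat))             # spot-check the experiments it contains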
14 changes: 9 additions & 5 deletions docs/project_list.rst
@@ -1,9 +1,13 @@
+* :code:`rr3`
+* :code:`rt52`
+* :code:`hq89`
+* :code:`zz63`
 * :code:`al33`
-* :code:`cj50`
-* :code:`dk92`
-* :code:`fs38`
-* :code:`ik11`
 * :code:`oi10`
+* :code:`ik11`
+* :code:`ig45`
+* :code:`fs38`
 * :code:`p73`
-* :code:`rr3`
 * :code:`xp65`
+* :code:`py18`
+* :code:`cj50`
2 changes: 1 addition & 1 deletion docs/storage_flags.rst
@@ -1,3 +1,3 @@
.. code-block::
 
-   gdata/al33+gdata/cj50+gdata/dk92+gdata/fs38+gdata/ik11+gdata/oi10+gdata/p73+gdata/rr3+gdata/xp65
+   gdata/rr3+gdata/rt52+gdata/hq89+gdata/zz63+gdata/al33+gdata/oi10+gdata/ik11+gdata/ig45+gdata/fs38+gdata/p73+gdata/xp65+gdata/py18+gdata/cj50
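For context, this string is the :code:`storage` directive a PBS job on Gadi needs in order to see all of the catalog's data; a minimal sketch (assuming a PBS submission script):

    #PBS -l storage=gdata/rr3+gdata/rt52+gdata/hq89+gdata/zz63+gdata/al33+gdata/oi10+gdata/ik11+gdata/ig45+gdata/fs38+gdata/p73+gdata/xp65+gdata/py18+gdata/cj50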
8 changes: 5 additions & 3 deletions src/access_nri_intake/catalog/manager.py
@@ -4,6 +4,7 @@
"""Manager for adding/updating intake sources in an intake-dataframe-catalog like the ACCESS-NRI catalog"""

import os
from pathlib import Path

import intake
from intake_dataframe_catalog.core import DfFileCatalog, DfFileCatalogError
@@ -32,7 +33,7 @@ class CatalogManager:
     Add/update intake sources in an intake-dataframe-catalog like the ACCESS-NRI catalog
     """
 
-    def __init__(self, path: str):
+    def __init__(self, path: Path | str):
         """
         Initialise a CatalogManager instance to add/update intake sources in a
         intake-dataframe-catalog like the ACCESS-NRI catalog
@@ -42,10 +43,11 @@ def __init__(self, path: str):
         path: str
             The path to the intake-dataframe-catalog
         """
+        path = Path(path)
 
-        self.path = path
+        self.path = str(path)
 
-        self.mode = "a" if os.path.exists(path) else "w"
+        self.mode = "a" if path.exists() else "w"
 
         try:
             self.dfcat = DfFileCatalog(
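With this change, :code:`CatalogManager` accepts either a string or a :code:`pathlib.Path`; a minimal sketch (the catalog path is hypothetical):

    from pathlib import Path

    from access_nri_intake.catalog.manager import CatalogManager

    # Both forms now behave identically; internally the path is normalised to a str
    cm = CatalogManager(Path("/g/data/xp65/admin/metacatalog.csv"))  # hypothetical path
    cm = CatalogManager("/g/data/xp65/admin/metacatalog.csv")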
38 changes: 36 additions & 2 deletions src/access_nri_intake/catalog/translators.py
@@ -7,7 +7,7 @@
"""

from dataclasses import dataclass
-from functools import partial
+from functools import partial, wraps
from typing import Callable

import pandas as pd
@@ -17,6 +17,9 @@
from . import COLUMNS_WITH_ITERABLES
from .utils import _to_tuple, tuplify_series

+# Note: important that when using @tuplify_series and @trace_failure decorators,
+# trace_failure is the innermost decorator

 FREQUENCY_TRANSLATIONS = {
     "monthly-averaged-by-hour": "1hr",
     "monthly-averaged-by-day": "1hr",
@@ -36,6 +39,29 @@
 }


+def trace_failure(func: Callable) -> Callable:
+    """
+    Decorator that wraps a translator method and re-raises any KeyError
+    it raises with a more informative message
+    """
+
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        func_name = func.__name__
+        colname = func_name[1:].split("_")[0]
+        # Ensure the first argument is an instance of the class
+        if not isinstance(args[0], DefaultTranslator):
+            raise TypeError("Decorator can only be applied to class methods")
+
+        try:
+            return func(*args, **kwargs)
+        except KeyError as exc:
+            raise KeyError(
+                f"Unable to translate '{colname}' column with translator '{args[0].__class__.__name__}'"
+            ) from exc
+
+    return wrapper


 class TranslatorError(Exception):
     "Generic Exception for the Translator classes"

@@ -192,20 +218,23 @@ def set_dispatch(
         self._dispatch[core_colname] = func
         setattr(self._dispatch_keys, core_colname, input_name)
 
+    @trace_failure
     def _realm_translator(self) -> pd.Series:
         """
         Return realm, fixing a few issues
         """
         return _cmip_realm_translator(self.source.df[self._dispatch_keys.realm])
 
     @tuplify_series
+    @trace_failure
     def _model_translator(self) -> pd.Series:
         """
         Return model from dispatch_keys.model
         """
         return self.source.df[self._dispatch_keys.model]
 
     @tuplify_series
+    @trace_failure
     def _frequency_translator(self) -> pd.Series:
         """
         Return frequency, fixing a few issues
@@ -215,6 +244,7 @@ def _frequency_translator(self) -> pd.Series:
         )
 
     @tuplify_series
+    @trace_failure
     def _variable_translator(self) -> pd.Series:
         """
         Return variable as a tuple
@@ -355,7 +385,9 @@ def __init__(self, source, columns):

         super().__init__(source, columns)
         self.set_dispatch(
-            input_name="source_id", core_colname="model", func=super()._model_translator
+            input_name="project_id",
+            core_colname="model",
+            func=super()._model_translator,
         )
         self.set_dispatch(
             input_name="variable_id",
@@ -407,6 +439,7 @@ def __init__(self, source, columns):
         )
 
     @tuplify_series
+    @trace_failure
     def _model_translator(self):
         """
         Get the model from the path. This is a slightly hacky approach, using the
@@ -425,6 +458,7 @@ def _realm_translator(self):
         return self.source.df.apply(lambda x: ("none",), 1)
 
     @tuplify_series
+    @trace_failure
     def _frequency_translator(self):
         """
         Get the frequency from the path
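A minimal sketch of the ordering rule noted at the top of this file: :code:`@trace_failure` must be the innermost decorator, so the raw :code:`KeyError` from a missing column is caught and re-raised with translator context before :code:`@tuplify_series` post-processes the result (the subclass and column name below are hypothetical):

    class MyTranslator(DefaultTranslator):  # hypothetical subclass
        @tuplify_series   # outermost: wraps each returned entry in a tuple
        @trace_failure    # innermost: re-raises KeyError with translator context
        def _model_translator(self) -> pd.Series:
            return self.source.df["model_id"]  # hypothetical source column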
19 changes: 15 additions & 4 deletions src/access_nri_intake/cli.py
@@ -406,9 +406,20 @@ def metadata_validate(argv: Sequence[str] | None = None):
raise FileNotFoundError(f"No such file(s): {f}")


-def metadata_template(loc=None):
+def metadata_template(loc: str | Path | None = None) -> None:
     """
-    Create an empty template for a metadata.yaml file using the experiment schema
+    Create an empty template for a metadata.yaml file using the experiment schema.
+    Writes the template to the current working directory by default.
+
+    Parameters
+    ----------
+    loc: str or Path, optional
+        The directory in which to save the template.
+        Defaults to the current working directory.
+
+    Returns
+    -------
+    None
     """

if loc is None:
@@ -424,9 +435,9 @@ def _can_be_array(descr):
description = f"<{descr['description']}>"

if _can_be_array(descr):
description = [description]
description = [description] # type: ignore

template[name] = description

with open(os.path.join(loc, "metadata.yaml"), "w") as outfile:
with open((Path(loc) / "metadata.yaml"), "w") as outfile:
yaml.dump(template, outfile, default_flow_style=False, sort_keys=False)
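With these changes, :code:`metadata_template` accepts a string or :code:`Path` destination; a minimal sketch (the directory is hypothetical):

    from pathlib import Path

    from access_nri_intake.cli import metadata_template

    # Writes a metadata.yaml template into the given directory
    metadata_template(loc=Path("/tmp/my-experiment"))  # hypothetical directory
    metadata_template()  # defaults to the current working directory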
