diff --git a/docs/contributing/sources.rst b/docs/contributing/sources.rst index f202abd..5008bc3 100644 --- a/docs/contributing/sources.rst +++ b/docs/contributing/sources.rst @@ -8,4 +8,13 @@ Intake-ESM datastore for some climate data on Gadi (e.g. by following :ref:`data to be findable and useable by others in the community. Or you're aware of an ACCESS-related climate data product on Gadi that you think should be included in the catalog. Either way, we'd like to hear from you. Please open a catalog data request `here `_ providing -details of the data product to add. \ No newline at end of file +details of the data product to add. + +.. warning:: + If you are providing an existing Intake-ESM datastore to be added to :code:`access-nri-intake-catalog`, the + datastore must be in its final form **before** you make a data request. If a datastore is changed + after we have verified that we are able to ingest it, it will break future catalog builds and may be + removed. + + If you need to update a datastore that is already in :code:`access-nri-intake-catalog`, please contact us as + described above. \ No newline at end of file diff --git a/docs/datastores/adding.rst b/docs/datastores/adding.rst index 0bc294e..5ff801d 100644 --- a/docs/datastores/adding.rst +++ b/docs/datastores/adding.rst @@ -19,4 +19,13 @@ we're happy to help you through the process. .. note:: Datastores don't have to have been created by access-nri-intake Builders in order to be added to the catalog. If you have an Intake-ESM datastore (or indeed another type of Intake source) that you think should be in the - catalog, please open a catalog data request. \ No newline at end of file + catalog, please open a catalog data request. + +.. warning:: + If you are providing an existing Intake-ESM datastore to be added to :code:`access-nri-intake-catalog`, the + datastore must be in its final form **before** you make a data request. 
If a datastore is changed + after we have verified that we are able to ingest it, it will break future catalog builds and may be + removed. + + If you need to update a datastore that is already in :code:`access-nri-intake-catalog`, please contact us as + described above. \ No newline at end of file diff --git a/docs/generate_includes.py b/docs/generate_includes.py index a100970..7bc5015 100755 --- a/docs/generate_includes.py +++ b/docs/generate_includes.py @@ -3,25 +3,46 @@ """ Generate includes for documentation """ -import os +import re +import warnings +from pathlib import Path import yaml +STORAGE_FLAG_REGEXP = r"^/g/data/(?P<proj>[a-z]{1,2}[0-9]{1,2})/.*?$" + + +def storage_includes() -> None: + here = Path(__file__).parent.absolute() + + project_list = set() + for source_yaml in (here.parent / "config").glob("*.yaml"): + print(source_yaml) + with open(source_yaml) as fobj: + contents = yaml.safe_load(fobj) + + # Loop over the sources in the YAML, extract all storage flags + # Will ignore anything that doesn't look like /g/data/<proj>/.... + try: + for source in contents["sources"]: + metadata_match = re.match(STORAGE_FLAG_REGEXP, source["metadata_yaml"]) + if metadata_match: + project_list.add(metadata_match.group("proj")) + for data_path in source["path"]: + data_path_match = re.match(STORAGE_FLAG_REGEXP, data_path) + if data_path_match: + project_list.add(data_path_match.group("proj")) + except KeyError: + warnings.warn(f"Unable to parse config YAML file {source_yaml} - skipping") + continue -def storage_includes(): - here = os.path.abspath(os.path.dirname(__file__)) - with open( - os.path.join(here, "..", "src", "access_nri_intake", "data", "catalog.yaml") - ) as fobj: - contents = yaml.safe_load(fobj) - storage_flags = contents["sources"]["access_nri"]["metadata"]["storage"] - project_list = [ - f"* :code:`{proj.removeprefix('gdata/')}`" for proj in storage_flags.split("+") - ] - with open("storage_flags.rst", "w") as fobj: - fobj.write(f".. 
code-block::\n\n {storage_flags}") with open("project_list.rst", "w") as fobj: - fobj.write("\n".join(project_list) + "\n") + [fobj.write(f"* :code:`{proj}`\n") for proj in project_list] + storage_string = "+".join([f"gdata/{proj}" for proj in project_list]) + with open("storage_flags.rst", "w") as fobj: + fobj.write(f".. code-block::\n\n {storage_string}") + + return None if __name__ == "__main__": diff --git a/docs/project_list.rst b/docs/project_list.rst index 83e9fdb..37c0f32 100644 --- a/docs/project_list.rst +++ b/docs/project_list.rst @@ -1,9 +1,13 @@ -* :code:`al33` -* :code:`cj50` -* :code:`dk92` -* :code:`fs38` +* :code:`py18` +* :code:`xp65` * :code:`ik11` +* :code:`cj50` +* :code:`zz63` +* :code:`rt52` +* :code:`al33` * :code:`oi10` -* :code:`p73` * :code:`rr3` -* :code:`xp65` +* :code:`ig45` +* :code:`hq89` +* :code:`fs38` +* :code:`p73` diff --git a/docs/storage_flags.rst b/docs/storage_flags.rst index 165b38d..41cbfe9 100644 --- a/docs/storage_flags.rst +++ b/docs/storage_flags.rst @@ -1,3 +1,3 @@ .. code-block:: - gdata/al33+gdata/cj50+gdata/dk92+gdata/fs38+gdata/ik11+gdata/oi10+gdata/p73+gdata/rr3+gdata/xp65 \ No newline at end of file + gdata/py18+gdata/xp65+gdata/ik11+gdata/cj50+gdata/zz63+gdata/rt52+gdata/al33+gdata/oi10+gdata/rr3+gdata/ig45+gdata/hq89+gdata/fs38+gdata/p73 \ No newline at end of file