From a7d52c397377f77758f24e9eaf6f35072fd899a2 Mon Sep 17 00:00:00 2001 From: Dougie Squire Date: Mon, 31 Jul 2023 14:01:31 +1000 Subject: [PATCH 1/6] update docs on environments --- docs/datastores/builders.rst | 2 +- docs/project_list.rst | 1 + docs/storage_flags.rst | 2 +- docs/usage/how.rst | 39 +++++++++++++------------ src/access_nri_intake/cli.py | 1 + src/access_nri_intake/data/catalog.yaml | 2 +- 6 files changed, 26 insertions(+), 21 deletions(-) diff --git a/docs/datastores/builders.rst b/docs/datastores/builders.rst index 4a103d60..cb2efac4 100644 --- a/docs/datastores/builders.rst +++ b/docs/datastores/builders.rst @@ -8,7 +8,7 @@ set of Intake-ESM datastore Builders for different ACCESS model outputs. In gene datastore for your ACCESS model output should be as simple as passing your output base directory to an appropriate Builder. -The access-nri-intake package is installed in the :code:`xp65` and :code:`hh5` analysis environments, or +The access-nri-intake package is installed in the :code:`hh5` and :code:`xp65` analysis environments, or users can install it into their own environment (see :ref:`installation` for details). The Builders can be imported from the :code:`access_nri_intake.source.builders` submodule. diff --git a/docs/project_list.rst b/docs/project_list.rst index 5b59336e..83e9fdba 100644 --- a/docs/project_list.rst +++ b/docs/project_list.rst @@ -6,3 +6,4 @@ * :code:`oi10` * :code:`p73` * :code:`rr3` +* :code:`xp65` diff --git a/docs/storage_flags.rst b/docs/storage_flags.rst index d01a52f3..165b38d8 100644 --- a/docs/storage_flags.rst +++ b/docs/storage_flags.rst @@ -1,3 +1,3 @@ .. 
code-block:: - gdata/al33+gdata/cj50+gdata/dk92+gdata/fs38+gdata/ik11+gdata/oi10+gdata/p73+gdata/rr3 \ No newline at end of file + gdata/al33+gdata/cj50+gdata/dk92+gdata/fs38+gdata/ik11+gdata/oi10+gdata/p73+gdata/rr3+gdata/xp65 \ No newline at end of file diff --git a/docs/usage/how.rst b/docs/usage/how.rst index 315f0dbb..28c28b9f 100644 --- a/docs/usage/how.rst +++ b/docs/usage/how.rst @@ -19,26 +19,30 @@ In order to use the catalog, you will need to have the following: Note you will need to join a project with a compute allocation. If you don't know what project is appropriate you will need to seek help from your local group or IT support. +#. **Access to the** :code:`xp65` **project**: this project houses the catalog table files. See the + `NCI documentation for how to join projects `_. + #. **Access to the projects that house the data you're interested in**: the catalog references data products across multiple projects on Gadi. Currently, data is included from the following projects: .. include:: ../project_list.rst If you wish to be able to access all the data in the catalog, you will need to be a member of all - these projects. See the `NCI documentation for how to join projects - `_. + these projects. .. attention:: Catalog users will only be able to load data from projects that they have access to. -#. **Access to the** :code:`xp65` **or** :code:`hh5` **projects**: these projects provide public - analysis environments in which the ACCESS-NRI catalog is installed (along with many other useful - packages). Alternatively, you can install the catalog into your own environment. +#. **An installation of the catalog**: the catalog is pre-installed in the + `CLEX CMS "analysis3" conda environment `_ and the + ACCESS-NRI "access-med" conda environment. Users are encouraged to use one of these environments to + use the catalog. Alternatively, you can install the catalog into your own environment as described + below. - .. 
warning:: - The ACCESS-NRI catalog is actually not yet installed in the :code:`hh5` environments, so for now - you'll have to use the :code:`xp65` environment. + .. attention:: + In order to use the CLEX CMS conda environments, you will need to also be a member of the + :code:`hh5` project. .. _installation: @@ -46,7 +50,7 @@ Installing the catalog ^^^^^^^^^^^^^^^^^^^^^^ Most users will not need to install the catalog themselves and will instead use the catalog through one -of the public analysis environments provided in either :code:`xp65` or :code:`hh5` (see below). +of the public analysis environments provided in either :code:`hh5` or :code:`xp65` (see below). Advanced users that want to install the catalog into their own environment can do so in three ways: @@ -86,22 +90,21 @@ data it references are available from your session. In particular: * **Setting the storage flags**: in addition to being a member of the projects you want to access, you also have to explicity tell the JupyterLab app that you want to access them in your session. Specify - the project storage paths by entering them in the “Storage” dropdown. To allow access to all data - products in the catalog enter: + the project storage paths by entering them in the “Storage” dropdown. To allow access to the catalog + and all the data products it contains enter: .. include:: ../storage_flags.rst - If you want to use the :code:`xp65` or :code:`hh5` analysis environment, you'll also need to add - :code:`gdata/xp65` or :code:`gdata/hh5`, respectively. + If you want to use the :code:`hh5` analysis environment, you'll also need to add :code:`gdata/hh5`. .. attention:: You need to be a member of all projects you enter here. You can see what projects you are part of at `https://my.nci.org.au/mancini `_. * **Setting the environment**: you need to make sure that the catalog is installed in your JupyterLab - session. 
As mentioned above, the easiest way to do this is to use either the :code:`xp65` or - :code:`hh5` public analysis environments. You can activate the :code:`xp65` environment within your + session. As mentioned above, the easiest way to do this is to use either the :code:`hh5` or + :code:`xp65` public analysis environments. You can activate the :code:`hh5` environment within your JupyterLab session using the "Advanced options" to set the "Module directories" to - :code:`/g/data/xp65/public/modules` and "Modules" to :code:`conda/are`. Similarly, to use the - :code:`hh5` environment, set "Module directories" to :code:`/g/data/hh5/public/modules` and "Modules" - to :code:`conda/analysis3`. + :code:`/g/data/hh5/public/modules` and "Modules" to :code:`conda/analysis3`. Similarly, to use the + :code:`xp65` environment, set "Module directories" to :code:`/g/data/xp65/public/modules` and "Modules" + to :code:`conda/are`. diff --git a/src/access_nri_intake/cli.py b/src/access_nri_intake/cli.py index 0eca175a..7a405e4a 100644 --- a/src/access_nri_intake/cli.py +++ b/src/access_nri_intake/cli.py @@ -184,6 +184,7 @@ def _get_project(path): project |= set(esm_ds.df["path"].map(_get_project)) project |= {_get_project(path) for path in args["path"]} + project |= {_get_project(build_base_path)} storage_flags = "+".join(sorted([f"gdata/{proj}" for proj in project])) # Build the catalog diff --git a/src/access_nri_intake/data/catalog.yaml b/src/access_nri_intake/data/catalog.yaml index 5f035da9..0b6d6241 100644 --- a/src/access_nri_intake/data/catalog.yaml +++ b/src/access_nri_intake/data/catalog.yaml @@ -13,7 +13,7 @@ sources: description: ACCESS-NRI intake catalog driver: intake_dataframe_catalog.core.DfFileCatalog metadata: - storage: gdata/al33+gdata/cj50+gdata/dk92+gdata/fs38+gdata/ik11+gdata/oi10+gdata/p73+gdata/rr3 + storage: gdata/al33+gdata/cj50+gdata/dk92+gdata/fs38+gdata/ik11+gdata/oi10+gdata/p73+gdata/rr3+gdata/xp65 version: '{{version}}' parameters: version: From 
70616089176292571a66244574c6957994c3394a Mon Sep 17 00:00:00 2001 From: Dougie Squire Date: Mon, 31 Jul 2023 14:35:51 +1000 Subject: [PATCH 2/6] add note to usage quickstart about lazy eval --- docs/usage/quickstart.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/usage/quickstart.ipynb b/docs/usage/quickstart.ipynb index a27376ad..bec04607 100644 --- a/docs/usage/quickstart.ipynb +++ b/docs/usage/quickstart.ipynb @@ -1776,7 +1776,7 @@ "id": "d292b4d3-e72b-48e7-92b8-7bb8994012c1", "metadata": {}, "source": [ - "Now that we have our data, we can do our \"analysis\", which here is to simply plot the timeseries." + "Now that we have our datasets, we can do our \"analysis\", which here is to simply plot the timeseries. Note that at this point, we still haven't actually loaded any product data into memory. Our datasets are [dask-backed xarray Dataset objects](https://docs.xarray.dev/en/stable/user-guide/dask.html#parallel-computing-with-dask) that will only be evaluated when required (or computed explicitly), for example when we try to plot our data:" ] }, { @@ -4280,9 +4280,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python (access-nri-intake-test)", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "access-nri-intake-test" + "name": "python3" }, "language_info": { "codemirror_mode": { From 9f6fc9052e6501b7504811e1e7cd0c5837c8ed3a Mon Sep 17 00:00:00 2001 From: Dougie Squire Date: Mon, 31 Jul 2023 15:32:04 +1000 Subject: [PATCH 3/6] only available in analysis3-unstable atm --- docs/usage/how.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/usage/how.rst b/docs/usage/how.rst index 28c28b9f..fc86053d 100644 --- a/docs/usage/how.rst +++ b/docs/usage/how.rst @@ -35,10 +35,10 @@ In order to use the catalog, you will need to have the following: Catalog users will only be able to load data from projects that they have access to. #. 
**An installation of the catalog**: the catalog is pre-installed in the - `CLEX CMS "analysis3" conda environment `_ and the - ACCESS-NRI "access-med" conda environment. Users are encouraged to use one of these environments to - use the catalog. Alternatively, you can install the catalog into your own environment as described - below. + `CLEX CMS "analysis3" conda environment `_ (>=23.04) and + the ACCESS-NRI "access-med" conda environment (all versions). Users are encouraged to use one of these + environments to use the catalog. Alternatively, you can install the catalog into your own environment + as described below. .. attention:: In order to use the CLEX CMS conda environments, you will need to also be a member of the @@ -105,6 +105,6 @@ data it references are available from your session. In particular: session. As mentioned above, the easiest way to do this is to use either the :code:`hh5` or :code:`xp65` public analysis environments. You can activate the :code:`hh5` environment within your JupyterLab session using the "Advanced options" to set the "Module directories" to - :code:`/g/data/hh5/public/modules` and "Modules" to :code:`conda/analysis3`. Similarly, to use the - :code:`xp65` environment, set "Module directories" to :code:`/g/data/xp65/public/modules` and "Modules" - to :code:`conda/are`. + :code:`/g/data/hh5/public/modules` and "Modules" to :code:`conda/analysis3-unstable`. Similarly, to use + the :code:`xp65` environment, set "Module directories" to :code:`/g/data/xp65/public/modules` and + "Modules" to :code:`conda/are`. 
From 60a51a5387da1f45ffff74cb26108f341ced27b3 Mon Sep 17 00:00:00 2001 From: dougiesquire Date: Thu, 10 Aug 2023 13:02:11 +1000 Subject: [PATCH 4/6] add get_timeinfo test --- tests/test_source_utils.py | 45 +++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/tests/test_source_utils.py b/tests/test_source_utils.py index 849f2bae..718aa732 100644 --- a/tests/test_source_utils.py +++ b/tests/test_source_utils.py @@ -4,8 +4,13 @@ from pathlib import Path import pytest +import xarray as xr -from access_nri_intake.source.utils import parse_access_filename, parse_access_ncfile +from access_nri_intake.source.utils import ( + get_timeinfo, + parse_access_filename, + parse_access_ncfile, +) @pytest.mark.parametrize( @@ -298,3 +303,41 @@ def test_parse_access_ncfile(test_data, filename, expected): file = str(test_data / Path(filename)) assert parse_access_ncfile(file) == expected + + +@pytest.mark.parametrize( + "start_end, expected", + [ + ([0.0, 0.00625], ("1900-01-01, 00:00:00", "1900-01-01, 00:09:00", "subhr")), + ([0.0, 0.125], ("1900-01-01, 00:00:00", "1900-01-01, 03:00:00", "3hr")), + ([0.0, 0.25], ("1900-01-01, 00:00:00", "1900-01-01, 06:00:00", "6hr")), + ([0.0, 1.0], ("1900-01-01, 00:00:00", "1900-01-02, 00:00:00", "1day")), + ([0.0, 31.0], ("1900-01-01, 00:00:00", "1900-02-01, 00:00:00", "1mon")), + ([0.0, 90.0], ("1900-01-01, 00:00:00", "1900-04-01, 00:00:00", "3mon")), + ([0.0, 365.0], ("1900-01-01, 00:00:00", "1901-01-01, 00:00:00", "1yr")), + ([0.0, 730.0], ("1900-01-01, 00:00:00", "1902-01-01, 00:00:00", "2yr")), + ], +) +@pytest.mark.parametrize("bounds", [True, False]) +def test_get_timeinfo(start_end, expected, bounds): + if bounds: + time = (start_end[0] + start_end[1]) / 2 + ds = xr.Dataset( + data_vars={ + "dummy": ("time", [0]), + "time_bounds": (("time", "nv"), [start_end]), + }, + coords={"time": [time]}, + ) + ds["time"].attrs = dict(bounds="time_bounds") + else: + ds = xr.Dataset( + data_vars={"dummy": 
("time", [0, 0])}, + coords={"time": start_end}, + ) + + ds["time"].attrs |= dict( + units="days since 1900-01-01 00:00:00", calendar="GREGORIAN" + ) + + assert get_timeinfo(ds) == expected From 5f2dbb263d1c49d26247a8bd000acf1e04709ca6 Mon Sep 17 00:00:00 2001 From: dougiesquire Date: Thu, 10 Aug 2023 13:05:45 +1000 Subject: [PATCH 5/6] allow get_timeinfo to return subhr --- src/access_nri_intake/source/utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/access_nri_intake/source/utils.py b/src/access_nri_intake/source/utils.py index 52c6db2f..27ef0d93 100644 --- a/src/access_nri_intake/source/utils.py +++ b/src/access_nri_intake/source/utils.py @@ -70,8 +70,11 @@ def _todate(t): frequency = f"{months}mon" elif dt.days >= 1: frequency = f"{dt.days}day" + elif dt.seconds >= 3600: + hours = round(dt.seconds / 3600) + frequency = f"{hours}hr" else: - frequency = f"{dt.seconds // 3600}hr" + frequency = "subhr" start_time = ts.strftime(time_format) end_time = te.strftime(time_format) From 12d833730c6b146bf0d3aed59567590056d70f57 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 11 Sep 2023 09:42:14 +0000 Subject: [PATCH 6/6] Bump actions/checkout from 3 to 4 Bumps [actions/checkout](https://github.com/actions/checkout) from 3 to 4. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v3...v4) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] --- .github/workflows/cd.yml | 4 ++-- .github/workflows/ci.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index ccec29b0..19c0014e 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -12,7 +12,7 @@ jobs: steps: - name: Checkout source - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python 3.11 uses: actions/setup-python@v4 @@ -39,7 +39,7 @@ jobs: steps: - name: Checkout source - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup conda environment uses: conda-incubator/setup-miniconda@v2 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5a379986..fd2a7a0b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,7 +6,7 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: actions/setup-python@v4 - uses: pre-commit/action@v3.0.0 @@ -19,7 +19,7 @@ jobs: steps: - name: Checkout source - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup conda environment uses: conda-incubator/setup-miniconda@v2