From b0fcefa53bb61f550e54ffdbe822128e3dd9dafe Mon Sep 17 00:00:00 2001 From: Charles Turner <52199577+charles-turner-1@users.noreply.github.com> Date: Wed, 11 Dec 2024 07:14:07 +0800 Subject: [PATCH] 290 align test datasets & Broken cordex datastore (#300) * WIP * WIP * Ready to try live translator tests on Gadi * docstring * Fixed e2e workflow, updated cordex translator & test data * Formatting --- .github/workflows/e2e.yaml | 2 +- src/access_nri_intake/catalog/translators.py | 4 +- tests/conftest.py | 5 ++ tests/data/esm_datastore/cordex-ig45.csv | 12 ++--- tests/data/esm_datastore/cordex-ig45.json | 12 ----- tests/e2e/__init__.py | 5 ++ tests/e2e/test_datasets_representative.py | 50 ++++++++++++++++++++ tests/e2e/test_end_to_end.py | 4 +- 8 files changed, 71 insertions(+), 23 deletions(-) create mode 100644 tests/e2e/test_datasets_representative.py diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index b53d73f9..954ab856 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -18,6 +18,6 @@ jobs: key: ${{secrets.DEPLOY_KEY}} script: | cd ${{secrets.GADI_REPO_PATH}} - qsub bin/build_all.sh + qsub bin/test_end_to_end.sh diff --git a/src/access_nri_intake/catalog/translators.py b/src/access_nri_intake/catalog/translators.py index ea2dc096..432d6247 100644 --- a/src/access_nri_intake/catalog/translators.py +++ b/src/access_nri_intake/catalog/translators.py @@ -385,7 +385,9 @@ def __init__(self, source, columns): super().__init__(source, columns) self.set_dispatch( - input_name="source_id", core_colname="model", func=super()._model_translator + input_name="project_id", + core_colname="model", + func=super()._model_translator, ) self.set_dispatch( input_name="variable_id", diff --git a/tests/conftest.py b/tests/conftest.py index 8b14d9d8..fb7c2551 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -25,6 +25,11 @@ def config_dir(): return Path(here / "e2e/configs") +@fixture(scope="session") +def live_config_dir(): + return Path(here).parent / "config" + + @fixture(scope="session") def BASE_DIR(tmp_path_factory): yield tmp_path_factory.mktemp("catalog-dir") diff --git a/tests/data/esm_datastore/cordex-ig45.csv b/tests/data/esm_datastore/cordex-ig45.csv index 06f6a2fd..9d71aeb0 100644 --- a/tests/data/esm_datastore/cordex-ig45.csv +++ b/tests/data/esm_datastore/cordex-ig45.csv @@ -1,6 +1,6 @@ -path,file_type,project_id,resolution,institution_id,source_id,experiment_id,member_id,frequency,variable_id,version,time_range -/g/data/ig45/QldFCP-2/output/CMIP6/DD/AUS-10i/UQ-DEC/ACCESS-CM2/ssp126/r2i1p1f1/CCAMoc-v2112/v1-r1/day/hus200/v20240709/hus200_AUS-10i_ACCESS-CM2_ssp126_r2i1p1f1_UQ-DEC_CCAMoc-v2112_v1-r1_day_20580101-20581231.nc,f,output,AUS-10i,UQ-DEC,ACCESS-CM2,ssp126,r2i1p1f1,day,hus200,v20240709,20580101-20581231 -/g/data/ig45/QldFCP-2/CORDEX/CMIP6/DD/AUS-20i/UQ-DEC/ACCESS-ESM1-5/ssp126/r20i1p1f1/CCAMoc-v2112/v1-r1/mon/va925/v20240722/va925_AUS-20i_ACCESS-ESM1-5_ssp126_r20i1p1f1_UQ-DEC_CCAMoc-v2112_v1-r1_mon_208101-209012.nc,f,CORDEX,AUS-20i,UQ-DEC,ACCESS-ESM1-5,ssp126,r20i1p1f1,mon,va925,v20240722,208101-209012 -/g/data/ig45/QldFCP-2/CORDEX/CMIP6/DD/AUS-20i/UQ-DEC/ACCESS-ESM1-5/ssp370/r6i1p1f1/CCAM-v2105/v1-r1/mon/clh/v20240722/clh_AUS-20i_ACCESS-ESM1-5_ssp370_r6i1p1f1_UQ-DEC_CCAM-v2105_v1-r1_mon_201501-202012.nc,f,CORDEX,AUS-20i,UQ-DEC,ACCESS-ESM1-5,ssp370,r6i1p1f1,mon,clh,v20240722,201501-202012 -/g/data/ig45/QldFCP-2/output/CMIP6/DD/AUS-10i/UQ-DEC/ACCESS-CM2/ssp126/r2i1p1f1/CCAMoc-v2112/v1-r1/day/ta850/v20240709/ta850_AUS-10i_ACCESS-CM2_ssp126_r2i1p1f1_UQ-DEC_CCAMoc-v2112_v1-r1_day_20340101-20341231.nc,f,output,AUS-10i,UQ-DEC,ACCESS-CM2,ssp126,r2i1p1f1,day,ta850,v20240709,20340101-20341231 -/g/data/ig45/QldFCP-2/CORDEX/CMIP6/DD/AUS-20i/UQ-DEC/NorESM2-MM/ssp126/r1i1p1f1/CCAMoc-v2112/v1-r1/mon/hus200/v20240722/hus200_AUS-20i_NorESM2-MM_ssp126_r1i1p1f1_UQ-DEC_CCAMoc-v2112_v1-r1_mon_201501-202012.nc,f,CORDEX,AUS-20i,UQ-DEC,NorESM2-MM,ssp126,r1i1p1f1,mon,hus200,v20240722,201501-202012 +path,file_type,project_id,experiment_id,member_id,frequency,variable_id,version,time_range +/g/data/ig45/QldFCP-2/CORDEX/CMIP6/DD/AUS-20i/UQ-DEC/ACCESS-ESM1-5/ssp126/r20i1p1f1/CCAMoc-v2112/v1-r1/mon/va925/v20240722/va925_AUS-20i_ACCESS-ESM1-5_ssp126_r20i1p1f1_UQ-DEC_CCAMoc-v2112_v1-r1_mon_208101-209012.nc,f,ACCESS-ESM1-5,ssp126,r20i1p1f1,mon,va925,v20240722,208101-209012 +/g/data/ig45/QldFCP-2/CORDEX/CMIP6/DD/AUS-20i/UQ-DEC/ACCESS-ESM1-5/ssp370/r6i1p1f1/CCAM-v2105/v1-r1/mon/clh/v20240722/clh_AUS-20i_ACCESS-ESM1-5_ssp370_r6i1p1f1_UQ-DEC_CCAM-v2105_v1-r1_mon_201501-202012.nc,f,ACCESS-ESM1-5,ssp370,r6i1p1f1,mon,clh,v20240722,201501-202012 +/g/data/ig45/QldFCP-2/CORDEX/CMIP6/DD/AUS-20i/UQ-DEC/NorESM2-MM/ssp126/r1i1p1f1/CCAMoc-v2112/v1-r1/mon/hus200/v20240722/hus200_AUS-20i_NorESM2-MM_ssp126_r1i1p1f1_UQ-DEC_CCAMoc-v2112_v1-r1_mon_201501-202012.nc,f,NorESM2-MM,ssp126,r1i1p1f1,mon,hus200,v20240722,201501-202012 +/g/data/ig45/QldFCP-2/output/CMIP6/DD/AUS-10i/UQ-DEC/ACCESS-CM2/ssp126/r2i1p1f1/CCAMoc-v2112/v1-r1/day/hus200/v20240709/hus200_AUS-10i_ACCESS-CM2_ssp126_r2i1p1f1_UQ-DEC_CCAMoc-v2112_v1-r1_day_20580101-20581231.nc,f,ACCESS-CM2,ssp126,r2i1p1f1,day,hus200,v20240709,20580101-20581231 +/g/data/ig45/QldFCP-2/output/CMIP6/DD/AUS-10i/UQ-DEC/ACCESS-CM2/ssp126/r2i1p1f1/CCAMoc-v2112/v1-r1/day/ta850/v20240709/ta850_AUS-10i_ACCESS-CM2_ssp126_r2i1p1f1_UQ-DEC_CCAMoc-v2112_v1-r1_day_20340101-20341231.nc,f,ACCESS-CM2,ssp126,r2i1p1f1,day,ta850,v20240709,20340101-20341231 diff --git a/tests/data/esm_datastore/cordex-ig45.json b/tests/data/esm_datastore/cordex-ig45.json index fab7b871..66c92e80 100644 --- a/tests/data/esm_datastore/cordex-ig45.json +++ b/tests/data/esm_datastore/cordex-ig45.json @@ -11,9 +11,6 @@ "groupby_attrs": [ "file_type", "project_id", - "resolution", - "institution_id", - "source_id", "experiment_id", "member_id", "frequency", @@ -39,15 +36,6 @@ { "column_name": "project_id" }, - { - "column_name": "resolution" - }, - { - "column_name": "institution_id" - }, - { - "column_name": "source_id" - }, { "column_name": "experiment_id" }, diff --git a/tests/e2e/__init__.py b/tests/e2e/__init__.py index e69de29b..ee6a234d 100644 --- a/tests/e2e/__init__.py +++ b/tests/e2e/__init__.py @@ -0,0 +1,5 @@ +import pytest + +e2e = pytest.mark.skipif( + "not config.getoption('--e2e')", +) diff --git a/tests/e2e/test_datasets_representative.py b/tests/e2e/test_datasets_representative.py new file mode 100644 index 00000000..843ce8c7 --- /dev/null +++ b/tests/e2e/test_datasets_representative.py @@ -0,0 +1,50 @@ +import pytest +import yaml + +import access_nri_intake.catalog.translators as translators +from access_nri_intake.cli import build + +from . import e2e + + +@e2e +@pytest.mark.parametrize( + "translator_name", + [ + t + for t in dir(translators) + if t.endswith("Translator") and not t.startswith("Default") + ], +) +def test_alignment(translator_name, live_config_dir, BASE_DIR, v_num): + # Now live test the translator. Honestly, might be overkill - it might be easier + # to just extract the json files, open them, check they match the test data + filenames = [f for f in live_config_dir.glob("*.yaml")] + # Now we want to open them & throw away anything where builder != null. + translator_fnames = [] + + for fname in filenames: + with open(fname) as fo: + catalog_metadata = yaml.load(fo, yaml.FullLoader) + if catalog_metadata["translator"] == translator_name: + translator_fnames.append(str(fname)) + + assert len(translator_fnames) == 1 + + try: + build( + [ + *translator_fnames, + "--build_base_path", + str(BASE_DIR), + "--catalog_base_path", + "./", + "--catalog_file", + "metacatalog.csv", + "--version", + v_num, + "--no_update", + ] + ) + except Exception as exc: + assert False, f"Failed to build {translator_name} with exception {exc}" diff --git a/tests/e2e/test_end_to_end.py b/tests/e2e/test_end_to_end.py index edb31515..05262f79 100644 --- a/tests/e2e/test_end_to_end.py +++ b/tests/e2e/test_end_to_end.py @@ -6,9 +6,7 @@ from access_nri_intake.cli import build -e2e = pytest.mark.skipif( - "not config.getoption('--e2e')", -) +from . import e2e @e2e