diff --git a/src/access_nri_intake/source/builders.py b/src/access_nri_intake/source/builders.py
index 0d55435f..4b485af1 100644
--- a/src/access_nri_intake/source/builders.py
+++ b/src/access_nri_intake/source/builders.py
@@ -266,6 +266,107 @@ def parser(file):
             return {INVALID_ASSET: file, TRACEBACK: traceback.format_exc()}
 
 
+class AccessOm3Builder(BaseBuilder):
+    """Intake-ESM datastore builder for ACCESS-OM3 COSIMA datasets"""
+
+    def __init__(self, path):
+        """
+        Initialise an AccessOm3Builder
+
+        Parameters
+        ----------
+        path : str or list of str
+            Path or list of paths to crawl for assets/files.
+        """
+
+        kwargs = dict(
+            path=path,
+            depth=2,
+            exclude_patterns=[
+                "*restart*",
+                "*MOM_IC.nc",
+                "*ocean_geometry.nc",
+                "*ocean.stats.nc",
+                "*Vertical_coordinate.nc",
+            ],
+            include_patterns=["*.nc"],
+            data_format="netcdf",
+            groupby_attrs=["file_id", "frequency"],
+            aggregations=[
+                {
+                    "type": "join_existing",
+                    "attribute_name": "start_date",
+                    "options": {
+                        "dim": "time",
+                        "combine": "by_coords",
+                    },
+                },
+            ],
+        )
+
+        super().__init__(**kwargs)
+
+    @staticmethod
+    def parser(file):
+        """
+        Parse the catalogue metadata for a single ACCESS-OM3 output file
+
+        Parameters
+        ----------
+        file : str
+            Path of the file to parse.
+
+        Returns
+        -------
+        dict
+            Metadata for the file, or a dict holding INVALID_ASSET and
+            TRACEBACK entries if any exception is raised while parsing.
+        """
+        try:
+            (
+                filename,
+                file_id,
+                _,  # third value from parse_access_ncfile is not used here
+                frequency,
+                start_date,
+                end_date,
+                variable_list,
+                variable_long_name_list,
+                variable_standard_name_list,
+                variable_cell_methods_list,
+                variable_units_list,
+            ) = parse_access_ncfile(file)
+
+            # Infer the realm from the model component named in the filename
+            if ("mom6" in filename) or ("ww3" in filename):
+                realm = "ocean"
+            elif "cice" in filename:
+                realm = "seaIce"
+            else:
+                raise ParserError(f"Cannot determine realm for file {file}")
+
+            info = {
+                "path": str(file),
+                "realm": realm,
+                "variable": variable_list,
+                "frequency": frequency,
+                "start_date": start_date,
+                "end_date": end_date,
+                "variable_long_name": variable_long_name_list,
+                "variable_standard_name": variable_standard_name_list,
+                "variable_cell_methods": variable_cell_methods_list,
+                "variable_units": variable_units_list,
+                "filename": filename,
+                "file_id": file_id,
+            }
+
+            return info
+
+        except Exception:
+            # Report any failure as an invalid asset along with its traceback
+            return {INVALID_ASSET: file, TRACEBACK: traceback.format_exc()}
+
+
 class AccessEsm15Builder(BaseBuilder):
     """Intake-ESM datastore builder for ACCESS-ESM1.5 datasets"""
 
diff --git a/src/access_nri_intake/source/utils.py b/src/access_nri_intake/source/utils.py
index df3f636f..4ed96682 100644
--- a/src/access_nri_intake/source/utils.py
+++ b/src/access_nri_intake/source/utils.py
@@ -169,7 +169,10 @@ def parse_access_filename(filename):
     """
 
     # ACCESS output file patterns
+    # TODO: these should be defined per driver to prevent new patterns from breaking old drivers
     not_multi_digit = "(?:\\d(?!\\d)|[^\\d](?=\\d)|[^\\d](?!\\d))"
+    om3_components = "(?:cice|mom6|ww3)"
+    ymds = "\\d{4}[_,-]\\d{2}[_,-]\\d{2}[_,-]\\d{5}"  # date plus 5-digit suffix, e.g. 1958-01-02-00000
     ymd = "\\d{4}[_,-]\\d{2}[_,-]\\d{2}"
     ym = "\\d{4}[_,-]\\d{2}"
     y = "\\d{4}"
@@ -181,6 +184,7 @@ def parse_access_filename(filename):
         r"^ocean.*[^\d]_(\d{2})$",  # A few wierd files in ACCESS-OM2 01deg_jra55v13_ryf9091
         r"^.*\.p.(\d{6})_.*",  # ACCESS-CM2 atmosphere
         r"^.*\.p.-(\d{6})_.*",  # ACCESS-ESM1.5 atmosphere
+        rf"[^\.]*\.{om3_components}\..*({ymds}|{ymd})$",  # ACCESS-OM3
     ]
     # Frequency translations
     frequencies = {
diff --git a/tests/test_source_utils.py b/tests/test_source_utils.py
index b1afccee..097b4e59 100644
--- a/tests/test_source_utils.py
+++ b/tests/test_source_utils.py
@@ -104,6 +104,47 @@
             "rregionPrydz_temp_xflux_adv",
             ("rregionPrydz_temp_xflux_adv", None, None),
         ),
+        # Example ACCESS-OM3 filenames
+        (
+            "GMOM_JRA_WD.ww3.hi.1958-01-02-00000",
+            (
+                "GMOM_JRA_WD_ww3_hi_XXXX_XX_XX_XXXXX",
+                "1958-01-02-00000",
+                None,
+            ),
+        ),
+        (
+            "GMOM_JRA.cice.h.1900-01-01",
+            (
+                "GMOM_JRA_cice_h_XXXX_XX_XX",
+                "1900-01-01",
+                None,
+            ),
+        ),
+        (
+            "GMOM_JRA.mom6.ocean_sfc_1900_01_01",
+            (
+                "GMOM_JRA_mom6_ocean_sfc_XXXX_XX_XX",
+                "1900_01_01",
+                None,
+            ),
+        ),
+        (
+            "GMOM_JRA.mom6.sfc_1900_01_01",
+            (
+                "GMOM_JRA_mom6_sfc_XXXX_XX_XX",
+                "1900_01_01",
+                None,
+            ),
+        ),
+        (
+            "GMOM_JRA.mom6.static",
+            (
+                "GMOM_JRA_mom6_static",
+                None,
+                None,
+            ),
+        ),
     ],
 )
 def test_parse_access_filename(filename, expected):