Skip to content

Commit

Permalink
Merge pull request #529 from cmarshak/gunw-weather-model-availability
Browse files Browse the repository at this point in the history
For GUNW workflow with bucket argument, does nothing if SLCs are not within Weather Model's Valid Range
  • Loading branch information
jlmaurer authored Jul 25, 2023
2 parents 8006b32 + b9f8ea7 commit 296da7c
Show file tree
Hide file tree
Showing 10 changed files with 188 additions and 13 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,9 @@ and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+ For the GUNW workflow:
- Updated GUNW workflow to expose input arguments (usually passed through command line options) within the python function for testing
- Include integration test of HRRR for GUNW workflow
- Test the json write (do not test s3 upload/download) - ensures that weather_model list is included in json `metadata`
- Test the json write (do not test s3 upload/download) to ensure that it conforms to the DAAC ingest schema - we add a weather model field to the metadata in this workflow
- Removed comments in GUNW test suite that were left during previous development
- If a bucket is provided and the GUNW's reference or secondary scenes are not within the weather model's valid range, we do nothing - this ensures that GUNWs can still be delivered to the DAAC without painful book-keeping by the operator (i.e. the person submitting to the hyp3 API)

## [0.4.2]

Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,5 @@
setup(
    ext_modules=cython_extensions + pybind_extensions,
    cmdclass={"build_ext": build_ext},
    # Ship the zipped data files that the weather-model code reads at import time.
    # The glob is not recursive, so the ``data/`` sub-directory (which holds
    # ``alaska.geojson.zip``) needs its own pattern; the original pattern is kept
    # for backward compatibility.
    # NOTE(review): the 'tools' key is kept as-is — confirm it matches a package
    # name known to setuptools, otherwise these patterns are not applied.
    package_data={'tools': ['RAiDER/models/*.zip', 'RAiDER/models/data/*.zip']},
)
13 changes: 11 additions & 2 deletions test/conftest.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from pathlib import Path
from typing import Callable

import pytest

Expand Down Expand Up @@ -30,8 +31,16 @@ def test_dir_path() -> Path:


@pytest.fixture(scope='session')
def test_gunw_path() -> Path:
return TEST_DIR / 'gunw_test_data' / 'S1-GUNW-D-R-071-tops-20200130_20200124-135156-34956N_32979N-PP-913f-v2_0_4.nc'
def test_gunw_path_factory() -> Callable:
    """Return a factory that maps a test-site name to the path of its sample GUNW file.

    The returned callable accepts ``location`` ('california-t71' by default, or
    'alaska') and returns the corresponding path under
    ``TEST_DIR / 'gunw_test_data'``. Any other location raises
    ``NotImplementedError``.
    """
    gunw_file_names = {
        'california-t71': 'S1-GUNW-D-R-071-tops-20200130_20200124-135156-34956N_32979N-PP-913f-v2_0_4.nc',
        'alaska': 'S1-GUNW-D-R-059-tops-20230320_20220418-180300-00179W_00051N-PP-c92e-v2_0_6.nc',
    }

    def factory(location: str = 'california-t71') -> Path:
        selected_name = gunw_file_names.get(location)
        if selected_name is None:
            raise NotImplementedError
        return TEST_DIR / 'gunw_test_data' / selected_name

    return factory


@pytest.fixture(scope='session')
Expand Down
Binary file not shown.
70 changes: 68 additions & 2 deletions test/test_GUNW.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,15 @@
import unittest

import jsonschema
import pandas as pd
import pytest
import rasterio as rio
import xarray as xr

import RAiDER
import RAiDER.cli.raider as raider
from RAiDER import aws
from RAiDER.aria.prepFromGUNW import check_weather_model_availability
from RAiDER.cli.raider import calcDelaysGUNW

def compute_transform(lats, lons):
Expand All @@ -28,10 +30,10 @@ def compute_transform(lats, lons):

@pytest.mark.isce3
@pytest.mark.parametrize('weather_model_name', ['GMAO', 'HRRR'])
def test_GUNW_update(test_dir_path, test_gunw_path, weather_model_name):
def test_GUNW_update(test_dir_path, test_gunw_path_factory, weather_model_name):
scenario_dir = test_dir_path / 'GUNW'
scenario_dir.mkdir(exist_ok=True, parents=True)
orig_GUNW = test_gunw_path
orig_GUNW = test_gunw_path_factory()
updated_GUNW = scenario_dir / orig_GUNW.name
shutil.copy(orig_GUNW, updated_GUNW)

Expand Down Expand Up @@ -78,6 +80,7 @@ def test_GUNW_metadata_update(test_gunw_json_path, test_gunw_json_schema_path, t
mocker.patch("RAiDER.aws.get_s3_file", side_effect=['foo.nc', temp_json_path])
mocker.patch("RAiDER.aws.upload_file_to_s3")
mocker.patch("RAiDER.aria.prepFromGUNW.main", return_value=['my_path_cfg', 'my_wavelength'])
mocker.patch("RAiDER.aria.prepFromGUNW.check_weather_model_availability", return_value=True)
mocker.patch("RAiDER.cli.raider.calcDelays", return_value=['file1', 'file2'])
mocker.patch("RAiDER.aria.calcGUNW.tropo_gunw_slc")
mocker.patch("os.getcwd", return_value='myDir')
Expand Down Expand Up @@ -114,3 +117,66 @@ def test_GUNW_metadata_update(test_gunw_json_path, test_gunw_json_schema_path, t
unittest.mock.call('foo.nc', 'myBucket', 'myPrefix'),
unittest.mock.call(temp_json_path, 'myBucket', 'myPrefix'),
]


@pytest.mark.parametrize('weather_model_name', ['GMAO', 'HRRR', 'HRES', 'ERA5', 'ERA5T'])
def test_check_weather_model_availability(test_gunw_path_factory, weather_model_name, mocker):
    """Check availability for every supported model on the default (California) GUNW.

    First the real acquisition dates of the sample GUNW are used, for which every
    model is expected to be available. Then the acquisition dates are mocked to
    2015-01-01 (reference) and 2014-01-01 (secondary), for which only HRRR and
    GMAO are expected to be reported as unavailable.
    """
    # Should be True for all weather models
    # S1-GUNW-D-R-071-tops-20200130_20200124-135156-34956N_32979N-PP-913f-v2_0_4.nc
    test_gunw_path = test_gunw_path_factory()
    assert check_weather_model_availability(test_gunw_path, weather_model_name)

    # Let's mock an earlier date for some models; the two side effects are consumed
    # in call order: reference first, then secondary.
    mocker.patch("RAiDER.aria.prepFromGUNW.get_acq_from_slc_id",
                 side_effect=[pd.Timestamp('2015-01-01'),
                              pd.Timestamp('2014-01-01')])
    cond = check_weather_model_availability(test_gunw_path, weather_model_name)
    if weather_model_name in ['HRRR', 'GMAO']:
        # These two models are expected to be unavailable for the mocked dates.
        cond = not cond
    assert cond


@pytest.mark.parametrize('weather_model_name', ['GMAO', 'HRRR'])
def test_check_weather_model_availability_over_alaska(test_gunw_path_factory, weather_model_name, mocker):
    """Check availability for the Alaska sample GUNW, with real and then mocked dates.

    With the GUNW's own dates both models are expected to be available. With the
    acquisition dates mocked to 2017-01-01 / 2016-01-01 only HRRR is expected to
    be reported as unavailable.
    """
    # S1-GUNW-D-R-059-tops-20230320_20220418-180300-00179W_00051N-PP-c92e-v2_0_6.nc
    alaska_gunw = test_gunw_path_factory(location='alaska')

    # Real dates: should be True for all weather models
    assert check_weather_model_availability(alaska_gunw, weather_model_name)

    # Mocked earlier dates, consumed in call order (reference, then secondary)
    mocked_dates = [pd.Timestamp('2017-01-01'), pd.Timestamp('2016-01-01')]
    mocker.patch("RAiDER.aria.prepFromGUNW.get_acq_from_slc_id", side_effect=mocked_dates)

    is_available = check_weather_model_availability(alaska_gunw, weather_model_name)
    expected_available = weather_model_name != 'HRRR'
    assert bool(is_available) is expected_available


@pytest.mark.parametrize('weather_model_name', ['HRRR', 'GMAO'])
@pytest.mark.parametrize('location', ['california-t71', 'alaska'])
def test_weather_model_availability_integration(location, test_gunw_path_factory, tmp_path, weather_model_name, mocker):
    """Run the GUNW workflow with a bucket and out-of-range dates; expect a no-op.

    S3 access is mocked so that the sample GUNW is handed to the workflow. The
    acquisition dates are mocked to lie outside the models' temporal coverage, so
    the workflow must return ``None`` without calling any of the processing
    functions.
    """
    test_gunw_path = test_gunw_path_factory(location=location)
    temp_json_path = tmp_path / 'temp.json'
    shutil.copy(test_gunw_path, temp_json_path)

    # These are outside temporal availability of GMAO and HRRR
    out_of_range_dates = [pd.Timestamp('2015-01-01'), pd.Timestamp('2014-01-01')]
    mocker.patch("RAiDER.aria.prepFromGUNW.get_acq_from_slc_id", side_effect=out_of_range_dates)

    # We will pass the test GUNW to the workflow
    mocker.patch("RAiDER.aws.get_s3_file", side_effect=[test_gunw_path, 'foo.json'])
    mocker.patch("RAiDER.aws.upload_file_to_s3")

    # Don't specify side-effects or return values, because never called
    prep_main_mock = mocker.patch("RAiDER.aria.prepFromGUNW.main")
    calc_delays_mock = mocker.patch("RAiDER.cli.raider.calcDelays")
    tropo_slc_mock = mocker.patch("RAiDER.aria.calcGUNW.tropo_gunw_slc")

    iargs = [
        '--weather-model', weather_model_name,
        '--bucket', 'myBucket',
        '--bucket-prefix', 'myPrefix',
    ]
    # Check it returned None
    assert calcDelaysGUNW(iargs) is None

    # Check these functions were not called
    calc_delays_mock.assert_not_called()
    prep_main_mock.assert_not_called()
    tropo_slc_mock.assert_not_called()
82 changes: 82 additions & 0 deletions tools/RAiDER/aria/prepFromGUNW.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,102 @@
import numpy as np
import xarray as xr
import rasterio
import geopandas as gpd
import pandas as pd
import yaml
import shapely.wkt
from dataclasses import dataclass
import sys
from shapely.geometry import box
from rasterio.crs import CRS

import RAiDER
from RAiDER.utilFcns import rio_open, writeArrayToRaster
from RAiDER.logger import logger
from RAiDER.models import credentials
from RAiDER.models.hrrr import HRRR_CONUS_COVERAGE_POLYGON, AK_GEO
from eof.download import download_eofs

## cube spacing in degrees for each model
DCT_POSTING = {'HRRR': 0.05, 'HRES': 0.10, 'GMAO': 0.10, 'ERA5': 0.10, 'ERA5T': 0.10}


def get_slc_ids_from_gunw(gunw_path: str,
                          reference_or_secondary: str = 'reference') -> list[str]:
    """Read the input SLC granule ids stored in a GUNW file.

    Parameters
    ----------
    gunw_path : str
        Path of the GUNW file, opened with ``xarray.open_dataset``.
    reference_or_secondary : str
        Which input-SLC group to read; either 'reference' or 'secondary'.

    Returns
    -------
    The ``.values`` of the ``L1InputGranules`` variable found in the group
    ``science/radarMetaData/inputSLC/<reference_or_secondary>``.

    Raises
    ------
    ValueError
        If ``reference_or_secondary`` is neither 'reference' nor 'secondary'.
    """
    allowed_groups = ('reference', 'secondary')
    if reference_or_secondary not in allowed_groups:
        raise ValueError('"reference_or_secondary" must be either "reference" or "secondary"')

    slc_group = f'science/radarMetaData/inputSLC/{reference_or_secondary}'
    with xr.open_dataset(gunw_path, group=slc_group) as dataset:
        granule_ids = dataset['L1InputGranules'].values
    return granule_ids


def get_acq_from_slc_id(slc_id: str) -> pd.Timestamp:
    """Parse the acquisition timestamp embedded in an SLC granule id.

    The id is split on underscores and the sixth field (index 5) is handed to
    ``pandas.Timestamp``.
    NOTE(review): presumably this field is the sensing start time of a standard
    Sentinel-1 SLC name — confirm against the granule naming convention.
    """
    fields = slc_id.split('_')
    acquisition_token = fields[5]
    return pd.Timestamp(acquisition_token)


def check_weather_model_availability(gunw_path: str,
                                     weather_model_name: str) -> bool:
    """Check whether the reference and secondary dates of a GUNW fall within
    the weather model's valid range.

    Parameters
    ----------
    gunw_path : str
        Path of the GUNW file to inspect.
    weather_model_name : str
        Should be one of 'HRRR', 'HRES', 'ERA5', 'ERA5T', 'GMAO'. When 'HRRR' is
        requested, the GUNW extent decides whether the CONUS ('HRRR') or the
        Alaska ('HRRRAK') model class is used.

    Returns
    -------
    bool
        True if both the reference and the secondary acquisition lie within the
        model's valid range. The check does not depend on which of the two
        acquisitions is the more recent one.

    Raises
    ------
    ValueError
        - If the weather model name does not reference a class exported by RAiDER.models
        - If HRRR was requested and the GUNW is in neither the HRRR CONUS nor the HRRR AK coverage area
        - If the weather model's end date is neither 'Present' nor a datetime
    """
    ref_slc_ids = get_slc_ids_from_gunw(gunw_path, reference_or_secondary='reference')
    sec_slc_ids = get_slc_ids_from_gunw(gunw_path, reference_or_secondary='secondary')

    # Only the first granule of each group is used to date the acquisition.
    ref_ts = get_acq_from_slc_id(ref_slc_ids[0])
    sec_ts = get_acq_from_slc_id(sec_slc_ids[0])

    if weather_model_name == 'HRRR':
        # HRRR has two regional variants; choose one from the GUNW footprint,
        # taken here as the bounding box of the coherence layer.
        group = '/science/grids/data/'
        variable = 'coherence'
        with rasterio.open(f'netcdf:{gunw_path}:{group}/{variable}') as ds:
            gunw_poly = box(*ds.bounds)
        if HRRR_CONUS_COVERAGE_POLYGON.intersects(gunw_poly):
            pass
        elif AK_GEO.intersects(gunw_poly):
            weather_model_name = 'HRRRAK'
        else:
            raise ValueError('HRRR was requested but it is not available in this area')

    # source: https://stackoverflow.com/a/7668273
    # Allows us to get weather models as strings
    # getattr(module, 'HRRR') will return HRRR class
    module = sys.modules['RAiDER.models']
    weather_model_names = module.__all__
    if weather_model_name not in weather_model_names:
        raise ValueError(f'The "weather_model_name" must be in {", ".join(weather_model_names)}')

    weather_model_cls = getattr(module, weather_model_name)
    weather_model = weather_model_cls()

    wm_start_date, wm_end_date = weather_model._valid_range
    if isinstance(wm_end_date, str) and wm_end_date == 'Present':
        # An open-ended range stops at "now" minus the model's publication lag.
        wm_end_date = datetime.today() - weather_model._lag_time
    elif not isinstance(wm_end_date, datetime):
        raise ValueError(f'the weather model\'s end date is not valid: {wm_end_date}')

    # Compare the earlier acquisition against the start and the later one against
    # the end, so the result holds even if reference is not the most recent scene.
    earliest_ts = min(ref_ts, sec_ts)
    latest_ts = max(ref_ts, sec_ts)
    starts_in_range = earliest_ts >= wm_start_date
    ends_in_range = latest_ts <= wm_end_date
    return starts_in_range and ends_in_range


@dataclass
class GUNW:
path_gunw: str
Expand Down
6 changes: 6 additions & 0 deletions tools/RAiDER/cli/raider.py
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,12 @@ def calcDelaysGUNW(iargs: list[str] = None):
# args.files = glob.glob(args.files) # eventually support multiple files
if not args.file and args.bucket:
args.file = aws.get_s3_file(args.bucket, args.bucket_prefix, '.nc')
if not RAiDER.aria.prepFromGUNW.check_weather_model_availability(args.file, args.weather_model):
# NOTE: We want to submit jobs that are outside of acceptable weather model range
# and still deliver these products to the DAAC without this layer. Therefore
# we include this within this portion of the control flow.
print('Nothing to do because outside of weather model range')
return
json_file_path = aws.get_s3_file(args.bucket, args.bucket_prefix, '.json')
json_data = json.load(open(json_file_path))
json_data['metadata'].setdefault('weather_model', []).append(args.weather_model)
Expand Down
7 changes: 7 additions & 0 deletions tools/RAiDER/models/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from .era5 import ERA5
from .era5t import ERA5T
from .gmao import GMAO
from .hres import HRES
from .hrrr import HRRR, HRRRAK

__all__ = ['HRRR', 'HRRRAK', 'GMAO', 'ERA5', 'ERA5T', 'HRES']
Binary file added tools/RAiDER/models/data/alaska.geojson.zip
Binary file not shown.
19 changes: 11 additions & 8 deletions tools/RAiDER/models/hrrr.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import numpy as np

from herbie import Herbie
import geopandas as gpd
from pathlib import Path
from pyproj import CRS, Transformer
from shapely.geometry import Polygon, box
Expand Down Expand Up @@ -167,6 +168,13 @@ def load_weather_hrrr(filename):

return _xs, _ys, lons, lats, qs, temps, pl, geo_hgt, proj

# Module-level coverage geometries and projection shared by the HRRR model classes
# and importable by other modules.
# NOTE(review): polygon vertices are presumably (lon, lat) pairs in degrees — confirm.
# The CONUS polygon uses negative longitudes while the Alaska polygon uses values above 180.
HRRR_CONUS_COVERAGE_POLYGON = Polygon(((-125, 21), (-133, 49), (-60, 49), (-72, 21)))
HRRR_AK_COVERAGE_POLYGON = Polygon(((195, 40), (157, 55), (175, 70), (260, 77), (232, 52)))
# Stereographic projection on a sphere of radius 6371229 m, with lat_0=90 and lon_0=225.
HRRR_AK_PROJ = CRS.from_string('+proj=stere +ellps=sphere +a=6371229.0 +b=6371229.0 +lat_0=90 +lon_0=225.0 '
'+x_0=0.0 +y_0=0.0 +lat_ts=60.0 +no_defs +type=crs')
# Source: https://eric.clst.org/tech/usgeojson/
# Read at import time from the zipped GeoJSON in the ``data`` directory next to this module;
# all geometries in the file are merged into a single geometry.
AK_GEO = gpd.read_file(Path(__file__).parent / 'data' / 'alaska.geojson.zip').geometry.unary_union


class HRRR(WeatherModel):
def __init__(self):
Expand Down Expand Up @@ -225,7 +233,7 @@ def __init__(self):
f'+b={earth_radius} +units=m +no_defs')
self._proj = p1

self._valid_bounds = Polygon(((-125, 21), (-133, 49), (-60, 49), (-72, 21)))
self._valid_bounds = HRRR_CONUS_COVERAGE_POLYGON


def _fetch(self, out):
Expand Down Expand Up @@ -333,15 +341,10 @@ def __init__(self):
self._time_res = TIME_RES['HRRR-AK']
self._valid_range = (datetime.datetime(2018, 7, 13), "Present")
self._lag_time = datetime.timedelta(hours=3)
self._valid_bounds = Polygon(((195, 40), (157, 55), (175, 70), (260, 77), (232, 52)))

self._valid_bounds = HRRR_AK_COVERAGE_POLYGON
# The projection information gets read directly from the weather model file but we
# keep this here for object instantiation.
self._proj = CRS.from_string(
'+proj=stere +ellps=sphere +a=6371229.0 +b=6371229.0 +lat_0=90 +lon_0=225.0 ' +
'+x_0=0.0 +y_0=0.0 +lat_ts=60.0 +no_defs +type=crs'
)

self._proj = HRRR_AK_PROJ

def _fetch(self, out):
bounds = self._ll_bounds.copy()
Expand Down

0 comments on commit 296da7c

Please sign in to comment.