-
Notifications
You must be signed in to change notification settings - Fork 2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DAS-2232 - Functionality added to support SMAP L3 products #15
Changes from 38 commits
79b4c3f
789cc6b
c4abf26
be280f0
2a48f93
2c93a4b
dfb1e15
e2ff61f
756f7c0
53f1660
f9f5e8b
f836ee6
c35c8ee
ffe035a
9c9f190
7eda980
f07b544
3b453e5
681b20d
5d609c9
91c51c0
2296a35
2efc4c7
f628166
ebac2a0
3b6d605
802fe0e
60fb22a
631dc24
1e7bc35
36e15c7
5c5eb85
80c2fb2
30eccd0
16872b7
822758f
7883465
dd98e81
de350b6
0666248
e07099a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
1.0.5 | ||
1.1.0 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,5 +16,5 @@ | |
# | ||
harmony-py~=0.4.10 | ||
netCDF4~=1.6.4 | ||
notebook~=7.0.4 | ||
notebook~=7.2.2 | ||
xarray~=2023.9.0 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,353 @@ | ||
""" This module contains utility functions used for | ||
coordinate variables and methods to convert the | ||
coordinate variable data to x/y dimension scales | ||
""" | ||
|
||
from typing import Dict, Set, Tuple | ||
|
||
import numpy as np | ||
from netCDF4 import Dataset | ||
from numpy import ndarray | ||
from pyproj import CRS | ||
from varinfo import VariableFromDmr, VarInfoFromDmr | ||
|
||
from hoss.exceptions import ( | ||
IrregularCoordinateDatasets, | ||
MissingCoordinateDataset, | ||
MissingValidCoordinateDataset, | ||
) | ||
from hoss.projection_utilities import ( | ||
get_x_y_extents_from_geographic_points, | ||
) | ||
|
||
|
||
def get_override_projected_dimension_name(
    varinfo: VarInfoFromDmr,
    variable_name: str,
) -> str:
    """Map a geographic coordinate variable to its projected dimension
    name.

    A `latitude` coordinate variable maps to the 'projected_y'
    dimension scale and a `longitude` coordinate variable maps to
    'projected_x'. Any other variable — including one `varinfo` does
    not know about — yields an empty string.
    """
    variable = varinfo.get_variable(variable_name)

    if variable is None:
        return ''

    if variable.is_latitude():
        return 'projected_y'

    if variable.is_longitude():
        return 'projected_x'

    return ''
|
||
|
||
def get_override_projected_dimensions(
    varinfo: VarInfoFromDmr,
    variable_name: str,
) -> list[str]:
    """Return the projected dimension names for a variable.

    When the variable references latitude/longitude coordinate
    variables, the names derived from those coordinates are returned
    (y before x). When the variable is itself a recognised coordinate
    variable, ['projected_y', 'projected_x'] is returned. Otherwise an
    empty list is returned.
    """
    latitude_coordinates, longitude_coordinates = get_coordinate_variables(
        varinfo, [variable_name]
    )

    if latitude_coordinates and longitude_coordinates:
        # Only one latitude and one longitude coordinate are expected
        # per variable, so the first of each is used.
        return [
            get_override_projected_dimension_name(varinfo, latitude_coordinates[0]),
            get_override_projected_dimension_name(varinfo, longitude_coordinates[0]),
        ]

    # The requested variable may itself be the coordinate override.
    if get_override_projected_dimension_name(varinfo, variable_name) in (
        'projected_y',
        'projected_x',
    ):
        return ['projected_y', 'projected_x']

    return []
|
||
|
||
def get_variables_with_anonymous_dims(
    varinfo: VarInfoFromDmr, required_variables: set[str]
) -> set[str]:
    """Return the subset of `required_variables` that have no
    dimensions declared in `varinfo`.

    The original annotation claimed a `bool` return, but a set of
    variable full paths is what is actually produced — the annotation
    and docstring are corrected here.
    """
    return {
        required_variable
        for required_variable in required_variables
        # A variable with an empty `dimensions` list is "anonymous":
        # no dimension scales are associated with it.
        if len(varinfo.get_variable(required_variable).dimensions) == 0
    }
|
||
|
||
def get_coordinate_variables(
    varinfo: VarInfoFromDmr,
    requested_variables: Set[str],
) -> tuple[list, list]:
    """Return the coordinate variables referenced by the requested
    variables, split into two lists in a fixed order:
    ([latitudes], [longitudes]).
    """
    referenced_coordinates = varinfo.get_references_for_attribute(
        requested_variables, 'coordinates'
    )

    latitude_coordinate_variables = []
    longitude_coordinate_variables = []

    for coordinate in referenced_coordinates:
        coordinate_variable = varinfo.get_variable(coordinate)
        if coordinate_variable.is_latitude():
            latitude_coordinate_variables.append(coordinate)
        if coordinate_variable.is_longitude():
            longitude_coordinate_variables.append(coordinate)

    return latitude_coordinate_variables, longitude_coordinate_variables
|
||
|
||
def update_dimension_variables(
    prefetch_dataset: Dataset,
    latitude_coordinate: VariableFromDmr,
    longitude_coordinate: VariableFromDmr,
    crs: CRS,
) -> Dict[str, ndarray]:
    """Generate artificial 1D dimensions variable for each
    2D dimension or coordinate variable

    For each dimension variable:
    (1) Check if the dimension variable is 1D.
    (2) If it is not 1D and is 2D get the dimension sizes
    (3) Get the corner points from the coordinate variables
    (4) Get the x-y max-min values
    (5) Generate the x-y dimscale array and return to the calling method

    :param prefetch_dataset: NetCDF4 Dataset containing the prefetched
        coordinate variable data.
    :param latitude_coordinate: VarInfo record for the latitude
        coordinate variable.
    :param longitude_coordinate: VarInfo record for the longitude
        coordinate variable.
    :param crs: projected coordinate reference system the dimension
        scales are expressed in.
    :returns: dict mapping 'projected_y' and 'projected_x' to 1-D
        dimension-scale arrays.
    """
    # Full latitude/longitude arrays from the prefetched dataset.
    lat_arr, lon_arr = get_lat_lon_arrays(
        prefetch_dataset,
        latitude_coordinate,
        longitude_coordinate,
    )

    lat_fill, lon_fill = get_fill_values_for_coordinates(
        latitude_coordinate, longitude_coordinate
    )

    row_size, col_size = get_row_col_sizes_from_coordinate_datasets(
        lat_arr,
        lon_arr,
    )

    # Four (lon, lat) corner points bounding the valid coordinate data.
    geo_grid_corners = get_geo_grid_corners(
        lat_arr,
        lon_arr,
        lat_fill,
        lon_fill,
    )

    # Project the geographic corner points into the target CRS.
    x_y_extents = get_x_y_extents_from_geographic_points(geo_grid_corners, crs)

    # get grid size and resolution
    x_min = x_y_extents['x_min']
    x_max = x_y_extents['x_max']
    y_min = x_y_extents['y_min']
    y_max = x_y_extents['y_max']
    # NOTE(review): the x span is divided by row_size and the y span by
    # col_size, while get_row_col_sizes_from_coordinate_datasets takes
    # row_size from shape[1] and col_size from shape[0] — confirm this
    # row/column pairing is intentional.
    x_resolution = (x_max - x_min) / row_size
    y_resolution = (y_max - y_min) / col_size

    # create the xy dim scales
    lat_asc, lon_asc = is_lat_lon_ascending(lat_arr, lon_arr, lat_fill, lon_fill)

    if lon_asc:
        x_dim = np.arange(x_min, x_max, x_resolution)
    else:
        # NOTE(review): np.arange with start < stop and a negative step
        # yields an EMPTY array — verify the descending-longitude
        # branch produces the intended dimension scale.
        x_dim = np.arange(x_min, x_max, -x_resolution)

    if lat_asc:
        # NOTE(review): start=y_max > stop=y_min with a positive step
        # also yields an empty array — verify against expected output.
        y_dim = np.arange(y_max, y_min, y_resolution)
    else:
        y_dim = np.arange(y_max, y_min, -y_resolution)

    return {'projected_y': y_dim, 'projected_x': x_dim}
|
||
|
||
def get_row_col_sizes_from_coordinate_datasets(
    lat_arr: ndarray,
    lon_arr: ndarray,
) -> Tuple[int, int]:
    """Return the (row_size, col_size) of the coordinate datasets.

    For 2-D coordinates the sizes come from the (shared) latitude
    shape; for a pair of 1-D coordinates the latitude length is the
    column size and the longitude length is the row size.

    :raises IrregularCoordinateDatasets: when 2-D latitude and
        longitude arrays do not share the same shape.
    """
    # Defaults are returned when the arrays fit neither the 2-D nor
    # the 1-D/1-D pattern; callers should treat (0, 0) as "unknown".
    row_size = 0
    col_size = 0

    if lat_arr.ndim > 1:
        # Compare the complete shapes before indexing into them, so a
        # mismatched-rank longitude array raises a clear domain error
        # rather than an IndexError.
        if lat_arr.shape != lon_arr.shape:
            raise IrregularCoordinateDatasets(lon_arr.shape, lat_arr.shape)
        col_size = lat_arr.shape[0]
        row_size = lat_arr.shape[1]
    elif lat_arr.ndim == 1 and lon_arr.ndim == 1:
        # Bug fix: the original condition `lat_arr.ndim and
        # lon_arr.ndim == 1` only tested that lat_arr.ndim was truthy,
        # so this branch also fired for 2-D latitude arrays and
        # silently overwrote the sizes computed above.
        col_size = lat_arr.size
        row_size = lon_arr.size

    return row_size, col_size
|
||
|
||
def is_lat_lon_ascending(
    lat_arr: ndarray,
    lon_arr: ndarray,
    lat_fill: float,
    lon_fill: float,
) -> tuple[bool, bool]:
    """Determine whether the latitude and longitude coordinate
    datasets hold ascending values.

    The first column of the latitude array and the first row of the
    longitude array are sampled, fill values are screened out, and the
    first two remaining values of each are compared.

    NOTE: this assumes latitude varies along axis 0 and longitude
    along axis 1 of the 2-D coordinate arrays.
    """
    first_lat_column = lat_arr[:, 0]
    first_lon_row = lon_arr[0, :]

    valid_lat_indices = get_valid_indices(first_lat_column, lat_fill, 'latitude')
    valid_lon_indices = get_valid_indices(first_lon_row, lon_fill, 'longitude')

    latitude_ascending = (
        first_lat_column[valid_lat_indices[1]]
        > first_lat_column[valid_lat_indices[0]]
    )
    longitude_ascending = (
        first_lon_row[valid_lon_indices[1]] > first_lon_row[valid_lon_indices[0]]
    )

    return latitude_ascending, longitude_ascending
|
||
|
||
def get_lat_lon_arrays(
    prefetch_dataset: Dataset,
    latitude_coordinate: VariableFromDmr,
    longitude_coordinate: VariableFromDmr,
) -> Tuple[ndarray, ndarray]:
    """Read the latitude and longitude arrays for the given coordinate
    variables from the prefetched dataset.

    :raises MissingCoordinateDataset: when either coordinate variable
        cannot be found in the prefetch dataset.
    """
    try:
        lat_arr = prefetch_dataset[latitude_coordinate.full_name_path][:]
        lon_arr = prefetch_dataset[longitude_coordinate.full_name_path][:]
        return lat_arr, lon_arr
    except (AttributeError, IndexError, KeyError) as exception:
        # Catch only the lookup failures expected here (missing
        # variable path, or a None coordinate record) rather than a
        # bare `Exception`, so unrelated bugs surface normally.
        raise MissingCoordinateDataset('latitude/longitude') from exception
|
||
|
||
def get_geo_grid_corners(
    lat_arr: ndarray,
    lon_arr: ndarray,
    lat_fill: float,
    lon_fill: float,
) -> list[Tuple[float, float]]:
    """Return the four geographic corner points of a 2-D coordinate
    dataset as (longitude, latitude) tuples in the order: top-left,
    top-right, bottom-right, bottom-left.

    The extremes are found along one longitude row and one latitude
    column after screening fill values. Fill values appearing exactly
    at the corner points are not yet handled by stepping to the next
    row/column; in that case an exception is raised downstream.

    NOTE(review): assumes longitude varies along axis 1 and latitude
    along axis 0 of the arrays — TODO confirm for collections with
    reversed dimension ordering.
    """
    top_left_row_idx = 0
    top_left_col_idx = 0

    # get the first row from the longitude dataset
    lon_row = lon_arr[top_left_row_idx, :]
    lon_row_valid_indices = get_valid_indices(lon_row, lon_fill, 'longitude')

    # get the index of the minimum longitude after checking for invalid entries
    # (argmin indexes the screened subset, so map back through the
    # valid-index array to recover the original column index)
    top_left_col_idx = lon_row_valid_indices[lon_row[lon_row_valid_indices].argmin()]
    min_lon = lon_row[top_left_col_idx]

    # get the index of the maximum longitude after checking for invalid entries
    top_right_col_idx = lon_row_valid_indices[lon_row[lon_row_valid_indices].argmax()]
    max_lon = lon_row[top_right_col_idx]

    # get the last valid longitude column to get the latitude array
    lat_col = lat_arr[:, top_right_col_idx]
    lat_col_valid_indices = get_valid_indices(lat_col, lat_fill, 'latitude')

    # get the index of minimum latitude after checking for valid values
    bottom_right_row_idx = lat_col_valid_indices[
        lat_col[lat_col_valid_indices].argmin()
    ]
    min_lat = lat_col[bottom_right_row_idx]

    # get the index of maximum latitude after checking for valid values
    top_right_row_idx = lat_col_valid_indices[lat_col[lat_col_valid_indices].argmax()]
    max_lat = lat_col[top_right_row_idx]

    # Corners are assembled from the independent lon/lat extremes,
    # i.e. an axis-aligned bounding box of the sampled row/column.
    geo_grid_corners = [
        (min_lon, max_lat),
        (max_lon, max_lat),
        (max_lon, min_lat),
        (min_lon, min_lat),
    ]
    return geo_grid_corners
|
||
|
||
def get_valid_indices(
    coordinate_row_col: ndarray, coordinate_fill: float, coordinate_name: str
) -> ndarray:
    """Return the indices of coordinate values that are valid.

    A value is valid when it is inside the physically meaningful range
    for its coordinate (latitude: [-90, 90], longitude: [-180, 180])
    AND, when a fill value is declared, not equal to that fill value.
    The original implementation applied these screens exclusively
    (fill check OR range check); they are now combined so a fill-free
    but out-of-range value is still rejected. A fill value of 0 is
    also no longer mistaken for "no fill value".

    :raises MissingValidCoordinateDataset: when no valid values remain.
    """
    if coordinate_name == 'longitude':
        valid_mask = (coordinate_row_col >= -180.0) & (coordinate_row_col <= 180.0)
    elif coordinate_name == 'latitude':
        valid_mask = (coordinate_row_col >= -90.0) & (coordinate_row_col <= 90.0)
    else:
        valid_mask = np.ones(coordinate_row_col.shape, dtype=bool)

    if coordinate_fill is not None:
        valid_mask &= ~np.isclose(coordinate_row_col, float(coordinate_fill))

    valid_indices = np.where(valid_mask)[0]

    # if the first row does not have valid indices,
    # should go down to the next row. We throw an exception
    # for now till that gets addressed
    if not valid_indices.size:
        raise MissingValidCoordinateDataset(coordinate_name)
    return valid_indices
|
||
|
||
def get_fill_values_for_coordinates(
    latitude_coordinate: VariableFromDmr,
    longitude_coordinate: VariableFromDmr,
) -> tuple[float | None, float | None]:
    """Return the fill values for the latitude and longitude
    coordinate variables as floats, or None when a variable declares
    no `_FillValue` attribute.

    Bug fix: the original returned `float(lat_fill), float(lon_fill)`,
    which raised `TypeError: float() argument must be ...` whenever a
    fill value was absent (None), contradicting the documented
    contract of returning None.
    """

    def coordinate_fill(coordinate: VariableFromDmr) -> float | None:
        # Cast a declared fill value to float; propagate None when the
        # attribute is absent.
        fill_value = coordinate.get_attribute_value('_FillValue')
        return float(fill_value) if fill_value is not None else None

    # TODO(carried over): when no `_FillValue` is declared, check for
    # overrides in hoss_config.json via varinfo before returning None.
    return coordinate_fill(latitude_coordinate), coordinate_fill(longitude_coordinate)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would fit much better with the rest of the style of the code if you used
else
for these sorts of default values, instead of declaring them and then overriding them if a condition is met.Yes, it's a personal preference, but it's consistent with the rest of the code (which is the more important thing here).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think I had to declare it above as default value to keep the variable in scope for the return call
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think you've understood my comment - you have two choices here: declare it first, or declare it in an
else
. I am asking you to be consistent to the rest of the style of the repository and put the default value in anelse
statement.To avoid two
else
statements, you could restructure the logic below as:I still have two other concerns here:
projected_x
andprojected_y
will cause clashes for those SMAP L3 products with both global and polar grids. (This must be resolved before merging this PR)There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.