Skip to content

Commit

Permalink
Chunked xarray.ds weather for geospatial bug fix
Browse files Browse the repository at this point in the history
  • Loading branch information
tobin-ford committed Aug 16, 2024
1 parent a5a4a72 commit a727796
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 6 deletions.
8 changes: 4 additions & 4 deletions pvdeg/geospatial.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,17 +269,17 @@ def output_template(
dims = set([d for dim in shapes.values() for d in dim])
dims_size = dict(ds_gids.sizes) | add_dims

# if len(ds_gids.chunks) == 0:
# raise ValueError(f"argument ds_gids must contain chunks")

output_template = xr.Dataset(
data_vars={
var: (dim, da.empty([dims_size[d] for d in dim]), attrs.get(var))
for var, dim in shapes.items()
},
coords={dim: ds_gids[dim] for dim in dims},
attrs=global_attrs,
) # .chunk({dim: ds_gids.chunks[dim] for dim in dims})
) # moved chunks down from here

if ds_gids.chunks: # chunk to match input
output_template = output_template.chunk({dim: ds_gids.chunks[dim] for dim in dims})

return output_template

Expand Down
27 changes: 26 additions & 1 deletion pvdeg/scenario.py
Original file line number Diff line number Diff line change
Expand Up @@ -1206,7 +1206,9 @@ def addLocation(

geo_meta = geo_meta[geo_meta["county"].isin(county)]

geo_meta, geo_gids = pvdeg.utilities.gid_downsampling(
# we don't downsample weather data until this runs
# because on NSRDB we are storing weather OUT of MEMORY with dask
geo_meta, geo_gids = pvdeg.utilities.gid_downsampling(
geo_meta, downsample_factor
)

Expand Down Expand Up @@ -1525,6 +1527,29 @@ def coords_tonumpy(self) -> np.array:

return coords

def geospatial_data(self) -> tuple[xr.Dataset, pd.DataFrame]:
    """
    Return the scenario's geospatial weather dataset and metadata dataframe.

    The weather dataset is restricted to the gids found in the index of
    ``self.meta_data`` before it is returned, so both results describe the
    same set of locations. This is the (weather, meta) pair used by the
    traditional pvdeg geospatial approach.

    Example Use:
    >>> geo_weather, geo_meta = scenario.geospatial_data()

    where ``scenario`` is an instance of the class defining this method.

    Parameters:
    -----------
    None

    Returns:
    --------
    (weather_data, meta_data): (xr.Dataset, pd.DataFrame)
        ``self.weather_data`` selected along ``gid`` by the metadata index,
        followed by ``self.meta_data`` itself (returned as-is, not copied).
    """
    weather = self.weather_data
    meta = self.meta_data

    # The stored weather is subset here rather than earlier.
    # NOTE(review): presumably the metadata has already been filtered or
    # downsampled while the weather dataset has not -- confirm with caller.
    subset = weather.sel(gid=meta.index)

    return subset, meta


def addJob(
self,
func: Callable = None,
Expand Down
2 changes: 1 addition & 1 deletion tests/test_geospatial.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
).compute()


def test_analysis_standoff():
def test_analysis_standoff_unchunked():
res_ds = pvdeg.geospatial.analysis(
weather_ds=GEO_WEATHER,
meta_df=GEO_META,
Expand Down

0 comments on commit a727796

Please sign in to comment.