From 487e32ea303c4bc36113d669600c62731c654b94 Mon Sep 17 00:00:00 2001 From: martin-springer Date: Fri, 13 Sep 2024 13:12:35 -0600 Subject: [PATCH] fix duplicate gids in elevation_stochastic_downselect --- pvdeg/geospatial.py | 19 ++++++++++----- pvdeg/scenario.py | 56 ++++++++++++++++++++++++--------------------- 2 files changed, 43 insertions(+), 32 deletions(-) diff --git a/pvdeg/geospatial.py b/pvdeg/geospatial.py index 2ffdb62..d54877e 100644 --- a/pvdeg/geospatial.py +++ b/pvdeg/geospatial.py @@ -27,6 +27,7 @@ from typing import Tuple from shapely import LineString, MultiLineString + def start_dask(hpc=None): """ Starts a dask cluster for parallel processing. @@ -270,15 +271,21 @@ def output_template( output_template = xr.Dataset( data_vars={ - var: (dim, da.empty([dims_size[d] for d in dim]), attrs.get(var)) # this will produce a dask array with 1 chunk of the same size as the input + var: ( + dim, + da.empty([dims_size[d] for d in dim]), + attrs.get(var), + ) # this will produce a dask array with 1 chunk of the same size as the input for var, dim in shapes.items() }, coords={dim: ds_gids[dim] for dim in dims}, attrs=global_attrs, - ) - - if ds_gids.chunks: # chunk to match input - output_template = output_template.chunk({dim: ds_gids.chunks[dim] for dim in dims}) + ) + + if ds_gids.chunks: # chunk to match input + output_template = output_template.chunk( + {dim: ds_gids.chunks[dim] for dim in dims} + ) return output_template @@ -916,7 +923,7 @@ def elevation_stochastic_downselect( a=len(coords), p=normalized_weights / np.sum(normalized_weights), size=m ) - return selected_indicies + return np.unique(selected_indicies) def interpolate_analysis( diff --git a/pvdeg/scenario.py b/pvdeg/scenario.py index be6c53b..c9ff6eb 100644 --- a/pvdeg/scenario.py +++ b/pvdeg/scenario.py @@ -206,11 +206,13 @@ def addLocation( elif weather_db == "PVGIS": pass else: - raise ValueError(f""" - email : {self.email} \n api-key : {self.api_key} - Must provide an email and api key during class initialization + raise ValueError( + f""" + email : {self.email} \n api-key : {self.api_key} + Must provide an email and api key during class initialization when using NDSRDB : {weather_db} == 'PSM3' - """) + """ + ) point_weather, point_meta = pvdeg.weather.get( weather_db, id=weather_id, **weather_arg @@ -925,7 +927,7 @@ def format_modules(self): module_content = f"""

- + {module['module_name']}

@@ -957,7 +959,7 @@ def format_results(self): module_content = f"""

- + {module_name}

@@ -969,7 +971,7 @@ def format_results(self): module_content += f"""
- + {function_name}
@@ -1029,7 +1031,7 @@ def format_weather(self): weather_data_html = f"""

- + Weather Data

@@ -1053,7 +1055,7 @@ def format_pipeline(self): step_content = f"""

- + {step['job'].__name__}, #{step_name}

@@ -1124,7 +1126,7 @@ def addLocation( see_added: bool = False, ) -> None: """ - Add a location to the scenario. This can be done in three ways: Pass (region, region_col) for gid list. + Add a location to the scenario. This can be done in three ways: Pass (region, region_col) for gid list. Parameters: ----------- @@ -1135,9 +1137,9 @@ def addLocation( - ``country='United States'`` - ``country=['United States']`` - ``country=['Mexico', 'Canada']`` - + state : str - combination of states or provinces to include from NSRDB. + combination of states or provinces to include from NSRDB. Supports two-letter codes for American states. Can mix two-letter codes with full length strings. Can take single string, or list of strings (len >= 1) Examples: @@ -1147,8 +1149,8 @@ def addLocation( county : str county to include from NSRDB. If duplicate county exists in two - states present in the ``state`` argument, both will be included. - If no state is provided + states present in the ``state`` argument, both will be included. + If no state is provided downsample_factor : int downsample the weather and metadata attached to the region you have selected. default(0), means no downsampling year : int @@ -1206,9 +1208,9 @@ def addLocation( geo_meta = geo_meta[geo_meta["county"].isin(county)] - # we don't downsample weather data until this runs + # we don't downsample weather data until this runs # because on NSRDB we are storing weather OUT of MEMORY with dask - geo_meta, geo_gids = pvdeg.utilities.gid_downsampling( + geo_meta, geo_gids = pvdeg.utilities.gid_downsampling( geo_meta, downsample_factor ) @@ -1530,7 +1532,7 @@ def coords_tonumpy(self) -> np.array: def geospatial_data(self) -> tuple[xr.Dataset, pd.DataFrame]: """ Extract the geospatial weather dataset and metadata dataframe from the scenario object - + Example Use: >>> geo_weather, geo_meta = GeospatialScenario.geospatial_data() @@ -1542,13 +1544,14 @@ def geospatial_data(self) -> tuple[xr.Dataset, pd.DataFrame]: Returns: -------- - (weather_data, meta_data): (xr.Dataset, pd.DataFrame) + (weather_data, meta_data): (xr.Dataset, pd.DataFrame) A tuple of weather data as an `xarray.Dataset` and the corresponding meta data as a dataframe. """ # downsample here, not done already happens at pipeline runtime - geo_weather_sub = self.weather_data.sel(gid=self.meta_data.index) + geo_weather_sub = self.weather_data.sel(gid=self.meta_data.index).chunk( + chunks={"time": -1, "gid": 50} + ) return geo_weather_sub, self.meta_data - def addJob( self, @@ -1890,10 +1893,11 @@ def plot_world( data_variable: str, cmap: str = "viridis", ): - da = (self.results)[data_variable] - fig, ax = plt.subplots(figsize=(10, 6), subplot_kw={'projection': ccrs.PlateCarree()}) + fig, ax = plt.subplots( + figsize=(10, 6), subplot_kw={"projection": ccrs.PlateCarree()} + ) da.plot(ax=ax, transform=ccrs.PlateCarree(), cmap=cmap) ax.set_extent([-180, 180, -90, 90], crs=ccrs.PlateCarree()) @@ -1904,7 +1908,7 @@ def plot_world( ax.add_feature(cfeature.LAND) ax.add_feature(cfeature.OCEAN) - ax.add_feature(cfeature.LAKES, edgecolor='black') + ax.add_feature(cfeature.LAKES, edgecolor="black") plt.show() # test this @@ -1959,7 +1963,7 @@ def format_pipeline(self): step_content = f"""

- + {step['job'].__name__}, #{step_name}

@@ -2029,7 +2033,7 @@ def format_results(self): result_content = f"""

- + Geospatial Result

@@ -2074,7 +2078,7 @@ def format_meta(self): meta_data_html = f"""

- + Meta Data