Aggregate area/number

burggraaff · Feb 29, 2024 · 1e61a2e · 1e61a2e
1 parent afc888a
commit 1e61a2e
Show file tree

Hide file tree

Showing 3 changed files with 41 additions and 16 deletions.
diff --git a/fpcup/analysis.py b/fpcup/analysis.py
@@ -12,8 +12,12 @@
 from ._typing import Callable, FuncDict, Iterable
 
 # Columns in summary data that should be averaged over for aggregates
+# H3pandas does not support the tuple-dict system, e.g. {"n": ("DVS", "count")}, so it has to be done in an ugly way
 KEYS_AGGREGATE = ["LAIMAX", "TWSO", "CTRAT", "CEVST"] + ["DOE", "DOM"]
+count_dict = {"DVS": "size"}
+rename_after_aggregation = {"DVS": "n"}
 mean_dict = {key: "mean" for key in KEYS_AGGREGATE}
+mean_dict = {**count_dict, **mean_dict}
 
 def weighted_mean_for_DF(data: pd.DataFrame, *, weightby: str="area") -> Callable:
     """
@@ -48,7 +52,12 @@ def weighted_mean_dict(data: pd.DataFrame, *,
     wm_numerical = weighted_mean_for_DF(data, weightby=weightby)
     wm_datetime = weighted_mean_datetime(data, weightby=weightby)
 
-    return {key: wm_datetime if is_datetime(data[key]) else wm_numerical for key in keys}
+    aggregator_mean = {key: wm_datetime if is_datetime(data[key]) else wm_numerical for key in keys}
+    aggregator_area = {"area": "sum"}
+
+    aggregator = {**count_dict, **aggregator_area, **aggregator_mean}
+
+    return aggregator
 
 def default_aggregator(data: pd.DataFrame, *,
                        keys=KEYS_AGGREGATE, weightby: str="area") -> FuncDict:

diff --git a/fpcup/geo.py b/fpcup/geo.py
@@ -14,7 +14,7 @@
 from tqdm import tqdm
 
 from ._typing import Aggregator, AreaDict, BoundaryDict, Callable, Iterable, Optional, PathOrStr
-from .analysis import default_aggregator
+from .analysis import default_aggregator, rename_after_aggregation
 from .constants import CRS_AMERSFOORT, WGS84
 from .settings import DEFAULT_DATA
 
@@ -190,7 +190,7 @@ def entries_in_province(data: gpd.GeoDataFrame, province: str) -> gpd.GeoDataFra
     return data.loc[entries]
 
 
-def maintain_crs(func: Callable):
+def maintain_crs(func: Callable) -> Callable:
     """
     Decorator that ensures the output of an aggregation function is in the same CRS as the input, regardless of transformations that happen along the way.
     """
@@ -222,7 +222,7 @@ def aggregate_province(_data: gpd.GeoDataFrame, *,
     # Aggregate the data
     if aggregator is None:
         aggregator = default_aggregator(data)
-    data_province = data.groupby("province").agg(aggregator)
+    data_province = data.groupby("province").agg(aggregator).rename(columns=rename_after_aggregation)
 
     # Add the province geometries
     data_province = add_province_geometry(data_province)
@@ -274,7 +274,7 @@ def aggregate_h3(_data: gpd.GeoDataFrame, *,
         aggregator = default_aggregator(data, weightby=weightby)
     if level is None:
         level = _default_h3_level(clipto)
-    data_h3 = data.h3.geo_to_h3_aggregate(level, aggregator)
+    data_h3 = data.h3.geo_to_h3_aggregate(level, aggregator).rename(columns=rename_after_aggregation)
 
     # Clip the data if desired
     if clipto is not None:

diff --git a/fpcup/plotting.py b/fpcup/plotting.py
@@ -32,6 +32,7 @@
 _RASTERIZE_LIMIT_GEO = 250  # Plot geo data in raster format if there are more than this number
 _RASTERIZE_GEO = lambda data: (len(data) > _RASTERIZE_LIMIT_GEO)
 
+KEYS_AGGREGATE_PLOT = ("n", "area", *KEYS_AGGREGATE)
 
 def plot_outline(ax: plt.Axes, province: str="Netherlands", *,
                  coarse: bool=False, crs: str=CRS_AMERSFOORT, **kwargs) -> None:
@@ -286,11 +287,6 @@ def wofost_summary_histogram(summary: Summary, keys: Iterable[str]=KEYS_AGGREGAT
     if isinstance(weights, str):
         weights = summary[weights]
 
-    # Determine some parameters before the loop
-    summary_cols = summary[keys]
-    vmin, vmax = summary_cols.min(), summary_cols.max()
-    bins = summary_cols.apply(_numerical_or_date_bins)
-
     # Configure figure and axes if none were provided
     if NEW_FIGURE:
         fig, axs = plt.subplots(nrows=1, ncols=len(keys), sharey=True, figsize=(3*len(keys), 3))
@@ -299,19 +295,24 @@ def wofost_summary_histogram(summary: Summary, keys: Iterable[str]=KEYS_AGGREGAT
 
     # Plot the histograms
     for ax, key in zip(axs, keys):
+        # Leave an empty panel for keys which may be passed from plot_wofost_summary but which only apply to the geo plot
+        if key in ("n", "area"):
+            ax.set_axis_off()
+            continue
+
         column = summary[key]
 
         if is_datetime(column):
             _configure_histogram_datetime(ax)
 
-        column.plot.hist(ax=ax, bins=bins[key], weights=weights, facecolor="black")
+        bins = _numerical_or_date_bins(column)
+        column.plot.hist(ax=ax, bins=bins, weights=weights, facecolor="black")
 
         # Panel settings
         ax.set_title(key)
-        ax.set_xlim(vmin[key], vmax[key])
+        ax.set_xlim(column.min(), column.max())
 
     # Labels
-    axs[0].set_ylabel("Distribution")
     fig.align_xlabels()
     if NEW_FIGURE:
         if title is None:
@@ -328,7 +329,14 @@ def wofost_summary_histogram(summary: Summary, keys: Iterable[str]=KEYS_AGGREGAT
         plt.close()
 
 
-def wofost_summary_geo(data_geo: gpd.GeoDataFrame, keys: Iterable[str]=KEYS_AGGREGATE, *,
+def _remove_area_from_keys(keys: Iterable[str]) -> tuple[str]:
+    """
+    Remove the "area" key from a list of keys.
+    """
+    return tuple(k for k in keys if k != "area")
+
+
+def wofost_summary_geo(data_geo: gpd.GeoDataFrame, keys: Iterable[str]=KEYS_AGGREGATE_PLOT, *,
                        axs: Optional[Iterable[plt.Axes]]=None, title: Optional[str]=None, rasterized: bool=True,
                        province: Optional[str]="Netherlands", coarse: bool=False,
                        saveto: Optional[PathOrStr]=None, **kwargs) -> None:
@@ -340,6 +348,10 @@ def wofost_summary_geo(data_geo: gpd.GeoDataFrame, keys: Iterable[str]=KEYS_AGGR
     """
     NEW_FIGURE = (axs is None)
 
+    # Don't plot area if this is not available
+    if NEW_FIGURE and "area" in keys and "area" not in data_geo.columns:
+        keys = _remove_area_from_keys(keys)
+
     # Configure figure and axes if none were provided
     if NEW_FIGURE:
         fig, axs = plt.subplots(nrows=1, ncols=len(keys), sharex=True, sharey=True, figsize=(3*len(keys), 3))
@@ -375,11 +387,15 @@ def wofost_summary_geo(data_geo: gpd.GeoDataFrame, keys: Iterable[str]=KEYS_AGGR
         plt.close()
 
 
-def plot_wofost_summary(summary: Summary, keys: Iterable[str]=KEYS_AGGREGATE, *,
+def plot_wofost_summary(summary: Summary, keys: Iterable[str]=KEYS_AGGREGATE_PLOT, *,
                                  aggregate: bool=True, aggregate_kwds={}, weights: Optional[str | Iterable[RealNumber]]=None, title: Optional[str]=None, province: Optional[str]="Netherlands", saveto: Optional[PathOrStr]=None) -> None:
     """
     Plot histograms and (aggregate) maps showing WOFOST run summaries.
     """
+    # Don't plot area if this is not available
+    if "area" in keys and "area" not in summary.columns:
+        keys = _remove_area_from_keys(keys)
+
     # Create figure and panels
     fig, axs = plt.subplots(nrows=2, ncols=len(keys), sharey="row", figsize=(15, 5), gridspec_kw={"hspace": 0.25, "height_ratios": [1, 1.5]})
 
@@ -390,7 +406,7 @@ def plot_wofost_summary(summary: Summary, keys: Iterable[str]=KEYS_AGGREGATE, *,
     # Aggregate the data if desired
     if aggregate:
         data_geo = aggregate_h3(summary, clipto=province, weightby=weights, **aggregate_kwds)
-        rasterized = False
+        rasterized = True
         dpi = rcParams["savefig.dpi"]
     else:
         data_geo = summary