Skip to content

Commit

Permalink
Aggregate area/number
Browse files Browse the repository at this point in the history
  • Loading branch information
burggraaff committed Feb 29, 2024
1 parent afc888a commit 1e61a2e
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 16 deletions.
11 changes: 10 additions & 1 deletion fpcup/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,12 @@
from ._typing import Callable, FuncDict, Iterable

# Columns in summary data that should be averaged over for aggregates
# H3pandas does not support the tuple-dict system, e.g. {"n": ("DVS", "count")}, so it has to be done in an ugly way
KEYS_AGGREGATE = ["LAIMAX", "TWSO", "CTRAT", "CEVST"] + ["DOE", "DOM"]
count_dict = {"DVS": "size"}
rename_after_aggregation = {"DVS": "n"}
mean_dict = {key: "mean" for key in KEYS_AGGREGATE}
mean_dict = {**count_dict, **mean_dict}

def weighted_mean_for_DF(data: pd.DataFrame, *, weightby: str="area") -> Callable:
"""
Expand Down Expand Up @@ -48,7 +52,12 @@ def weighted_mean_dict(data: pd.DataFrame, *,
wm_numerical = weighted_mean_for_DF(data, weightby=weightby)
wm_datetime = weighted_mean_datetime(data, weightby=weightby)

return {key: wm_datetime if is_datetime(data[key]) else wm_numerical for key in keys}
aggregator_mean = {key: wm_datetime if is_datetime(data[key]) else wm_numerical for key in keys}
aggregator_area = {"area": "sum"}

aggregator = {**count_dict, **aggregator_area, **aggregator_mean}

return aggregator

def default_aggregator(data: pd.DataFrame, *,
keys=KEYS_AGGREGATE, weightby: str="area") -> FuncDict:
Expand Down
8 changes: 4 additions & 4 deletions fpcup/geo.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from tqdm import tqdm

from ._typing import Aggregator, AreaDict, BoundaryDict, Callable, Iterable, Optional, PathOrStr
from .analysis import default_aggregator
from .analysis import default_aggregator, rename_after_aggregation
from .constants import CRS_AMERSFOORT, WGS84
from .settings import DEFAULT_DATA

Expand Down Expand Up @@ -190,7 +190,7 @@ def entries_in_province(data: gpd.GeoDataFrame, province: str) -> gpd.GeoDataFra
return data.loc[entries]


def maintain_crs(func: Callable):
def maintain_crs(func: Callable) -> Callable:
"""
Decorator that ensures the output of an aggregation function is in the same CRS as the input, regardless of transformations that happen along the way.
"""
Expand Down Expand Up @@ -222,7 +222,7 @@ def aggregate_province(_data: gpd.GeoDataFrame, *,
# Aggregate the data
if aggregator is None:
aggregator = default_aggregator(data)
data_province = data.groupby("province").agg(aggregator)
data_province = data.groupby("province").agg(aggregator).rename(columns=rename_after_aggregation)

# Add the province geometries
data_province = add_province_geometry(data_province)
Expand Down Expand Up @@ -274,7 +274,7 @@ def aggregate_h3(_data: gpd.GeoDataFrame, *,
aggregator = default_aggregator(data, weightby=weightby)
if level is None:
level = _default_h3_level(clipto)
data_h3 = data.h3.geo_to_h3_aggregate(level, aggregator)
data_h3 = data.h3.geo_to_h3_aggregate(level, aggregator).rename(columns=rename_after_aggregation)

# Clip the data if desired
if clipto is not None:
Expand Down
38 changes: 27 additions & 11 deletions fpcup/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
_RASTERIZE_LIMIT_GEO = 250 # Plot geo data in raster format if there are more than this number
_RASTERIZE_GEO = lambda data: (len(data) > _RASTERIZE_LIMIT_GEO)

KEYS_AGGREGATE_PLOT = ("n", "area", *KEYS_AGGREGATE)

def plot_outline(ax: plt.Axes, province: str="Netherlands", *,
coarse: bool=False, crs: str=CRS_AMERSFOORT, **kwargs) -> None:
Expand Down Expand Up @@ -286,11 +287,6 @@ def wofost_summary_histogram(summary: Summary, keys: Iterable[str]=KEYS_AGGREGAT
if isinstance(weights, str):
weights = summary[weights]

# Determine some parameters before the loop
summary_cols = summary[keys]
vmin, vmax = summary_cols.min(), summary_cols.max()
bins = summary_cols.apply(_numerical_or_date_bins)

# Configure figure and axes if none were provided
if NEW_FIGURE:
fig, axs = plt.subplots(nrows=1, ncols=len(keys), sharey=True, figsize=(3*len(keys), 3))
Expand All @@ -299,19 +295,24 @@ def wofost_summary_histogram(summary: Summary, keys: Iterable[str]=KEYS_AGGREGAT

# Plot the histograms
for ax, key in zip(axs, keys):
# Leave an empty panel for keys which may be passed from plot_wofost_summary but which only apply to the geo plot
if key in ("n", "area"):
ax.set_axis_off()
continue

column = summary[key]

if is_datetime(column):
_configure_histogram_datetime(ax)

column.plot.hist(ax=ax, bins=bins[key], weights=weights, facecolor="black")
bins = _numerical_or_date_bins(column)
column.plot.hist(ax=ax, bins=bins, weights=weights, facecolor="black")

# Panel settings
ax.set_title(key)
ax.set_xlim(vmin[key], vmax[key])
ax.set_xlim(column.min(), column.max())

# Labels
axs[0].set_ylabel("Distribution")
fig.align_xlabels()
if NEW_FIGURE:
if title is None:
Expand All @@ -328,7 +329,14 @@ def wofost_summary_histogram(summary: Summary, keys: Iterable[str]=KEYS_AGGREGAT
plt.close()


def wofost_summary_geo(data_geo: gpd.GeoDataFrame, keys: Iterable[str]=KEYS_AGGREGATE, *,
def _remove_area_from_keys(keys: Iterable[str]) -> tuple[str]:
"""
Remove the "area" key from a list of keys.
"""
return tuple(k for k in keys if k != "area")


def wofost_summary_geo(data_geo: gpd.GeoDataFrame, keys: Iterable[str]=KEYS_AGGREGATE_PLOT, *,
axs: Optional[Iterable[plt.Axes]]=None, title: Optional[str]=None, rasterized: bool=True,
province: Optional[str]="Netherlands", coarse: bool=False,
saveto: Optional[PathOrStr]=None, **kwargs) -> None:
Expand All @@ -340,6 +348,10 @@ def wofost_summary_geo(data_geo: gpd.GeoDataFrame, keys: Iterable[str]=KEYS_AGGR
"""
NEW_FIGURE = (axs is None)

# Don't plot area if this is not available
if NEW_FIGURE and "area" in keys and "area" not in data_geo.columns:
keys = _remove_area_from_keys(keys)

# Configure figure and axes if none were provided
if NEW_FIGURE:
fig, axs = plt.subplots(nrows=1, ncols=len(keys), sharex=True, sharey=True, figsize=(3*len(keys), 3))
Expand Down Expand Up @@ -375,11 +387,15 @@ def wofost_summary_geo(data_geo: gpd.GeoDataFrame, keys: Iterable[str]=KEYS_AGGR
plt.close()


def plot_wofost_summary(summary: Summary, keys: Iterable[str]=KEYS_AGGREGATE, *,
def plot_wofost_summary(summary: Summary, keys: Iterable[str]=KEYS_AGGREGATE_PLOT, *,
aggregate: bool=True, aggregate_kwds={}, weights: Optional[str | Iterable[RealNumber]]=None, title: Optional[str]=None, province: Optional[str]="Netherlands", saveto: Optional[PathOrStr]=None) -> None:
"""
Plot histograms and (aggregate) maps showing WOFOST run summaries.
"""
# Don't plot area if this is not available
if "area" in keys and "area" not in summary.columns:
keys = _remove_area_from_keys(keys)

# Create figure and panels
fig, axs = plt.subplots(nrows=2, ncols=len(keys), sharey="row", figsize=(15, 5), gridspec_kw={"hspace": 0.25, "height_ratios": [1, 1.5]})

Expand All @@ -390,7 +406,7 @@ def plot_wofost_summary(summary: Summary, keys: Iterable[str]=KEYS_AGGREGATE, *,
# Aggregate the data if desired
if aggregate:
data_geo = aggregate_h3(summary, clipto=province, weightby=weights, **aggregate_kwds)
rasterized = False
rasterized = True
dpi = rcParams["savefig.dpi"]
else:
data_geo = summary
Expand Down

0 comments on commit 1e61a2e

Please sign in to comment.