Skip to content

Commit

Permalink
Aggregate statistics by province
Browse files Browse the repository at this point in the history
  • Loading branch information
burggraaff committed Feb 20, 2024
1 parent 2ca8c69 commit 4121831
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 9 deletions.
2 changes: 1 addition & 1 deletion fpcup/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from . import constants, io, plotting, settings
from . import analysis, constants, io, plotting, settings
from . import agro, crop, site, soil, weather
from .model import run_pcse_single, run_pcse_ensemble, run_pcse_ensemble_parallel
20 changes: 20 additions & 0 deletions fpcup/analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""
Functions for analysis
"""
import numpy as np
import pandas as pd
import geopandas as gpd

from ._typing import Callable

def weighted_mean_for_DF(data: pd.DataFrame, *, weightby: str="area") -> Callable:
"""
Generate a weighted mean function for the given dataframe, to be used in .agg.
Example:
weight_by_area = weighted_mean_for_DF(summary)
weighted_average_yield = summary.agg(wm_twso=("TWSO", weight_by_area))
"""
def weighted_mean(x):
return np.average(x, weights=data.loc[x.index, weightby])

return weighted_mean
44 changes: 36 additions & 8 deletions fpcup/province.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@

import geopandas as gpd
import numpy as np
from pandas import Series
from pandas import DataFrame, Series
from tqdm import tqdm

from ._typing import Iterable
from ._typing import Iterable, Optional
from .constants import CRS_AMERSFOORT
from .settings import DEFAULT_DATA

Expand All @@ -16,14 +16,14 @@
nl_boundary = nl.boundary

# Load the provinces
provinces = gpd.read_file(DEFAULT_DATA/"NL_provinces.geojson")
provinces_coarse = gpd.read_file(DEFAULT_DATA/"NL_provinces_coarse.geojson")
province_names = list(provinces["naamOfficieel"]) + ["Friesland"]
_provinces = gpd.read_file(DEFAULT_DATA/"NL_provinces.geojson")
_provinces_coarse = gpd.read_file(DEFAULT_DATA/"NL_provinces_coarse.geojson")
province_names = list(_provinces["naamOfficieel"]) + ["Friesland"]

# Access individual provinces using a dictionary, e.g. province_boundary["Zuid-Holland"]
province_area = {name: poly for name, poly in zip(provinces["naamOfficieel"], provinces["geometry"])}
province_boundary = {name: gpd.GeoSeries(outline) for name, outline in zip(provinces["naamOfficieel"], provinces.boundary)}
province_coarse = {name: poly for name, poly in zip(provinces_coarse["naamOfficieel"], provinces_coarse["geometry"])}
province_area = {name: poly for name, poly in zip(_provinces["naamOfficieel"], _provinces["geometry"])}
province_boundary = {name: gpd.GeoSeries(outline) for name, outline in zip(_provinces["naamOfficieel"], _provinces.boundary)}
province_coarse = {name: poly for name, poly in zip(_provinces_coarse["naamOfficieel"], _provinces_coarse["geometry"])}

# Add an alias for Friesland/Fryslân
province_area["Friesland"] = province_area["Fryslân"]
Expand Down Expand Up @@ -92,3 +92,31 @@ def add_provinces(data: gpd.GeoDataFrame, *, new_column: str="province", provinc

# Add the series to the dataframe
data[new_column] = province_list

def add_province_geometry(data: DataFrame, which: str="area", *, column_name: Optional[str]=None, crs: str=CRS_AMERSFOORT) -> gpd.GeoDataFrame:
"""
Add a column with province geometry to a DataFrame with a province name column/index.
"""
# Remove entries that are not in the province list
pass

# Use the index if no column was provided
if column_name is None:
column = data.index.to_series()
else:
column = data[column_name]

# Apply the dictionary mapping
geometry = column.map(province_area)
data_new = gpd.GeoDataFrame(data, geometry=geometry, crs=crs)

# Change to outline if desired
# (Doing this at the start gives an error)
if which.lower() == "area":
pass
elif which.lower() == "outline":
data_new["geometry"] = data_new.boundary
else:
raise ValueError(f"Cannot add geometries of type `{which}`")

return data_new
29 changes: 29 additions & 0 deletions plot_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,35 @@
if args.verbose:
print(f"Saved batch results plot to {filename_summary.absolute()}")

# Aggregate results by province
weighted_mean = fpcup.analysis.weighted_mean_for_DF(summary)
byprovince = summary.groupby("province")

keys_to_aggregate = ["DVS", "LAIMAX", "TWSO", "CTRAT", "CEVST", "RD", "DOS", "DOE", "DOM"]

# Calculate the mean per province of several variables, weighted by plot area if possible
keys_to_average = ["DVS", "LAIMAX", "TWSO", "CTRAT", "CEVST", "RD"]
try:
byprovince_mean = byprovince[keys_to_average].agg(weighted_mean)
# Use a normal mean if there is no area information available
except KeyError:
byprovince_mean = byprovince[keys_to_average].mean()
filename_means = results_dir / f"WOFOST_{tag}-mean.csv"
if args.verbose:
print("Could not calculate weighted means because there is no 'area' column -- defaulting to a regular mean")
else: # This runs if the original `try` block succeeded
filename_means = results_dir / f"WOFOST_{tag}-weighted-mean.csv"
if args.verbose:
print("Calculated weighted means")

byprovince_mean.to_csv(filename_means)
if args.verbose:
print(f"Saved aggregate (weighted) mean columns to {filename_means.absolute()}")

# Add geometry and plot the results
byprovince_mean = fpcup.province.add_province_geometry(byprovince_mean, "area")
byprovince_mean.plot("LAIMAX")

# Space between summary and outputs sections
if args.verbose:
print()
Expand Down

0 comments on commit 4121831

Please sign in to comment.