Skip to content

Commit

Permalink
Generate test sites more efficiently / in specific geometries
Browse files Browse the repository at this point in the history
  • Loading branch information
burggraaff committed Feb 28, 2024
1 parent d7e1a2a commit afc888a
Show file tree
Hide file tree
Showing 3 changed files with 149 additions and 34 deletions.
59 changes: 59 additions & 0 deletions fpcup/geo.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@
Geography-related constants and methods.
Includes polygons/outlines of the Netherlands and its provinces.
"""
import random
from functools import wraps

import geopandas as gpd
gpd.options.io_engine = "pyogrio"
import h3pandas
import numpy as np
from pandas import DataFrame, Series
from shapely import Geometry, Point, Polygon
from tqdm import tqdm

from ._typing import Aggregator, AreaDict, BoundaryDict, Callable, Iterable, Optional, PathOrStr
Expand Down Expand Up @@ -48,6 +50,16 @@
boundary_coarse = {name: gpd.GeoSeries(poly.boundary, crs=CRS_AMERSFOORT) for name, poly in area_coarse.items()}


def transform_geometry(geometry: Geometry, crs_old: str, crs_new: str) -> Geometry:
"""
Transform a bare Geometry object from one CRS to another.
"""
geometry_gpd = gpd.GeoSeries(geometry, crs=crs_old)
transformed_gpd = geometry_gpd.to_crs(crs_new)
geometry_new = transformed_gpd.iloc[0]
return geometry_new


def process_input_province(province: str) -> str:
"""
Take an input province name and turn it into the standard format.
Expand Down Expand Up @@ -274,3 +286,50 @@ def aggregate_h3(_data: gpd.GeoDataFrame, *,
data_h3 = data_h3.loc[~data_h3.is_empty]

return data_h3


def _generate_random_point_within_bounds(min_lon, min_lat, max_lon, max_lat) -> Point:
"""
Generate one random shapely Point within the given bounds.
The inputs are in the same order as the outputs of Polygon.bounds (in WGS84), so this function can be called as _generate_random_point_within_bounds(*bounds).
"""
latitude = random.uniform(min_lat, max_lat)
longitude = random.uniform(min_lon, max_lon)
return Point(longitude, latitude)


def _generate_random_point_in_geometry(geometry: Geometry, *, crs=CRS_AMERSFOORT) -> Point:
"""
Generate a single random point that lies within a geometry.
Unfortunately quite slow due to the many "contains" checks.
"""
polygon = transform_geometry(geometry, crs, WGS84) # Convert to WGS84 coordinates
while True: # Infinite generator
# Generate points randomly until one falls within the given geometry
p = _generate_random_point_within_bounds(*polygon.bounds)

# If a point was successfully generated, yield it
if polygon.contains(p):
# Doing a first check with the convex hull does not help here
yield p


def _generate_random_point_in_geometry_batch(geometry: Geometry, n: int, *, crs=CRS_AMERSFOORT) -> gpd.GeoSeries:
"""
Generate a batch of random points and return the ones that fall within the given geometry.
"""
polygon = transform_geometry(geometry, crs, WGS84) # Convert to WGS84 coordinates

points = gpd.GeoSeries((_generate_random_point_within_bounds(*polygon.bounds) for i in range(n)), crs=WGS84)
points_in_polygon = points.loc[points.within(polygon)]

return points_in_polygon


def coverage_of_bounding_box(geometry: Geometry) -> float:
"""
Calculate the fraction of its bounding box covered by a given geometry.
"""
min_x, min_y, max_x, max_y = geometry.bounds
area_box = (max_x - min_x) * (max_y - min_y)
return geometry.area / area_box
116 changes: 84 additions & 32 deletions fpcup/site.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,22 @@
"""
Site-related stuff: load data etc
"""
import random
from functools import partial, wraps
from itertools import product
from random import shuffle

import numpy as np
from pandas import concat
from shapely import Point
from tqdm import trange

from pcse.util import WOFOST72SiteDataProvider, WOFOST80SiteDataProvider
from pcse.util import _GenericSiteDataProvider as PCSESiteDataProvider

from ._brp_dictionary import brp_categories_NL2EN
from ._typing import RealNumber
from ._typing import Callable, Coordinates, Iterable, RealNumber
from .constants import CRS_AMERSFOORT, WGS84
from .geo import area, _generate_random_point_in_geometry, _generate_random_point_in_geometry_batch, coverage_of_bounding_box


def example(*args, **kwargs) -> PCSESiteDataProvider:
"""
Expand All @@ -19,48 +25,94 @@ def example(*args, **kwargs) -> PCSESiteDataProvider:
sitedata = WOFOST72SiteDataProvider(WAV=10)
return sitedata

def grid_coordinate_range(latitude: tuple[RealNumber], longitude: tuple[RealNumber], *,
shuffle_result=True) -> list[tuple[float]]:
"""
Generate all pairs of latitude/longitude coordinates in given latitude/longitude ranges.
Inputs should be tuples that can be passed to np.arange as parameters.
The output is given as a list so it can be iterated over multiple times.
If `shuffle_result` is True (default: True), the list is shuffled randomly, so that incomplete runs will still provide adequate coverage over the whole area.

Example:
coords = grid_coordinate_range(latitudes=(50, 52, 0.1), longitudes=(5, 10, 1))
def combine_and_shuffle_coordinates(latitudes: Iterable[RealNumber], longitudes: Iterable[RealNumber], *,
shuffle: bool=True) -> list[Coordinates]:
"""
latitude_range = np.arange(*latitude)
longitude_range = np.arange(*longitude)
coordinates = product(latitude_range, longitude_range)
Post-process lists of latitudes and longitudes into a list of combined coordinate pairs.
"""
# Combine the coordinates into pairs
coordinates = product(latitudes, longitudes)
coordinates = list(coordinates)

# Randomise order if desired
if shuffle_result:
shuffle(coordinates) # In-place
# Randomise order
if shuffle:
random.shuffle(coordinates) # In-place

return coordinates

def grid_coordinate_linspace(latitude: tuple[RealNumber], longitude: tuple[RealNumber], n: int, *,
shuffle_result=True) -> list[tuple[float]]:
def generate_sites_range(latitude: tuple[RealNumber], longitude: tuple[RealNumber], *,
shuffle: bool=True) -> tuple[np.ndarray, np.ndarray]:
"""
Generate all pairs of latitude/longitude coordinates in given latitude/longitude ranges.
Inputs should be tuples that can be passed to np.arange as parameters.
"""
latitudes = np.arange(*latitude)
longitudes = np.arange(*longitude)
return combine_and_shuffle_coordinates(latitudes, longitudes, shuffle=shuffle)


def generate_sites_space(latitude: tuple[RealNumber], longitude: tuple[RealNumber], n: int, *,
shuffle: bool=True) -> tuple[np.ndarray, np.ndarray]:
"""
Generate n pairs of latitude/longitude coordinates in a given area bound by latitude/longitude ranges.
Inputs should be tuples of (min, max) latitude/longitude.
The output is given as a list so it can be iterated over multiple times.
Note: the true size of the output may be smaller than n due to rounding.
If `shuffle_result` is True (default: True), the list is shuffled randomly, so that incomplete runs will still provide adequate coverage over the whole area.
Example:
coords = grid_coordinate_range(latitudes=(50, 52), longitudes=(5, 10), n=1000)
"""
n_each = int(np.sqrt(n))
latitude_range = np.linspace(*latitude, n_each)
longitude_range = np.linspace(*longitude, n_each)
coordinates = product(latitude_range, longitude_range)
coordinates = list(coordinates)
latitudes = np.linspace(*latitude, n_each)
longitudes = np.linspace(*longitude, n_each)
return combine_and_shuffle_coordinates(latitudes, longitudes, shuffle=shuffle)


def points_to_coordinates(points: Iterable[Point]) -> list[Coordinates]:
"""
Split shapely Points into (latitude, longitude) pairs.
"""
return [(p.y, p.x) for p in points]


def generate_sites_in_province_frombatch(province: str, n: int, **kwargs) -> list[Coordinates]:
"""
Generate n pairs of latitude/longitude coordinates that are (roughly) uniformly distributed over the given province.
Points are generated in WGS84 so they may not be uniformly distributed in other CRSes.
The output is given as a list so it can be iterated over multiple times.
"""
geometry = area[province] # geometry in CRS_AMERSFOORT

# Estimate roughly by how much to overshoot for each iteration
coverage = coverage_of_bounding_box(geometry)
n_safe = int(n / coverage)

# Generate the first iteration of points
points = _generate_random_point_in_geometry_batch(geometry, n_safe)

# Iterate until there are enough points
while len(points) < n:
new_points = _generate_random_point_in_geometry_batch(geometry, n_safe//10)
points = concat([points, new_points])

# Randomise order if desired
if shuffle_result:
shuffle(coordinates) # In-place
# Cut off any excess
points = points.iloc[:n]

# Extract and return the latitudes and longitudes
coordinates = points_to_coordinates(points)
return coordinates


def generate_sites_in_province_fromgenerator(province: str, n: int, *,
progressbar=True, leave_progressbar=True) -> list[Coordinates]:
"""
Generate n pairs of latitude/longitude coordinates that are (roughly) uniformly distributed over the given province.
Points are generated in WGS84 so they may not be uniformly distributed in other CRSes.
The output is given as a list so it can be iterated over multiple times.
"""
geometry = area[province] # geometry in CRS_AMERSFOORT
point_generator = _generate_random_point_in_geometry(geometry, crs=CRS_AMERSFOORT)
points = [next(point_generator) for i in trange(n, desc="Generating sites", unit="site", disable=not progressbar, leave=leave_progressbar)]
coordinates = points_to_coordinates(points)

return coordinates


generate_sites_in_province = generate_sites_in_province_fromgenerator
8 changes: 6 additions & 2 deletions test/batch_locations.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,12 @@

# Generate the coordinates
crs = fpcup.constants.WGS84
bounds = fpcup.geo.boundary[args.province].to_crs(crs).bounds.iloc[0]
coords = fpcup.site.grid_coordinate_linspace(latitude=(bounds["miny"], bounds["maxy"]), longitude=(bounds["minx"], bounds["maxx"]), n=args.number)
SINGLE_PROVINCE = (args.province != "Netherlands")

if SINGLE_PROVINCE:
coords = fpcup.site.generate_sites_in_province(args.province, args.number, leave_progressbar=args.verbose)
else:
coords = fpcup.site.generate_sites_space(latitude=(50.7, 53.6), longitude=(3.3, 7.3), n=args.number)
if args.verbose:
print(f"Generated {len(coords)} coordinates")

Expand Down

0 comments on commit afc888a

Please sign in to comment.