From afc888ad6ec1391d131875938b0bbf47b3337bf5 Mon Sep 17 00:00:00 2001 From: Olivier Burggraaff Date: Wed, 28 Feb 2024 19:13:31 +0100 Subject: [PATCH] Generate test sites more efficiently / in specific geometries --- fpcup/geo.py | 59 ++++++++++++++++++++ fpcup/site.py | 116 +++++++++++++++++++++++++++++----------- test/batch_locations.py | 8 ++- 3 files changed, 149 insertions(+), 34 deletions(-) diff --git a/fpcup/geo.py b/fpcup/geo.py index ddaf913..eb0c1cc 100644 --- a/fpcup/geo.py +++ b/fpcup/geo.py @@ -2,6 +2,7 @@ Geography-related constants and methods. Includes polygons/outlines of the Netherlands and its provinces. """ +import random from functools import wraps import geopandas as gpd @@ -9,6 +10,7 @@ import h3pandas import numpy as np from pandas import DataFrame, Series +from shapely import Geometry, Point, Polygon from tqdm import tqdm from ._typing import Aggregator, AreaDict, BoundaryDict, Callable, Iterable, Optional, PathOrStr @@ -48,6 +50,16 @@ boundary_coarse = {name: gpd.GeoSeries(poly.boundary, crs=CRS_AMERSFOORT) for name, poly in area_coarse.items()} +def transform_geometry(geometry: Geometry, crs_old: str, crs_new: str) -> Geometry: + """ + Transform a bare Geometry object from one CRS to another. + """ + geometry_gpd = gpd.GeoSeries(geometry, crs=crs_old) + transformed_gpd = geometry_gpd.to_crs(crs_new) + geometry_new = transformed_gpd.iloc[0] + return geometry_new + + def process_input_province(province: str) -> str: """ Take an input province name and turn it into the standard format. @@ -274,3 +286,50 @@ def aggregate_h3(_data: gpd.GeoDataFrame, *, data_h3 = data_h3.loc[~data_h3.is_empty] return data_h3 + + +def _generate_random_point_within_bounds(min_lon, min_lat, max_lon, max_lat) -> Point: + """ + Generate one random shapely Point within the given bounds. + The inputs are in the same order as the outputs of Polygon.bounds (in WGS84), so this function can be called as _generate_random_point_within_bounds(*bounds). + """ + latitude = random.uniform(min_lat, max_lat) + longitude = random.uniform(min_lon, max_lon) + return Point(longitude, latitude) + + +def _generate_random_point_in_geometry(geometry: Geometry, *, crs=CRS_AMERSFOORT) -> Point: + """ + Generate a single random point that lies within a geometry. + Unfortunately quite slow due to the many "contains" checks. + """ + polygon = transform_geometry(geometry, crs, WGS84) # Convert to WGS84 coordinates + while True: # Infinite generator + # Generate points randomly until one falls within the given geometry + p = _generate_random_point_within_bounds(*polygon.bounds) + + # If a point was successfully generated, yield it + if polygon.contains(p): + # Doing a first check with the convex hull does not help here + yield p + + +def _generate_random_point_in_geometry_batch(geometry: Geometry, n: int, *, crs=CRS_AMERSFOORT) -> gpd.GeoSeries: + """ + Generate a batch of random points and return the ones that fall within the given geometry. + """ + polygon = transform_geometry(geometry, crs, WGS84) # Convert to WGS84 coordinates + + points = gpd.GeoSeries((_generate_random_point_within_bounds(*polygon.bounds) for i in range(n)), crs=WGS84) + points_in_polygon = points.loc[points.within(polygon)] + + return points_in_polygon + + +def coverage_of_bounding_box(geometry: Geometry) -> float: + """ + Calculate the fraction of its bounding box covered by a given geometry. + """ + min_x, min_y, max_x, max_y = geometry.bounds + area_box = (max_x - min_x) * (max_y - min_y) + return geometry.area / area_box diff --git a/fpcup/site.py b/fpcup/site.py index 7f28043..07ed2b1 100644 --- a/fpcup/site.py +++ b/fpcup/site.py @@ -1,16 +1,22 @@ """ Site-related stuff: load data etc """ +import random +from functools import partial, wraps from itertools import product -from random import shuffle import numpy as np +from pandas import concat +from shapely import Point +from tqdm import trange from pcse.util import WOFOST72SiteDataProvider, WOFOST80SiteDataProvider from pcse.util import _GenericSiteDataProvider as PCSESiteDataProvider -from ._brp_dictionary import brp_categories_NL2EN -from ._typing import RealNumber +from ._typing import Callable, Coordinates, Iterable, RealNumber +from .constants import CRS_AMERSFOORT, WGS84 +from .geo import area, _generate_random_point_in_geometry, _generate_random_point_in_geometry_batch, coverage_of_bounding_box + def example(*args, **kwargs) -> PCSESiteDataProvider: """ @@ -19,48 +25,94 @@ def example(*args, **kwargs) -> PCSESiteDataProvider: sitedata = WOFOST72SiteDataProvider(WAV=10) return sitedata -def grid_coordinate_range(latitude: tuple[RealNumber], longitude: tuple[RealNumber], *, - shuffle_result=True) -> list[tuple[float]]: - """ - Generate all pairs of latitude/longitude coordinates in given latitude/longitude ranges. - Inputs should be tuples that can be passed to np.arange as parameters. - The output is given as a list so it can be iterated over multiple times. - If `shuffle_result` is True (default: True), the list is shuffled randomly, so that incomplete runs will still provide adequate coverage over the whole area. - Example: - coords = grid_coordinate_range(latitudes=(50, 52, 0.1), longitudes=(5, 10, 1)) +def combine_and_shuffle_coordinates(latitudes: Iterable[RealNumber], longitudes: Iterable[RealNumber], *, + shuffle: bool=True) -> list[Coordinates]: """ - latitude_range = np.arange(*latitude) - longitude_range = np.arange(*longitude) - coordinates = product(latitude_range, longitude_range) + Post-process lists of latitudes and longitudes into a list of combined coordinate pairs. + """ + # Combine the coordinates into pairs + coordinates = product(latitudes, longitudes) coordinates = list(coordinates) - # Randomise order if desired - if shuffle_result: - shuffle(coordinates) # In-place + # Randomise order + if shuffle: + random.shuffle(coordinates) # In-place return coordinates -def grid_coordinate_linspace(latitude: tuple[RealNumber], longitude: tuple[RealNumber], n: int, *, - shuffle_result=True) -> list[tuple[float]]: +def generate_sites_range(latitude: tuple[RealNumber], longitude: tuple[RealNumber], *, + shuffle: bool=True) -> tuple[np.ndarray, np.ndarray]: + """ + Generate all pairs of latitude/longitude coordinates in given latitude/longitude ranges. + Inputs should be tuples that can be passed to np.arange as parameters. + """ + latitudes = np.arange(*latitude) + longitudes = np.arange(*longitude) + return combine_and_shuffle_coordinates(latitudes, longitudes, shuffle=shuffle) + + +def generate_sites_space(latitude: tuple[RealNumber], longitude: tuple[RealNumber], n: int, *, + shuffle: bool=True) -> tuple[np.ndarray, np.ndarray]: """ Generate n pairs of latitude/longitude coordinates in a given area bound by latitude/longitude ranges. Inputs should be tuples of (min, max) latitude/longitude. - The output is given as a list so it can be iterated over multiple times. Note: the true size of the output may be smaller than n due to rounding. - If `shuffle_result` is True (default: True), the list is shuffled randomly, so that incomplete runs will still provide adequate coverage over the whole area. - - Example: - coords = grid_coordinate_range(latitudes=(50, 52), longitudes=(5, 10), n=1000) """ n_each = int(np.sqrt(n)) - latitude_range = np.linspace(*latitude, n_each) - longitude_range = np.linspace(*longitude, n_each) - coordinates = product(latitude_range, longitude_range) - coordinates = list(coordinates) + latitudes = np.linspace(*latitude, n_each) + longitudes = np.linspace(*longitude, n_each) + return combine_and_shuffle_coordinates(latitudes, longitudes, shuffle=shuffle) + + +def points_to_coordinates(points: Iterable[Point]) -> list[Coordinates]: + """ + Split shapely Points into (latitude, longitude) pairs. + """ + return [(p.y, p.x) for p in points] + + +def generate_sites_in_province_frombatch(province: str, n: int, **kwargs) -> list[Coordinates]: + """ + Generate n pairs of latitude/longitude coordinates that are (roughly) uniformly distributed over the given province. + Points are generated in WGS84 so they may not be uniformly distributed in other CRSes. + The output is given as a list so it can be iterated over multiple times. + """ + geometry = area[province] # geometry in CRS_AMERSFOORT + + # Estimate roughly by how much to overshoot for each iteration + coverage = coverage_of_bounding_box(geometry) + n_safe = int(n / coverage) + + # Generate the first iteration of points + points = _generate_random_point_in_geometry_batch(geometry, n_safe) + + # Iterate until there are enough points + while len(points) < n: + new_points = _generate_random_point_in_geometry_batch(geometry, n_safe//10) + points = concat([points, new_points]) - # Randomise order if desired - if shuffle_result: - shuffle(coordinates) # In-place + # Cut off any excess + points = points.iloc[:n] + # Extract and return the latitudes and longitudes + coordinates = points_to_coordinates(points) return coordinates + + +def generate_sites_in_province_fromgenerator(province: str, n: int, *, + progressbar=True, leave_progressbar=True) -> list[Coordinates]: + """ + Generate n pairs of latitude/longitude coordinates that are (roughly) uniformly distributed over the given province. + Points are generated in WGS84 so they may not be uniformly distributed in other CRSes. + The output is given as a list so it can be iterated over multiple times. + """ + geometry = area[province] # geometry in CRS_AMERSFOORT + point_generator = _generate_random_point_in_geometry(geometry, crs=CRS_AMERSFOORT) + points = [next(point_generator) for i in trange(n, desc="Generating sites", unit="site", disable=not progressbar, leave=leave_progressbar)] + coordinates = points_to_coordinates(points) + + return coordinates + + +generate_sites_in_province = generate_sites_in_province_fromgenerator diff --git a/test/batch_locations.py b/test/batch_locations.py index 990d8c0..95d9bb6 100644 --- a/test/batch_locations.py +++ b/test/batch_locations.py @@ -19,8 +19,12 @@ # Generate the coordinates crs = fpcup.constants.WGS84 -bounds = fpcup.geo.boundary[args.province].to_crs(crs).bounds.iloc[0] -coords = fpcup.site.grid_coordinate_linspace(latitude=(bounds["miny"], bounds["maxy"]), longitude=(bounds["minx"], bounds["maxx"]), n=args.number) +SINGLE_PROVINCE = (args.province != "Netherlands") + +if SINGLE_PROVINCE: + coords = fpcup.site.generate_sites_in_province(args.province, args.number, leave_progressbar=args.verbose) +else: + coords = fpcup.site.generate_sites_space(latitude=(50.7, 53.6), longitude=(3.3, 7.3), n=args.number) if args.verbose: print(f"Generated {len(coords)} coordinates")