From fd3527687c1f328c2187599d501a125748ebdfac Mon Sep 17 00:00:00 2001 From: Shawn Date: Sat, 6 Apr 2024 14:43:15 -0600 Subject: [PATCH 1/2] Two speedups - allow the sample window size to be set from samgeo.generate. Fewer, larger windows take less time as long as you have the memory. - Skip any sample windows which are 100% nodata, with the option to set lower thresholds. --- samgeo/common.py | 10 ++++++++++ samgeo/samgeo.py | 12 ++++++++++++ 2 files changed, 22 insertions(+) diff --git a/samgeo/common.py b/samgeo/common.py index d990e590..d1d6a847 100644 --- a/samgeo/common.py +++ b/samgeo/common.py @@ -1122,6 +1122,8 @@ def tiff_to_tiff( func, data_to_rgb=chw_to_hwc, sample_size=(512, 512), + sample_nodata_threshold=1.0, + nodata_value=None, sample_resize=None, bound=128, foreground=True, @@ -1132,6 +1134,9 @@ def tiff_to_tiff( with rasterio.open(src_fp) as src: profile = src.profile + if nodata_value is None: + nodata_value = profile.get('nodata', None) + # Computer blocks rh, rw = profile["height"], profile["width"] sh, sw = sample_size @@ -1154,6 +1159,11 @@ def tiff_to_tiff( for b in tqdm(sample_grid): # Read each tile from the source r = read_block(src, **b) + + if nodata_value is not None: + if (r==nodata_value).mean() >= sample_nodata_threshold: + continue + # Extract the first 3 channels as RGB uint8_rgb_in = data_to_rgb(r) orig_size = uint8_rgb_in.shape[:2] diff --git a/samgeo/samgeo.py b/samgeo/samgeo.py index 70f4f86e..9e056868 100644 --- a/samgeo/samgeo.py +++ b/samgeo/samgeo.py @@ -152,6 +152,9 @@ def generate( output=None, foreground=True, batch=False, + batch_sample_size=(512,512), + batch_nodata_threshold=1.0, + nodata_value=None, erosion_kernel=None, mask_multiplier=255, unique=True, @@ -164,6 +167,12 @@ def generate( output (str, optional): The path to the output image. Defaults to None. foreground (bool, optional): Whether to generate the foreground mask. Defaults to True. 
batch (bool, optional): Whether to generate masks for a batch of image tiles. Defaults to False. + batch_sample_size (tuple, optional): When batch=True, the size of the sample window when iterating over rasters. + batch_nodata_threshold (float,optional): Batch samples with a fraction of nodata pixels above this threshold will + not be used to generate a mask. The default, 1.0, will skip samples with 100% nodata values. This is useful + when rasters have large areas of nodata values which can be skipped. + nodata_value (int, optional): Nodata value to use in checking batch_nodata_threshold. The default, None, + will use the nodata value in the raster metadata if present. erosion_kernel (tuple, optional): The erosion kernel for filtering object masks and extract borders. Such as (3, 3) or (5, 5). Set to None to disable it. Defaults to None. mask_multiplier (int, optional): The mask multiplier for the output mask, which is usually a binary mask [0, 1]. @@ -190,6 +199,9 @@ def generate( output, self, foreground=foreground, + sample_size=batch_sample_size, + sample_nodata_threshold=batch_nodata_threshold, + nodata_value=nodata_value, erosion_kernel=erosion_kernel, mask_multiplier=mask_multiplier, **kwargs, From 171d82fffd53fdbd0d0ec4634de2a02a5fb57b95 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 6 Apr 2024 20:54:35 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- samgeo/common.py | 8 ++++---- samgeo/samgeo.py | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/samgeo/common.py b/samgeo/common.py index d1d6a847..db617ef7 100644 --- a/samgeo/common.py +++ b/samgeo/common.py @@ -1135,7 +1135,7 @@ def tiff_to_tiff( profile = src.profile if nodata_value is None: - nodata_value = profile.get('nodata', None) + nodata_value = profile.get("nodata", None) # Computer blocks rh, rw = profile["height"], 
profile["width"] @@ -1159,11 +1159,11 @@ def tiff_to_tiff( for b in tqdm(sample_grid): # Read each tile from the source r = read_block(src, **b) - + if nodata_value is not None: - if (r==nodata_value).mean() >= sample_nodata_threshold: + if (r == nodata_value).mean() >= sample_nodata_threshold: continue - + # Extract the first 3 channels as RGB uint8_rgb_in = data_to_rgb(r) orig_size = uint8_rgb_in.shape[:2] diff --git a/samgeo/samgeo.py b/samgeo/samgeo.py index 9e056868..7c7e373e 100644 --- a/samgeo/samgeo.py +++ b/samgeo/samgeo.py @@ -152,7 +152,7 @@ def generate( output=None, foreground=True, batch=False, - batch_sample_size=(512,512), + batch_sample_size=(512, 512), batch_nodata_threshold=1.0, nodata_value=None, erosion_kernel=None, @@ -167,12 +167,12 @@ def generate( output (str, optional): The path to the output image. Defaults to None. foreground (bool, optional): Whether to generate the foreground mask. Defaults to True. batch (bool, optional): Whether to generate masks for a batch of image tiles. Defaults to False. - batch_sample_size (tuple, optional): When batch=True, the size of the sample window when iterating over rasters. + batch_sample_size (tuple, optional): When batch=True, the size of the sample window when iterating over rasters. batch_nodata_threshold (float,optional): Batch samples with a fraction of nodata pixels above this threshold will not be used to generate a mask. The default, 1.0, will skip samples with 100% nodata values. This is useful - when rasters have large areas of nodata values which can be skipped. + when rasters have large areas of nodata values which can be skipped. nodata_value (int, optional): Nodata value to use in checking batch_nodata_threshold. The default, None, - will use the nodata value in the raster metadata if present. + will use the nodata value in the raster metadata if present. erosion_kernel (tuple, optional): The erosion kernel for filtering object masks and extract borders. Such as (3, 3) or (5, 5). 
Set to None to disable it. Defaults to None. mask_multiplier (int, optional): The mask multiplier for the output mask, which is usually a binary mask [0, 1].