From 8a4041901771661c0416ec8abf955fa2eb405115 Mon Sep 17 00:00:00 2001 From: Alessandro Polidori <61737239+AlessandroPolidori@users.noreply.github.com> Date: Tue, 5 Mar 2024 11:27:22 +0100 Subject: [PATCH] fix: Sorting bug and add pre-sorting for segmentation datamodule (#110) * fix: sorting bug and add pre-sorting * build: Update version and changelog * fix: Revert base segmentation experiment --- CHANGELOG.md | 10 ++++++++++ pyproject.toml | 2 +- quadra/__init__.py | 2 +- quadra/datamodules/segmentation.py | 22 +++++++++++++++++++++- 4 files changed, 33 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0360767f..b227507f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,16 @@ # Changelog All notable changes to this project will be documented in this file. +### [2.0.4] + +#### Fixed + +- Fix segmentation num_data_train sorting + +#### Added + +- Add default presorting to segmentation samples + ### [2.0.3] #### Fixed diff --git a/pyproject.toml b/pyproject.toml index 47c7ecd7..be02f35d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "quadra" -version = "2.0.3" +version = "2.0.4" description = "Deep Learning experiment orchestration library" authors = [ "Federico Belotti ", diff --git a/quadra/__init__.py b/quadra/__init__.py index 0525ae75..6ef6310e 100644 --- a/quadra/__init__.py +++ b/quadra/__init__.py @@ -1,4 +1,4 @@ -__version__ = "2.0.3" +__version__ = "2.0.4" def get_version(): diff --git a/quadra/datamodules/segmentation.py b/quadra/datamodules/segmentation.py index 5fc5058a..a9bedc1c 100644 --- a/quadra/datamodules/segmentation.py +++ b/quadra/datamodules/segmentation.py @@ -590,14 +590,34 @@ def _prepare_data(self) -> None: masks_train = samples_and_masks_train[:, 0, 1] masks_val = samples_and_masks_val[:, 0, 1] + # Pre-ordering train and val samples for determinism + # They will be shuffled (with a seed) during training + sorting_indices_train = np.argsort(list(samples_train)) + samples_train = [samples_train[i] for i in sorting_indices_train] + targets_train = [targets_train[i] for i in sorting_indices_train] + masks_train = [masks_train[i] for i in sorting_indices_train] + + sorting_indices_val = np.argsort(samples_val) + samples_val = [samples_val[i] for i in sorting_indices_val] + targets_val = [targets_val[i] for i in sorting_indices_val] + masks_val = [masks_val[i] for i in sorting_indices_val] + if self.exclude_good: samples_train = list(np.array(samples_train)[np.array(targets_train)[:, 0] == 0]) masks_train = list(np.array(masks_train)[np.array(targets_train)[:, 0] == 0]) targets_train = list(np.array(targets_train)[np.array(targets_train)[:, 0] == 0]) if self.num_data_train is not None: + # Generate a random permutation + random_permutation = list(range(len(samples_train))) random.seed(self.seed) - random.shuffle(samples_train) + random.shuffle(random_permutation) + + # Shuffle samples_train, targets_train, and masks_train using the same permutation + samples_train = [samples_train[i] for i in random_permutation] + targets_train = [targets_train[i] for i in random_permutation] + masks_train = [masks_train[i] for i in random_permutation] + samples_train = np.array(samples_train)[: self.num_data_train] targets_train = np.array(targets_train)[: self.num_data_train] masks_train = np.array(masks_train)[: self.num_data_train]