From 43fc14cf48b6e2d4aa1734ebef3046522f34f753 Mon Sep 17 00:00:00 2001
From: tolgakopar
Date: Tue, 16 Apr 2024 10:56:18 +0200
Subject: [PATCH 01/11] added 1d augmentations implemented in Q3

---
 .../model/training/augmentation/__init__.py   |   0
 .../augmentation/time_series_augmentations.py | 178 ++++++++++++++++++
 2 files changed, 178 insertions(+)
 create mode 100644 epochalyst/pipeline/model/training/augmentation/__init__.py
 create mode 100644 epochalyst/pipeline/model/training/augmentation/time_series_augmentations.py

diff --git a/epochalyst/pipeline/model/training/augmentation/__init__.py b/epochalyst/pipeline/model/training/augmentation/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/epochalyst/pipeline/model/training/augmentation/time_series_augmentations.py b/epochalyst/pipeline/model/training/augmentation/time_series_augmentations.py
new file mode 100644
index 0000000..6804646
--- /dev/null
+++ b/epochalyst/pipeline/model/training/augmentation/time_series_augmentations.py
@@ -0,0 +1,178 @@
+"""Contains implementation of several custom time series augmentations using PyTorch"""
+
+from dataclasses import dataclass
+
+import numpy as np
+import torch
+
+
+@dataclass
+class CutMix1D(torch.nn.Module):
+    """CutMix augmentation for 1D signals.
+    Randomly select a percentage between 'low' and 'high' to preserve on the left side of the signal.
+    The right side will be replaced by the corresponding range from another sample from the batch.
+    The labels become the weighted average of the mixed signals where weights are the mix ratios.
+    """
+
+    p: float = 0.5
+    low: float = 0
+    high: float = 1
+
+    def __call__(
+        self, x: torch.Tensor, y: torch.Tensor
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        """Apply CutMix to the batch of 1D signal.
+
+        :param x: Input features. (N,C,L)
+        :param y: Input labels. (N,C)
+        :return: The augmented features and labels"""
+
+        indices = torch.arange(x.shape[0], device=x.device, dtype=torch.int)
+        shuffled_indices = torch.randperm(indices.shape[0])
+
+        low_len = int(self.low * x.shape[-1])
+        high_len = int(self.high * x.shape[-1])
+        cutoff_indices = torch.randint(
+            low_len, high_len, (x.shape[0],), device=x.device, dtype=torch.int
+        )
+        cutoff_rates = cutoff_indices.float() / x.shape[-1]
+
+        augmented_x = x.clone()
+        augmented_y = y.clone().float()
+        for i in range(x.shape[0]):
+            if torch.rand(1) < self.p:
+                augmented_x[i, :, cutoff_indices[i] :] = x[
+                    shuffled_indices[i], :, cutoff_indices[i] :
+                ]
+                augmented_y[i] = y[i] * cutoff_rates[i] + y[shuffled_indices[i]] * (
+                    1 - cutoff_rates[i]
+                )
+        return augmented_x, augmented_y
+
+
+@dataclass
+class MixUp1D(torch.nn.Module):
+    """MixUp augmentation for 1D signals.
+    Randomly takes the weighted average of 2 samples and their labels with random weights."""
+
+    p: float = 0.5
+
+    def __call__(
+        self, x: torch.Tensor, y: torch.Tensor
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        """Apply MixUp to the batch of 1D signal.
+
+        :param x: Input features. (N,C,L)
+        :param y: Input labels. (N,C)
+        :return: The augmented features and labels"""
+        indices = torch.arange(x.shape[0], device=x.device, dtype=torch.int)
+        shuffled_indices = torch.randperm(indices.shape[0])
+
+        augmented_x = x.clone()
+        augmented_y = y.clone().float()
+        for i in range(x.shape[0]):
+            if torch.rand(1) < self.p:
+                lambda_ = torch.rand(1, device=x.device)
+                augmented_x[i] = lambda_ * x[i] + (1 - lambda_) * x[shuffled_indices[i]]
+                augmented_y[i] = lambda_ * y[i] + (1 - lambda_) * y[shuffled_indices[i]]
+        return augmented_x, augmented_y
+
+
+@dataclass
+class Mirror1D(torch.nn.Module):
+    """Mirror augmentation for 1D signals.
+    Mirrors the signal around its mean in the horizontal (time) axis."""
+
+    p: float = 0.5
+
+    def __call__(self, x: torch.Tensor) -> torch.Tensor:
+        """Apply the augmentation to the input signal.
+
+        :param x: Input features. (N,C,L)
+        :return: Augmented features. (N,C,L)
+        """
+        augmented_x = x.clone()
+        for i in range(x.shape[0]):
+            if torch.rand(1) < self.p:
+                augmented_x[i] = -1 * x[i] + 2 * x[i].mean(dim=-1).unsqueeze(-1)
+        return augmented_x
+
+
+@dataclass
+class RandomAmplitudeShift(torch.nn.Module):
+    """Randomly scale the amplitude of all the frequencies in the signal."""
+
+    low: float = 0.5
+    high: float = 1.5
+    p: float = 0.5
+
+    def __call__(self, x: torch.Tensor) -> torch.Tensor:
+        """Apply the augmentation to the input signal.
+
+        :param x: Input features. (N,C,L)
+        :return: Augmented features. (N,C,L)
+        """
+        if torch.rand(1) < self.p:
+            # Take the rfft of the input tensor
+            x_freq = torch.fft.rfft(x, dim=-1)
+            # Create a random tensor of scalers in the range [low,high]
+            random_amplitude = (
+                torch.rand(*x_freq.shape, device=x.device, dtype=x.dtype)
+                * (self.high - self.low)
+                + self.low
+            )
+            # Multiply the rfft with the random amplitude
+            x_freq = x_freq * random_amplitude
+            # Take the irfft of the result
+            return torch.fft.irfft(x_freq, dim=-1)
+        return x
+
+
+@dataclass
+class RandomPhaseShift(torch.nn.Module):
+    """Randomly shift the phase of all the frequencies in the signal."""
+
+    shift_limit: float = 0.25
+    p: float = 0.5
+
+    def __call__(self, x: torch.Tensor) -> torch.Tensor:
+        """Apply Random phase shift to each frequency of the fft of the input signal.
+
+        :param x: Input features. (N,C,L)
+        :return: augmented features. (N,C,L)
+        """
+        if torch.rand(1) < self.p:
+            # Take the rfft of the input tensor
+            x_freq = torch.fft.rfft(x, dim=-1)
+            # Create a random tensor of complex numbers each with a random phase but with magnitude of 1
+            random_phase = (
+                torch.rand(*x_freq.shape, device=x.device, dtype=x.dtype)
+                * 2
+                * np.pi
+                * self.shift_limit
+            )
+            random_phase = torch.cos(random_phase) + 1j * torch.sin(random_phase)
+            # Multiply the rfft with the random phase
+            x_freq = x_freq * random_phase
+            # Take the irfft of the result
+            return torch.fft.irfft(x_freq, dim=-1)
+        return x
+
+
+@dataclass
+class Reverse1D(torch.nn.Module):
+    """Reverse augmentation for 1D signals."""
+
+    p: float = 0.5
+
+    def __call__(self, x: torch.Tensor) -> torch.Tensor:
+        """Apply the augmentation to the input signal.
+
+        :param x: Input features. (N,C,L)
+        :return: Augmented features (N,C,L)
+        """
+        augmented_x = x.clone()
+        for i in range(x.shape[0]):
+            if torch.rand(1) < self.p:
+                augmented_x[i] = torch.flip(x[i], [-1])
+        return augmented_x
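A minimal usage sketch (not part of the patch) for the 1D augmentations introduced above. The import path follows the file created in this patch and the shapes follow its docstrings: features (N,C,L), one-hot labels (N,C); the tensor values are illustrative only.

    import torch
    from epochalyst.pipeline.model.training.augmentation.time_series_augmentations import (
        CutMix1D,
        RandomPhaseShift,
    )

    x = torch.randn(8, 2, 1000)                  # batch of 8 two-channel signals
    y = torch.eye(2)[torch.randint(0, 2, (8,))]  # one-hot labels, shape (8, 2)

    # CutMix1D mixes features and labels, so it takes and returns both
    cutmix = CutMix1D(p=0.5, low=0.25, high=0.75)
    x_aug, y_aug = cutmix(x, y)

    # RandomPhaseShift is label-preserving, so it transforms x only
    x_aug = RandomPhaseShift(shift_limit=0.25, p=0.5)(x_aug)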
From 0de2214821b42b674263afa954886bda82ccc691 Mon Sep 17 00:00:00 2001
From: tolgakopar
Date: Tue, 16 Apr 2024 10:58:54 +0200
Subject: [PATCH 02/11] changed shape docs for augmentations

---
 .../augmentation/time_series_augmentations.py | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/epochalyst/pipeline/model/training/augmentation/time_series_augmentations.py b/epochalyst/pipeline/model/training/augmentation/time_series_augmentations.py
index 6804646..0241fbd 100644
--- a/epochalyst/pipeline/model/training/augmentation/time_series_augmentations.py
+++ b/epochalyst/pipeline/model/training/augmentation/time_series_augmentations.py
@@ -23,7 +23,7 @@ def __call__(
     ) -> tuple[torch.Tensor, torch.Tensor]:
         """Apply CutMix to the batch of 1D signal.
 
-        :param x: Input features. (N,C,L)
+        :param x: Input features. (N,C,L)|(N,L)
         :param y: Input labels. (N,C)
         :return: The augmented features and labels"""
 
@@ -62,7 +62,7 @@ def __call__(
     ) -> tuple[torch.Tensor, torch.Tensor]:
         """Apply MixUp to the batch of 1D signal.
 
-        :param x: Input features. (N,C,L)
+        :param x: Input features. (N,C,L)|(N,L)
         :param y: Input labels. (N,C)
         :return: The augmented features and labels"""
         indices = torch.arange(x.shape[0], device=x.device, dtype=torch.int)
@@ -88,8 +88,8 @@ class Mirror1D(torch.nn.Module):
     def __call__(self, x: torch.Tensor) -> torch.Tensor:
         """Apply the augmentation to the input signal.
 
-        :param x: Input features. (N,C,L)
-        :return: Augmented features. (N,C,L)
+        :param x: Input features. (N,C,L)|(N,L)
+        :return: Augmented features. (N,C,L)|(N,L)
         """
         augmented_x = x.clone()
         for i in range(x.shape[0]):
@@ -109,8 +109,8 @@ class RandomAmplitudeShift(torch.nn.Module):
     def __call__(self, x: torch.Tensor) -> torch.Tensor:
         """Apply the augmentation to the input signal.
 
-        :param x: Input features. (N,C,L)
-        :return: Augmented features. (N,C,L)
+        :param x: Input features. (N,C,L)|(N,L)
+        :return: Augmented features. (N,C,L)|(N,L)
         """
         if torch.rand(1) < self.p:
             # Take the rfft of the input tensor
@@ -138,8 +138,8 @@ class RandomPhaseShift(torch.nn.Module):
     def __call__(self, x: torch.Tensor) -> torch.Tensor:
         """Apply Random phase shift to each frequency of the fft of the input signal.
 
-        :param x: Input features. (N,C,L)
-        :return: augmented features. (N,C,L)
+        :param x: Input features. (N,C,L)|(N,L)
+        :return: augmented features. (N,C,L)|(N,L)
         """
         if torch.rand(1) < self.p:
             # Take the rfft of the input tensor
@@ -168,8 +168,8 @@ class Reverse1D(torch.nn.Module):
     def __call__(self, x: torch.Tensor) -> torch.Tensor:
         """Apply the augmentation to the input signal.
 
-        :param x: Input features. (N,C,L)
-        :return: Augmented features (N,C,L)
+        :param x: Input features. (N,C,L)|(N,L)
+        :return: Augmented features (N,C,L)|(N,L)
         """
         augmented_x = x.clone()
         for i in range(x.shape[0]):

From e4670a9ce7e8468b781b60d8248cc5038b35fe80 Mon Sep 17 00:00:00 2001
From: tolgakopar
Date: Tue, 16 Apr 2024 11:01:39 +0200
Subject: [PATCH 03/11] added subtract channels

---
 .../augmentation/time_series_augmentations.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/epochalyst/pipeline/model/training/augmentation/time_series_augmentations.py b/epochalyst/pipeline/model/training/augmentation/time_series_augmentations.py
index 0241fbd..d6b8229 100644
--- a/epochalyst/pipeline/model/training/augmentation/time_series_augmentations.py
+++ b/epochalyst/pipeline/model/training/augmentation/time_series_augmentations.py
@@ -176,3 +176,21 @@ def __call__(self, x: torch.Tensor) -> torch.Tensor:
         if torch.rand(1) < self.p:
             augmented_x[i] = torch.flip(x[i], [-1])
         return augmented_x
+
+
+@dataclass
+class SubstractChannels(torch.nn.Module):
+    """Randomly subtract other channels from the current one."""
+
+    p: float = 0.5
+
+    def __call__(self, x: torch.Tensor) -> torch.Tensor:
+        """Apply subtracting other channels to the input signal.
+        :param x: Input features. (N,C,L)
+        :return: Augmented features. (N,C,L)
+        """
+        if torch.rand(1) < self.p:
+            length = x.shape[1] - 1
+            total = x.sum(dim=1) / length
+            x = x - total.unsqueeze(1) + (x / length)
+        return x

From 6d6f6aa1a02a1df909914a7c8ba35a2599187452 Mon Sep 17 00:00:00 2001
From: tolgakopar
Date: Tue, 16 Apr 2024 11:19:12 +0200
Subject: [PATCH 04/11] Added cutmix and mixup augmentation

Wrappers for the kornia implementation that fit in our pipeline
---
 .../augmentation/image_augmentations.py       | 98 +++++++++++++++++++
 .../augmentation/time_series_augmentations.py |  2 +-
 2 files changed, 99 insertions(+), 1 deletion(-)
 create mode 100644 epochalyst/pipeline/model/training/augmentation/image_augmentations.py

diff --git a/epochalyst/pipeline/model/training/augmentation/image_augmentations.py b/epochalyst/pipeline/model/training/augmentation/image_augmentations.py
new file mode 100644
index 0000000..76524f0
--- /dev/null
+++ b/epochalyst/pipeline/model/training/augmentation/image_augmentations.py
@@ -0,0 +1,98 @@
+"""Contains implementation of several image augmentations using PyTorch."""
+
+from dataclasses import dataclass, field
+from typing import Any
+
+import torch
+
+
+def get_kornia_mix() -> Any:
+    try:
+        import kornia
+
+        return kornia.augmentation._2d.mix
+    except ImportError:
+        raise ImportError(
+            "If you want to use this augmentation you must install kornia"
+        )
+
+
+@dataclass
+class CutMix:
+    """2D CutMix implementation for spectrogram data augmentation.
+
+    :param cut_size: The size of the cut
+    :param same_on_batch: Apply the same transformation across the batch
+    :param p: The probability of applying the filter
+    """
+
+    cut_size: tuple[float, float] = field(default=(0.0, 1.0))
+    same_on_batch: bool = False
+    p: float = 0.5
+
+    def __post_init__(self) -> None:
+        """Initialize the kornia CutMix augmentation."""
+        self.cutmix = get_kornia_mix().cutmix.RandomCutMixV2(
+            p=self.p,
+            cut_size=self.cut_size,
+            same_on_batch=self.same_on_batch,
+            data_keys=["input", "class"],
+        )
+
+    def __call__(
+        self, x: torch.Tensor, y: torch.Tensor
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        """Randomly patch the input with another sample."""
+        dummy_labels = torch.arange(x.size(0))
+        augmented_x, augmentation_info = self.cutmix(x, dummy_labels)
+        augmentation_info = augmentation_info[0]
+
+        y = y.float()
+        y_result = y.clone()
+        for i in range(augmentation_info.shape[0]):
+            y_result[i] = (
+                y[i] * (1 - augmentation_info[i, 2])
+                + y[int(augmentation_info[i, 1])] * augmentation_info[i, 2]
+            )
+
+        return augmented_x, y_result
+
+
+@dataclass
+class MixUp:
+    """2D MixUp implementation for spectrogram data augmentation.
+
+    :param lambda_val: The range of the mixup coefficient
+    :param same_on_batch: Apply the same transformation across the batch
+    :param p: The probability of applying the filter
+    """
+
+    lambda_val: tuple[float, float] = field(default=(0.0, 1.0))
+    same_on_batch: bool = False
+    p: float = 0.5
+
+    def __post_init__(self) -> None:
+        """Initialize the kornia MixUp augmentation."""
+        self.mixup = get_kornia_mix().mixup.RandomMixUpV2(
+            p=self.p,
+            lambda_val=self.lambda_val,
+            same_on_batch=self.same_on_batch,
+            data_keys=["input", "class"],
+        )
+
+    def __call__(
+        self, x: torch.Tensor, y: torch.Tensor
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        """Randomly patch the input with another sample."""
+        dummy_labels = torch.arange(x.size(0))
+        augmented_x, augmentation_info = self.mixup(x, dummy_labels)
+
+        y = y.float()
+        y_result = y.clone()
+        for i in range(augmentation_info.shape[0]):
+            y_result[i] = (
+                y[i] * (1 - augmentation_info[i, 2])
+                + y[int(augmentation_info[i, 1])] * augmentation_info[i, 2]
+            )
+
+        return augmented_x, y_result
diff --git a/epochalyst/pipeline/model/training/augmentation/time_series_augmentations.py b/epochalyst/pipeline/model/training/augmentation/time_series_augmentations.py
index d6b8229..da92db3 100644
--- a/epochalyst/pipeline/model/training/augmentation/time_series_augmentations.py
+++ b/epochalyst/pipeline/model/training/augmentation/time_series_augmentations.py
@@ -1,4 +1,4 @@
-"""Contains implementation of several custom time series augmentations using PyTorch"""
+"""Contains implementation of several custom time series augmentations using PyTorch."""
 
 from dataclasses import dataclass
 
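A sketch (not part of the patch) of how the kornia-backed wrappers above are meant to be called. kornia must be installed; shapes follow the docstrings added later in this series: images (N,C,W,H), labels (N,C). The values are illustrative only.

    import torch
    from epochalyst.pipeline.model.training.augmentation.image_augmentations import CutMix, MixUp

    x = torch.rand(16, 1, 64, 64)
    y = torch.eye(2)[torch.randint(0, 2, (16,))].float()

    # Both wrappers resolve kornia lazily in __post_init__, so constructing
    # them is what raises ImportError when kornia is missing
    x_cut, y_cut = CutMix(cut_size=(0.0, 1.0), p=0.5)(x, y)
    x_mix, y_mix = MixUp(lambda_val=(0.0, 1.0), p=0.5)(x, y)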
From 50e53f8ace3af9bae895e12010d8b917eb79bb39 Mon Sep 17 00:00:00 2001
From: tolgakopar
Date: Tue, 16 Apr 2024 11:33:35 +0200
Subject: [PATCH 05/11] added utils for augmentations: sequential, apply-one
 and no-op

Since no-op is not a container I called the folder utils
---
 .../model/training/augmentation/utils.py      | 115 ++++++++++++++++++
 1 file changed, 115 insertions(+)
 create mode 100644 epochalyst/pipeline/model/training/augmentation/utils.py

diff --git a/epochalyst/pipeline/model/training/augmentation/utils.py b/epochalyst/pipeline/model/training/augmentation/utils.py
new file mode 100644
index 0000000..f7b3a6d
--- /dev/null
+++ b/epochalyst/pipeline/model/training/augmentation/utils.py
@@ -0,0 +1,115 @@
+"""
+This module provides utility classes for applying augmentations to data.
+
+Classes:
+- CustomApplyOne: A custom class for applying a single augmentation from a selection based on their probabilities.
+- CustomSequential: A custom sequential class for applying augmentations sequentially.
+- NoOp: A class representing a no-operation augmentation.
+"""
+
+from dataclasses import dataclass, field
+from typing import Any
+
+import torch
+
+
+@dataclass
+class CustomApplyOne:
+    """Custom class that applies a single augmentation from a selection based on their probabilities."""
+
+    probabilities_tensor: torch.Tensor
+    x_transforms: list[Any] = field(default_factory=list)
+    xy_transforms: list[Any] = field(default_factory=list)
+
+    def __post_init__(self) -> None:
+        """Post initialization function of CustomApplyOne."""
+        self.probabilities = []
+        if self.x_transforms is not None:
+            for transform in self.x_transforms:
+                self.probabilities.append(transform.p)
+        if self.xy_transforms is not None:
+            for transform in self.xy_transforms:
+                self.probabilities.append(transform.p)
+
+        # Make tensor from probs
+        self.probabilities_tensor = torch.tensor(self.probabilities_tensor)
+        # Ensure sum is 1
+        self.probabilities_tensor /= self.probabilities_tensor.sum()
+        self.all_transforms = self.x_transforms + self.xy_transforms
+
+    def __call__(
+        self, x: torch.Tensor, y: torch.Tensor
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        """Apply one randomly selected augmentation.
+
+        Args:
+            x (torch.Tensor): The input tensor.
+            y (torch.Tensor): The target tensor.
+
+        Returns:
+            tuple[torch.Tensor, torch.Tensor]: The augmented input and target tensors.
+        """
+        transform = self.all_transforms[
+            int(
+                torch.multinomial(
+                    self.probabilities_tensor, 1, replacement=False
+                ).item()
+            )
+        ]
+        if transform in self.x_transforms:
+            x = transform(x)
+        if transform in self.xy_transforms:
+            x, y = transform(x, y)
+        return x, y
+
+
+@dataclass
+class CustomSequential:
+    """Custom sequential class for augmentations.
+
+    This class applies augmentations sequentially without probabilities.
+    """
+
+    x_transforms: list[Any] = field(default_factory=list)
+    xy_transforms: list[Any] = field(default_factory=list)
+
+    def __call__(
+        self, x: torch.Tensor, y: torch.Tensor
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        """Apply the augmentations sequentially.
+
+        Args:
+            x (torch.Tensor): The input tensor.
+            y (torch.Tensor): The target tensor.
+
+        Returns:
+            tuple[torch.Tensor, torch.Tensor]: The augmented input and target tensors.
+        """
+        if self.x_transforms is not None:
+            for transform in self.x_transforms:
+                x = transform(x)
+        if self.xy_transforms is not None:
+            for transform in self.xy_transforms:
+                x, y = transform(x, y)
+        return x, y
+
+
+@dataclass
+class NoOp(torch.nn.Module):
+    """No-op augmentation for 1D signals.
+
+    This class represents a no-operation augmentation.
+    """
+
+    p: float = 0.5
+
+    def __call__(self, x: torch.Tensor) -> torch.Tensor:
+        """Apply the augmentation to the input signal.
+
+        Args:
+            x (torch.Tensor): The input signal tensor.
+
+        Returns:
+            torch.Tensor: The augmented input signal tensor.
+        """
+        return x
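A sketch (not part of the patch) of how the three utilities compose with the earlier augmentations. Note that as written in this patch CustomApplyOne still takes probabilities_tensor as a required constructor argument; the keyword-only construction below matches the constructor only after PATCH 09 turns that field into field(init=False).

    import torch
    from epochalyst.pipeline.model.training.augmentation.time_series_augmentations import (
        CutMix1D,
        Mirror1D,
    )
    from epochalyst.pipeline.model.training.augmentation.utils import (
        CustomApplyOne,
        CustomSequential,
        NoOp,
    )

    x = torch.randn(8, 2, 1000)
    y = torch.eye(2)[torch.randint(0, 2, (8,))]

    # Apply every transform in order; x-only transforms run before xy transforms
    seq = CustomSequential(x_transforms=[Mirror1D(p=0.5)], xy_transforms=[CutMix1D(p=0.5)])
    x_aug, y_aug = seq(x, y)

    # Draw exactly one transform per call, weighted by each transform's p;
    # NoOp gives "do nothing" its own slot in the draw
    pick = CustomApplyOne(
        x_transforms=[Mirror1D(p=0.25), NoOp(p=0.5)],
        xy_transforms=[CutMix1D(p=0.25)],
    )
    x_aug, y_aug = pick(x, y)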
From 52d53ac588cd644e7000394a9fa719f880534b53 Mon Sep 17 00:00:00 2001
From: tolgakopar
Date: Tue, 16 Apr 2024 13:18:38 +0200
Subject: [PATCH 06/11] Added tests for cutmix and mixup

---
 .../augmentation/image_augmentations.py       |  5 ++-
 requirements.txt                              |  1 +
 .../augmentation/test_image_augmentations.py  | 45 +++++++++++++++++++
 .../test_time_series_augmentations.py         |  0
 .../model/training/augmentation/test_utils.py |  0
 5 files changed, 50 insertions(+), 1 deletion(-)
 create mode 100644 tests/pipeline/model/training/augmentation/test_image_augmentations.py
 create mode 100644 tests/pipeline/model/training/augmentation/test_time_series_augmentations.py
 create mode 100644 tests/pipeline/model/training/augmentation/test_utils.py

diff --git a/epochalyst/pipeline/model/training/augmentation/image_augmentations.py b/epochalyst/pipeline/model/training/augmentation/image_augmentations.py
index 76524f0..8f0c049 100644
--- a/epochalyst/pipeline/model/training/augmentation/image_augmentations.py
+++ b/epochalyst/pipeline/model/training/augmentation/image_augmentations.py
@@ -42,7 +42,10 @@ def __post_init__(self) -> None:
     def __call__(
         self, x: torch.Tensor, y: torch.Tensor
     ) -> tuple[torch.Tensor, torch.Tensor]:
-        """Randomly patch the input with another sample."""
+        """Randomly patch the input with another sample.
+        :param x: Input images. (N,C,W,H)
+        :param y: Input labels. (N,C)
+        """
         dummy_labels = torch.arange(x.size(0))
         augmented_x, augmentation_info = self.cutmix(x, dummy_labels)
         augmentation_info = augmentation_info[0]
diff --git a/requirements.txt b/requirements.txt
index 45a2357..ddbcc5e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -41,3 +41,4 @@ tqdm==4.66.2
 typing_extensions==4.10.0
 urllib3==2.2.1
 zipp==3.17.0
+kornia==0.7.2
diff --git a/tests/pipeline/model/training/augmentation/test_image_augmentations.py b/tests/pipeline/model/training/augmentation/test_image_augmentations.py
new file mode 100644
index 0000000..5d78762
--- /dev/null
+++ b/tests/pipeline/model/training/augmentation/test_image_augmentations.py
@@ -0,0 +1,45 @@
+from epochalyst.pipeline.model.training.augmentation import image_augmentations
+import torch
+
+
+class TestImageAugmentations:
+    def test_cutmix(self):
+        # Create a CutMix instance
+        cutmix = image_augmentations.CutMix(p=1.0)
+
+        # Create dummy input and labels
+        x = torch.cat(
+            [torch.ones(16, 1, 100, 100), torch.zeros(16, 1, 100, 100)], dim=0
+        )
+        # Multiclass labels
+        y = torch.cat([torch.ones(16, 2), torch.zeros(16, 2)], dim=0)
+        # Apply CutMix augmentation
+        augmented_x, augmented_y = cutmix(x, y)
+
+        # Assert the output shapes are correct
+        assert augmented_x.shape == x.shape
+        assert augmented_y.shape == y.shape
+
+        # Because the images are all ones and zeros the mean of the pixels should be equal to the labels after being transformed
+        assert torch.all(
+            torch.isclose(augmented_x.mean(dim=-1).mean(dim=-1), augmented_y)
+        )
+
+    def test_mixup(self):
+        mixup = image_augmentations.MixUp(p=1.0)
+        # Create dummy input and labels
+        x = torch.cat(
+            [torch.ones(16, 1, 100, 100), torch.zeros(16, 1, 100, 100)], dim=0
+        )
+        # Multiclass labels
+        y = torch.cat([torch.ones(16, 2), torch.zeros(16, 2)], dim=0)
+        # Apply MixUp augmentation
+        augmented_x, augmented_y = mixup(x, y)
+        # Assert the output shapes are correct
+        assert augmented_x.shape == x.shape
+        assert augmented_y.shape == y.shape
+
+        # Because the images are all ones and zeros the mean of the pixels should be equal to the labels after being transformed
+        assert torch.all(
+            torch.isclose(augmented_x.mean(dim=-1).mean(dim=-1), augmented_y)
+        )
diff --git a/tests/pipeline/model/training/augmentation/test_time_series_augmentations.py b/tests/pipeline/model/training/augmentation/test_time_series_augmentations.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/pipeline/model/training/augmentation/test_utils.py b/tests/pipeline/model/training/augmentation/test_utils.py
new file mode 100644
index 0000000..e69de29
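Why the mean-equals-label assertion in these tests holds: each sample is constant (all ones or all zeros) and its label vector repeats that constant, so after CutMix pastes a patch from an all-zeros image into an all-ones one, the pixel mean is exactly the mix ratio lambda, and the mixed label is 1 * lambda + 0 * (1 - lambda) = lambda as well. Averaging over both spatial dimensions with augmented_x.mean(dim=-1).mean(dim=-1) therefore reproduces augmented_y up to floating-point error, which is what the torch.isclose check verifies. The same argument applies to MixUp, where the mix weight is the sampled lambda rather than a patch-area ratio.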
From 78507a76db6d393aba6214a4cb22140638ca2687 Mon Sep 17 00:00:00 2001
From: tolgakopar
Date: Tue, 16 Apr 2024 14:54:20 +0200
Subject: [PATCH 07/11] Added most of the 1d augmentation tests

Only need coverage for random amplitude shift
---
 .../augmentation/time_series_augmentations.py |  10 +-
 .../augmentation/test_image_augmentations.py  |   8 +-
 .../test_time_series_augmentations.py         | 118 ++++++++++++++++++
 3 files changed, 127 insertions(+), 9 deletions(-)

diff --git a/epochalyst/pipeline/model/training/augmentation/time_series_augmentations.py b/epochalyst/pipeline/model/training/augmentation/time_series_augmentations.py
index da92db3..7e25493 100644
--- a/epochalyst/pipeline/model/training/augmentation/time_series_augmentations.py
+++ b/epochalyst/pipeline/model/training/augmentation/time_series_augmentations.py
@@ -23,7 +23,7 @@ def __call__(
     ) -> tuple[torch.Tensor, torch.Tensor]:
         """Apply CutMix to the batch of 1D signal.
 
-        :param x: Input features. (N,C,L)|(N,L)
+        :param x: Input features. (N,C,L)
         :param y: Input labels. (N,C)
         :return: The augmented features and labels"""
 
@@ -138,8 +138,8 @@ class RandomPhaseShift(torch.nn.Module):
     def __call__(self, x: torch.Tensor) -> torch.Tensor:
         """Apply Random phase shift to each frequency of the fft of the input signal.
 
-        :param x: Input features. (N,C,L)|(N,L)
-        :return: augmented features. (N,C,L)|(N,L)
+        :param x: Input features. (N,C,L)|(N,L)|(L)
+        :return: augmented features. (N,C,L)|(N,L)|(L)
         """
         if torch.rand(1) < self.p:
             # Take the rfft of the input tensor
@@ -189,6 +189,10 @@ def __call__(self, x: torch.Tensor) -> torch.Tensor:
         :param x: Input features. (N,C,L)
         :return: Augmented features. (N,C,L)
         """
+        if x.shape[1] == 1:
+            raise ValueError(
+                "Sequence only has 1 channel. No channels to subtract from each other"
+            )
         if torch.rand(1) < self.p:
             length = x.shape[1] - 1
             total = x.sum(dim=1) / length
diff --git a/tests/pipeline/model/training/augmentation/test_image_augmentations.py b/tests/pipeline/model/training/augmentation/test_image_augmentations.py
index 5d78762..1fe2c14 100644
--- a/tests/pipeline/model/training/augmentation/test_image_augmentations.py
+++ b/tests/pipeline/model/training/augmentation/test_image_augmentations.py
@@ -21,9 +21,7 @@ def test_cutmix(self):
         assert augmented_y.shape == y.shape
 
         # Because the images are all ones and zeros the mean of the pixels should be equal to the labels after being transformed
-        assert torch.all(
-            torch.isclose(augmented_x.mean(dim=-1).mean(dim=-1), augmented_y)
-        )
+        assert torch.allclose(augmented_x.mean(dim=-1).mean(dim=-1), augmented_y)
 
     def test_mixup(self):
         mixup = image_augmentations.MixUp(p=1.0)
@@ -40,6 +38,4 @@ def test_mixup(self):
         assert augmented_y.shape == y.shape
 
         # Because the images are all ones and zeros the mean of the pixels should be equal to the labels after being transformed
-        assert torch.all(
-            torch.isclose(augmented_x.mean(dim=-1).mean(dim=-1), augmented_y)
-        )
+        assert torch.allclose(augmented_x.mean(dim=-1).mean(dim=-1), augmented_y)
diff --git a/tests/pipeline/model/training/augmentation/test_time_series_augmentations.py b/tests/pipeline/model/training/augmentation/test_time_series_augmentations.py
index e69de29..64663a9 100644
--- a/tests/pipeline/model/training/augmentation/test_time_series_augmentations.py
+++ b/tests/pipeline/model/training/augmentation/test_time_series_augmentations.py
@@ -0,0 +1,118 @@
+import numpy as np
+from epochalyst.pipeline.model.training.augmentation import time_series_augmentations
+import torch
+
+
+def set_torch_seed(seed: int = 42) -> None:
+    """Set torch seed for reproducibility.
+
+    :param seed: seed to set
+
+    :return: None
+    """
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
+    # When running on the CuDNN backend, two further options must be set
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+
+
+class TestTimeSeriesAugmentations:
+    def test_cutmix1d(self):
+        set_torch_seed(42)
+        cutmix1d = time_series_augmentations.CutMix1D(p=1.0)
+        # Create dummy input and labels
+        x = torch.cat([torch.ones(16, 1, 100), torch.zeros(16, 1, 100)], dim=0)
+        # Multiclass labels
+        y = torch.cat([torch.ones(16, 2), torch.zeros(16, 2)], dim=0)
+        # Apply CutMix augmentation
+        augmented_x, augmented_y = cutmix1d(x, y)
+
+        # Assert the output shapes are correct
+        assert augmented_x.shape == x.shape
+        assert augmented_y.shape == y.shape
+
+        # Because the signals are all ones and zeros the mean of the values should be equal to the labels after being transformed
+        assert torch.allclose(augmented_x.mean(dim=-1), augmented_y)
+
+    def test_mixup1d(self):
+        set_torch_seed(42)
+        mixup1d = time_series_augmentations.CutMix1D(p=1.0)
+        # Create dummy input and labels
+        x = torch.cat([torch.ones(16, 1, 100), torch.zeros(16, 1, 100)], dim=0)
+        # Multiclass labels
+        y = torch.cat([torch.ones(16, 2), torch.zeros(16, 2)], dim=0)
+        # Apply MixUp augmentation
+        augmented_x, augmented_y = mixup1d(x, y)
+
+        # Assert the output shapes are correct
+        assert augmented_x.shape == x.shape
+        assert augmented_y.shape == y.shape
+
+        # Because the signals are all ones and zeros the mean of the values should be equal to the labels after being transformed
+        assert torch.allclose(augmented_x.mean(dim=-1), augmented_y)
+
+    def test_mirror1d(self):
+        set_torch_seed(42)
+        mirror1d = time_series_augmentations.Mirror1D(p=1.0)
+        x = torch.cat([torch.ones(32, 1, 50), torch.zeros(32, 1, 50)], dim=-1)
+
+        augmented_x = mirror1d(x)
+
+        # Assert the output shape is correct
+        assert augmented_x.shape == x.shape
+
+        # Assert x is mirrored
+        assert torch.allclose(
+            augmented_x,
+            torch.cat([torch.zeros(32, 1, 50), torch.ones(32, 1, 50)], dim=-1),
+        )
+
+    def test_random_amplitude_shift(self):
+        pass
+
+    def test_random_phase_shift(self):
+        set_torch_seed(42)
+        random_phase_shift = time_series_augmentations.RandomPhaseShift(p=1.0)
+        x = torch.sin(torch.linspace(0, 2 * np.pi, 1000))
+        augmented_x = random_phase_shift(x)
+
+        # Assert the output shape is correct
+        assert augmented_x.shape == x.shape
+
+        # Assert x is not equal to augmented x
+        assert not torch.allclose(augmented_x, x)
+        # Assert that the absolute value of the rfft is still the same. Very high atol because the sin function isn't precise with 1000 points
+        assert torch.allclose(
+            torch.abs(torch.fft.rfft(x, dim=-1)),
+            torch.abs(torch.fft.rfft(augmented_x, dim=-1)),
+            atol=0.05,
+        )
+        # Assert that the mean is still around 0 and equal to the original mean
+        assert torch.isclose(augmented_x.mean(), x.mean())
+        assert torch.isclose(augmented_x.mean(), torch.tensor([0]).float())
+
+    def test_reverse_1d(self):
+        set_torch_seed(42)
+        reverse = time_series_augmentations.Reverse1D(p=1.0)
+        x = torch.sin(torch.linspace(0, 2 * np.pi, 1000)).unsqueeze(0)
+        test_x = torch.sin(torch.linspace(np.pi, 3 * np.pi, 1000)).unsqueeze(0)
+        augmented_x = reverse(x)
+
+        # Assert the output shape is correct
+        assert augmented_x.shape == x.shape
+        # Assert the reversed sine wave is equal to 180 degrees phase shifted version
+        assert torch.allclose(test_x, augmented_x, atol=0.05)
+
+    def test_subtract_channels(self):
+        set_torch_seed(42)
+        subtract_channels = time_series_augmentations.SubstractChannels(p=1.0)
+        # Only works for multi-channel sequences
+        x = torch.ones(32, 2, 100)
+        augmented_x = subtract_channels(x)
+
+        # Assert the output shape is correct
+        assert augmented_x.shape == x.shape
+
+        assert torch.allclose(torch.zeros(*augmented_x.shape), augmented_x)

From be9752fd66bd8bb07c7767b53db44d394077d258 Mon Sep 17 00:00:00 2001
From: tolgakopar
Date: Tue, 16 Apr 2024 15:07:36 +0200
Subject: [PATCH 08/11] Added test for amplitude shift

Also lowered the tolerance for one of the tests
---
 .../test_time_series_augmentations.py         | 24 +++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/tests/pipeline/model/training/augmentation/test_time_series_augmentations.py b/tests/pipeline/model/training/augmentation/test_time_series_augmentations.py
index 64663a9..3320b2d 100644
--- a/tests/pipeline/model/training/augmentation/test_time_series_augmentations.py
+++ b/tests/pipeline/model/training/augmentation/test_time_series_augmentations.py
@@ -70,7 +70,27 @@ def test_mirror1d(self):
         )
 
     def test_random_amplitude_shift(self):
-        pass
+        set_torch_seed(42)
+        low = 0.5
+        high = 1.5
+        random_amplitude_shift = time_series_augmentations.RandomAmplitudeShift(
+            p=1.0, low=low, high=high
+        )
+        # Sum of 2 signals with the 2nd one being half the frequency of the first one
+        x = torch.sin(torch.linspace(0, 2 * np.pi, 1000)) + torch.sin(
+            torch.linspace(0, np.pi, 1000)
+        )
+        augmented_x = random_amplitude_shift(x)
+
+        # Assert the output shape is correct
+        assert augmented_x.shape == x.shape
+        # Assert that the resulting signals amplitudes do not go over the bounds that have been set
+        assert torch.all(
+            torch.abs(torch.fft.rfft(x)) * low <= torch.abs(torch.fft.rfft(augmented_x))
+        ) & torch.all(
+            torch.abs(torch.fft.rfft(augmented_x))
+            <= torch.abs(torch.fft.rfft(x)) * high
+        )
 
     def test_random_phase_shift(self):
         set_torch_seed(42)
@@ -113,7 +133,7 @@ def test_reverse_1d(self):
         # Assert the output shape is correct
         assert augmented_x.shape == x.shape
         # Assert the reversed sine wave is equal to 180 degrees phase shifted version
-        assert torch.allclose(test_x, augmented_x, atol=0.05)
+        assert torch.allclose(test_x, augmented_x, atol=0.0000005)
From 6bd6e5a6c798ba64aa8fecc243de080bb3308a86 Mon Sep 17 00:00:00 2001
From: tolgakopar
Date: Tue, 16 Apr 2024 15:55:17 +0200
Subject: [PATCH 09/11] fixed mixup test issue and added util tests

---
 .../model/training/augmentation/utils.py      | 13 ++-
 .../test_time_series_augmentations.py         |  2 +-
 .../model/training/augmentation/test_utils.py | 87 +++++++++++++++++++
 3 files changed, 93 insertions(+), 9 deletions(-)

diff --git a/epochalyst/pipeline/model/training/augmentation/utils.py b/epochalyst/pipeline/model/training/augmentation/utils.py
index f7b3a6d..21f8a09 100644
--- a/epochalyst/pipeline/model/training/augmentation/utils.py
+++ b/epochalyst/pipeline/model/training/augmentation/utils.py
@@ -17,7 +17,7 @@ class CustomApplyOne:
     """Custom class that applies a single augmentation from a selection based on their probabilities."""
 
-    probabilities_tensor: torch.Tensor
+    probabilities_tensor: torch.Tensor = field(init=False)
     x_transforms: list[Any] = field(default_factory=list)
     xy_transforms: list[Any] = field(default_factory=list)
 
@@ -32,7 +32,7 @@ def __post_init__(self) -> None:
                 self.probabilities.append(transform.p)
 
         # Make tensor from probs
-        self.probabilities_tensor = torch.tensor(self.probabilities_tensor)
+        self.probabilities_tensor = torch.tensor(self.probabilities)
        # Ensure sum is 1
         self.probabilities_tensor /= self.probabilities_tensor.sum()
         self.all_transforms = self.x_transforms + self.xy_transforms
@@ -42,12 +42,9 @@ def __call__(
     ) -> tuple[torch.Tensor, torch.Tensor]:
         """Apply one randomly selected augmentation.
 
-        Args:
-            x (torch.Tensor): The input tensor.
-            y (torch.Tensor): The target tensor.
-
-        Returns:
-            tuple[torch.Tensor, torch.Tensor]: The augmented input and target tensors.
+        :param x: Input features
+        :param y: Input labels
+        :return: Augmented features and labels
         """
         transform = self.all_transforms[
             int(
diff --git a/tests/pipeline/model/training/augmentation/test_time_series_augmentations.py b/tests/pipeline/model/training/augmentation/test_time_series_augmentations.py
index 3320b2d..5868cb6 100644
--- a/tests/pipeline/model/training/augmentation/test_time_series_augmentations.py
+++ b/tests/pipeline/model/training/augmentation/test_time_series_augmentations.py
@@ -38,7 +38,7 @@ def test_mixup1d(self):
 
     def test_mixup1d(self):
         set_torch_seed(42)
-        mixup1d = time_series_augmentations.CutMix1D(p=1.0)
+        mixup1d = time_series_augmentations.MixUp1D(p=1.0)
         # Create dummy input and labels
         x = torch.cat([torch.ones(16, 1, 100), torch.zeros(16, 1, 100)], dim=0)
         # Multiclass labels
diff --git a/tests/pipeline/model/training/augmentation/test_utils.py b/tests/pipeline/model/training/augmentation/test_utils.py
index e69de29..f792797 100644
--- a/tests/pipeline/model/training/augmentation/test_utils.py
+++ b/tests/pipeline/model/training/augmentation/test_utils.py
@@ -0,0 +1,87 @@
+from epochalyst.pipeline.model.training.augmentation import utils
+import torch
+
+
+def set_torch_seed(seed: int = 42) -> None:
+    """Set torch seed for reproducibility.
+
+    :param seed: seed to set
+
+    :return: None
+    """
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
+    # When running on the CuDNN backend, two further options must be set
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+
+
+class TestUtils:
+    def test_no_op(self):
+        no_op = utils.NoOp()
+        x = torch.rand(4, 1, 100, 100)
+        augmented_x = no_op(x)
+
+        assert torch.all(augmented_x == x)
+
+    def test_custom_sequential(self):
+        class DummyXStep:
+            def __call__(self, x: torch.Tensor):
+                return x + 1
+
+        class DummyXYStep:
+            def __call__(self, x: torch.Tensor, y: torch.Tensor):
+                return x + 1, y + 1
+
+        step1 = DummyXStep()
+        step2 = DummyXYStep()
+
+        sequential = utils.CustomSequential(x_transforms=[step1], xy_transforms=[step2])
+
+        x = torch.ones(32, 1, 100)
+        y = torch.zeros(32, 1)
+        augmented_x, augmented_y = sequential(x, y)
+
+        assert torch.all(augmented_x == x + 2)
+        assert torch.all(augmented_y == y + 1)
+
+    def test_custom_apply_one(self):
+        class DummyXStep:
+            def __init__(self, p):
+                self.p = p
+
+            def __call__(self, x: torch.Tensor):
+                return x + 1
+
+        class DummyXYStep:
+            def __init__(self, p):
+                self.p = p
+
+            def __call__(self, x: torch.Tensor, y: torch.Tensor):
+                return x, y + 1
+
+        set_torch_seed(42)
+        step1 = DummyXStep(p=0.33)
+        step2 = DummyXStep(p=0.33)
+        step3 = DummyXYStep(p=0.33)
+
+        apply_one = utils.CustomApplyOne(x_transforms=[step1, step2])
+
+        x = torch.ones(32, 1, 1)
+        y = torch.zeros(32, 1)
+        augmented_x, augmented_y = apply_one(x, y)
+
+        assert torch.all(augmented_x == x + 1)
+
+        apply_one = utils.CustomApplyOne(
+            x_transforms=[step1, step2], xy_transforms=[step3]
+        )
+        augmented_x = x
+        augmented_y = y
+        for _ in range(10000):
+            augmented_x, augmented_y = apply_one(augmented_x, augmented_y)
+        # Assert that the xy transform is applied roughly 1/3 of the time
+        assert torch.all(3300 <= augmented_y) & torch.all(augmented_y <= 3366)
+        # Assert that the x transform is applied roughly 2/3 of the time
+        assert torch.all(6633 <= augmented_x) & torch.all(augmented_x <= 6700)
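Where the bounds in test_custom_apply_one come from: the three dummy transforms each carry p = 0.33, so after normalization each is drawn with probability 1/3. Over the 10000 draws the xy step (which increments y) is expected to fire 10000/3, roughly 3333 times, and the two x steps together about 6667 times; the binomial standard deviation is sqrt(10000 * 1/3 * 2/3), roughly 47, so the asserted windows 3300-3366 and 6633-6700 span only about 0.7 standard deviations around those expectations. Windows this tight would be flaky for an unseeded run, but set_torch_seed(42) makes the draw sequence deterministic, so the counts only need to land inside the windows once. (The x counter starts from torch.ones, so its final value is 1 plus the number of x draws.)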
From 1e6e7048c90f7ea2c64914e68fbffc884c2663e3 Mon Sep 17 00:00:00 2001
From: tolgakopar
Date: Tue, 16 Apr 2024 16:16:23 +0200
Subject: [PATCH 10/11] added more line coverage for the tests

---
 .../augmentation/test_image_augmentations.py  | 10 ++++++
 .../test_time_series_augmentations.py         | 32 +++++++++++++++++--
 2 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/tests/pipeline/model/training/augmentation/test_image_augmentations.py b/tests/pipeline/model/training/augmentation/test_image_augmentations.py
index 1fe2c14..fb02bbb 100644
--- a/tests/pipeline/model/training/augmentation/test_image_augmentations.py
+++ b/tests/pipeline/model/training/augmentation/test_image_augmentations.py
@@ -23,6 +23,11 @@ def test_cutmix(self):
         # Because the images are all ones and zeros the mean of the pixels should be equal to the labels after being transformed
         assert torch.allclose(augmented_x.mean(dim=-1).mean(dim=-1), augmented_y)
 
+        cutmix = image_augmentations.CutMix(p=0)
+        augmented_x, augmented_y = cutmix(x, y)
+
+        assert torch.all(augmented_x == x) & torch.all(augmented_y == y)
+
     def test_mixup(self):
         mixup = image_augmentations.MixUp(p=1.0)
         # Create dummy input and labels
@@ -39,3 +44,8 @@ def test_mixup(self):
 
         # Because the images are all ones and zeros the mean of the pixels should be equal to the labels after being transformed
         assert torch.allclose(augmented_x.mean(dim=-1).mean(dim=-1), augmented_y)
+
+        mixup = image_augmentations.MixUp(p=0)
+        augmented_x, augmented_y = mixup(x, y)
+
+        assert torch.all(augmented_x == x) & torch.all(augmented_y == y)
diff --git a/tests/pipeline/model/training/augmentation/test_time_series_augmentations.py b/tests/pipeline/model/training/augmentation/test_time_series_augmentations.py
index 5868cb6..0c38494 100644
--- a/tests/pipeline/model/training/augmentation/test_time_series_augmentations.py
+++ b/tests/pipeline/model/training/augmentation/test_time_series_augmentations.py
@@ -36,6 +36,10 @@ def test_cutmix1d(self):
         # Because the signals are all ones and zeros the mean of the values should be equal to the labels after being transformed
         assert torch.allclose(augmented_x.mean(dim=-1), augmented_y)
 
+        cutmix1d = time_series_augmentations.CutMix1D(p=0)
+        augmented_x, augmented_y = cutmix1d(x, y)
+        assert torch.all(augmented_x == x) & torch.all(augmented_y == y)
+
     def test_mixup1d(self):
         set_torch_seed(42)
         mixup1d = time_series_augmentations.MixUp1D(p=1.0)
@@ -53,6 +57,10 @@ def test_mixup1d(self):
         # Because the signals are all ones and zeros the mean of the values should be equal to the labels after being transformed
         assert torch.allclose(augmented_x.mean(dim=-1), augmented_y)
 
+        mixup1d = time_series_augmentations.MixUp1D(p=0)
+        augmented_x, augmented_y = mixup1d(x, y)
+        assert torch.all(augmented_x == x) & torch.all(augmented_y == y)
+
     def test_mirror1d(self):
         set_torch_seed(42)
         mirror1d = time_series_augmentations.Mirror1D(p=1.0)
@@ -69,6 +77,10 @@ def test_mirror1d(self):
             torch.cat([torch.zeros(32, 1, 50), torch.ones(32, 1, 50)], dim=-1),
         )
 
+        mirror1d = time_series_augmentations.Mirror1D(p=0)
+        augmented_x = mirror1d(x)
+        assert torch.all(augmented_x == x)
+
     def test_random_amplitude_shift(self):
         set_torch_seed(42)
         low = 0.5
@@ -92,6 +104,10 @@ def test_random_amplitude_shift(self):
             <= torch.abs(torch.fft.rfft(x)) * high
         )
 
+        random_amplitude_shift = time_series_augmentations.RandomAmplitudeShift(p=0)
+        augmented_x = random_amplitude_shift(x)
+        assert torch.all(augmented_x == x)
+
     def test_random_phase_shift(self):
         set_torch_seed(42)
         random_phase_shift = time_series_augmentations.RandomPhaseShift(p=1.0)
@@ -113,18 +129,26 @@ def test_random_phase_shift(self):
         assert torch.isclose(augmented_x.mean(), x.mean())
         assert torch.isclose(augmented_x.mean(), torch.tensor([0]).float())
 
+        random_phase_shift = time_series_augmentations.RandomPhaseShift(p=0)
+        augmented_x = random_phase_shift(x)
+        assert torch.all(augmented_x == x)
+
     def test_reverse_1d(self):
         set_torch_seed(42)
-        reverse = time_series_augmentations.Reverse1D(p=1.0)
+        reverse1d = time_series_augmentations.Reverse1D(p=1.0)
         x = torch.sin(torch.linspace(0, 2 * np.pi, 1000)).unsqueeze(0)
         test_x = torch.sin(torch.linspace(np.pi, 3 * np.pi, 1000)).unsqueeze(0)
-        augmented_x = reverse(x)
+        augmented_x = reverse1d(x)
 
         # Assert the output shape is correct
         assert augmented_x.shape == x.shape
         # Assert the reversed sine wave is equal to 180 degrees phase shifted version
         assert torch.allclose(test_x, augmented_x, atol=0.0000005)
 
+        reverse1d = time_series_augmentations.Reverse1D(p=0)
+        augmented_x = reverse1d(x)
+        assert torch.all(augmented_x == x)
+
     def test_subtract_channels(self):
         set_torch_seed(42)
         subtract_channels = time_series_augmentations.SubstractChannels(p=1.0)
@@ -136,3 +160,7 @@ def test_subtract_channels(self):
         assert augmented_x.shape == x.shape
 
         assert torch.allclose(torch.zeros(*augmented_x.shape), augmented_x)
+
+        subtract_channels = time_series_augmentations.SubstractChannels(p=0)
+        augmented_x = subtract_channels(x)
+        assert torch.all(augmented_x == x)

From fd690385beaded0dc83d7900d4ef8c4e347aee0e Mon Sep 17 00:00:00 2001
From: tolgakopar
Date: Tue, 16 Apr 2024 16:49:46 +0200
Subject: [PATCH 11/11] fixed pre-commit issues

Also updated readme and requirements for augmentations and timm
---
 .pre-commit-config.yaml                       |  1 +
 README.md                                     | 11 ++++
 .../model/training/augmentation/__init__.py   |  1 +
 .../augmentation/image_augmentations.py       | 30 ++++++-----
 .../augmentation/time_series_augmentations.py | 54 ++++++++++---------
 .../model/training/augmentation/utils.py      | 28 +++++-----
 .../pipeline/model/training/models/timm.py    | 11 +---
 7 files changed, 77 insertions(+), 59 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index acc9452..10948db 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -50,4 +50,5 @@ repos:
           - torch
           - traitlets
           - timm
+          - kornia
         args: [ --disallow-any-generics, --disallow-untyped-defs, --disable-error-code=import-untyped]
diff --git a/README.md b/README.md
index fea5e96..30c78c5 100644
--- a/README.md
+++ b/README.md
@@ -44,6 +44,17 @@ For caching some imports are only required, these have to be manually installed
 - pyarrow >= 6.0.0 (Read parquet files)
 - annotated-types >= 0.6.0
 
+### Model
+
+There is support for using timm models. To be able to do so the user must manually install timm.
+- timm >= 0.9.16
+
+### Augmentation
+
+There are also implementations of augmentations that are not in commonly used packages. Most of these are for time series data, but there are implementations of CutMix and MixUp for images that can be used in the pipeline. To be able to use these the user must manually install kornia.
+
+- kornia >= 0.7.2
+
 ## Documentation
 
 Documentation is generated using [Sphinx](https://www.sphinx-doc.org/en/master/).
diff --git a/epochalyst/pipeline/model/training/augmentation/__init__.py b/epochalyst/pipeline/model/training/augmentation/__init__.py
index e69de29..758ab3e 100644
--- a/epochalyst/pipeline/model/training/augmentation/__init__.py
+++ b/epochalyst/pipeline/model/training/augmentation/__init__.py
@@ -0,0 +1 @@
+"""Module containing implementations of augmentations."""
diff --git a/epochalyst/pipeline/model/training/augmentation/image_augmentations.py b/epochalyst/pipeline/model/training/augmentation/image_augmentations.py
index 8f0c049..bf042f7 100644
--- a/epochalyst/pipeline/model/training/augmentation/image_augmentations.py
+++ b/epochalyst/pipeline/model/training/augmentation/image_augmentations.py
@@ -6,15 +6,18 @@
 import torch
 
 
-def get_kornia_mix() -> Any:
+def get_kornia_mix() -> Any:  # noqa: ANN401
+    """Return kornia mix."""
     try:
         import kornia
-
-        return kornia.augmentation._2d.mix
     except ImportError:
         raise ImportError(
-            "If you want to use this augmentation you must install kornia"
-        )
+            "If you want to use this augmentation you must install kornia",
+        ) from None
+
+    else:
+        return kornia.augmentation._2d.mix  # noqa: SLF001
 
 
 @dataclass
@@ -40,9 +43,12 @@ def __post_init__(self) -> None:
         )
 
     def __call__(
-        self, x: torch.Tensor, y: torch.Tensor
+        self,
+        x: torch.Tensor,
+        y: torch.Tensor,
     ) -> tuple[torch.Tensor, torch.Tensor]:
         """Randomly patch the input with another sample.
+
         :param x: Input images. (N,C,W,H)
         :param y: Input labels. (N,C)
         """
@@ -53,10 +59,7 @@ def __call__(
         dummy_labels = torch.arange(x.size(0))
         augmented_x, augmentation_info = self.cutmix(x, dummy_labels)
         augmentation_info = augmentation_info[0]
 
         y = y.float()
         y_result = y.clone()
         for i in range(augmentation_info.shape[0]):
-            y_result[i] = (
-                y[i] * (1 - augmentation_info[i, 2])
-                + y[int(augmentation_info[i, 1])] * augmentation_info[i, 2]
-            )
+            y_result[i] = y[i] * (1 - augmentation_info[i, 2]) + y[int(augmentation_info[i, 1])] * augmentation_info[i, 2]
 
         return augmented_x, y_result
@@ -84,7 +87,9 @@ def __post_init__(self) -> None:
         )
 
     def __call__(
-        self, x: torch.Tensor, y: torch.Tensor
+        self,
+        x: torch.Tensor,
+        y: torch.Tensor,
     ) -> tuple[torch.Tensor, torch.Tensor]:
         """Randomly patch the input with another sample."""
         dummy_labels = torch.arange(x.size(0))
         augmented_x, augmentation_info = self.mixup(x, dummy_labels)
 
         y = y.float()
         y_result = y.clone()
         for i in range(augmentation_info.shape[0]):
-            y_result[i] = (
-                y[i] * (1 - augmentation_info[i, 2])
-                + y[int(augmentation_info[i, 1])] * augmentation_info[i, 2]
-            )
+            y_result[i] = y[i] * (1 - augmentation_info[i, 2]) + y[int(augmentation_info[i, 1])] * augmentation_info[i, 2]
 
         return augmented_x, y_result
diff --git a/epochalyst/pipeline/model/training/augmentation/time_series_augmentations.py b/epochalyst/pipeline/model/training/augmentation/time_series_augmentations.py
index 7e25493..6195c15 100644
--- a/epochalyst/pipeline/model/training/augmentation/time_series_augmentations.py
+++ b/epochalyst/pipeline/model/training/augmentation/time_series_augmentations.py
@@ -9,6 +9,7 @@
 @dataclass
 class CutMix1D(torch.nn.Module):
     """CutMix augmentation for 1D signals.
+
     Randomly select a percentage between 'low' and 'high' to preserve on the left side of the signal.
     The right side will be replaced by the corresponding range from another sample from the batch.
     The labels become the weighted average of the mixed signals where weights are the mix ratios.
@@ -19,21 +20,27 @@ class CutMix1D(torch.nn.Module):
     high: float = 1
 
     def __call__(
-        self, x: torch.Tensor, y: torch.Tensor
+        self,
+        x: torch.Tensor,
+        y: torch.Tensor,
     ) -> tuple[torch.Tensor, torch.Tensor]:
         """Apply CutMix to the batch of 1D signal.
 
         :param x: Input features. (N,C,L)
         :param y: Input labels. (N,C)
-        :return: The augmented features and labels"""
-
+        :return: The augmented features and labels
+        """
         indices = torch.arange(x.shape[0], device=x.device, dtype=torch.int)
         shuffled_indices = torch.randperm(indices.shape[0])
 
         low_len = int(self.low * x.shape[-1])
         high_len = int(self.high * x.shape[-1])
         cutoff_indices = torch.randint(
-            low_len, high_len, (x.shape[0],), device=x.device, dtype=torch.int
+            low_len,
+            high_len,
+            (x.shape[0],),
+            device=x.device,
+            dtype=torch.int,
         )
         cutoff_rates = cutoff_indices.float() / x.shape[-1]
 
@@ -42,29 +49,34 @@ def __call__(
         augmented_x = x.clone()
         augmented_y = y.clone().float()
         for i in range(x.shape[0]):
             if torch.rand(1) < self.p:
                 augmented_x[i, :, cutoff_indices[i] :] = x[
-                    shuffled_indices[i], :, cutoff_indices[i] :
+                    shuffled_indices[i],
+                    :,
+                    cutoff_indices[i] :,
                 ]
-                augmented_y[i] = y[i] * cutoff_rates[i] + y[shuffled_indices[i]] * (
-                    1 - cutoff_rates[i]
-                )
+                augmented_y[i] = y[i] * cutoff_rates[i] + y[shuffled_indices[i]] * (1 - cutoff_rates[i])
         return augmented_x, augmented_y
 
 
 @dataclass
 class MixUp1D(torch.nn.Module):
     """MixUp augmentation for 1D signals.
-    Randomly takes the weighted average of 2 samples and their labels with random weights."""
+
+    Randomly takes the weighted average of 2 samples and their labels with random weights.
+    """
 
     p: float = 0.5
 
     def __call__(
-        self, x: torch.Tensor, y: torch.Tensor
+        self,
+        x: torch.Tensor,
+        y: torch.Tensor,
     ) -> tuple[torch.Tensor, torch.Tensor]:
         """Apply MixUp to the batch of 1D signal.
 
         :param x: Input features. (N,C,L)|(N,L)
         :param y: Input labels. (N,C)
-        :return: The augmented features and labels"""
+        :return: The augmented features and labels
+        """
         indices = torch.arange(x.shape[0], device=x.device, dtype=torch.int)
         shuffled_indices = torch.randperm(indices.shape[0])
 
@@ -81,7 +93,9 @@ def __call__(
 @dataclass
 class Mirror1D(torch.nn.Module):
     """Mirror augmentation for 1D signals.
-    Mirrors the signal around its mean in the horizontal (time) axis."""
+
+    Mirrors the signal around its mean in the horizontal (time) axis.
+    """
 
     p: float = 0.5
 
@@ -116,11 +130,7 @@ def __call__(self, x: torch.Tensor) -> torch.Tensor:
         if torch.rand(1) < self.p:
             # Take the rfft of the input tensor
             x_freq = torch.fft.rfft(x, dim=-1)
             # Create a random tensor of scalers in the range [low,high]
-            random_amplitude = (
-                torch.rand(*x_freq.shape, device=x.device, dtype=x.dtype)
-                * (self.high - self.low)
-                + self.low
-            )
+            random_amplitude = torch.rand(*x_freq.shape, device=x.device, dtype=x.dtype) * (self.high - self.low) + self.low
             # Multiply the rfft with the random amplitude
             x_freq = x_freq * random_amplitude
             # Take the irfft of the result
@@ -145,12 +155,7 @@ def __call__(self, x: torch.Tensor) -> torch.Tensor:
             # Take the rfft of the input tensor
             x_freq = torch.fft.rfft(x, dim=-1)
             # Create a random tensor of complex numbers each with a random phase but with magnitude of 1
-            random_phase = (
-                torch.rand(*x_freq.shape, device=x.device, dtype=x.dtype)
-                * 2
-                * np.pi
-                * self.shift_limit
-            )
+            random_phase = torch.rand(*x_freq.shape, device=x.device, dtype=x.dtype) * 2 * np.pi * self.shift_limit
             random_phase = torch.cos(random_phase) + 1j * torch.sin(random_phase)
             # Multiply the rfft with the random phase
             x_freq = x_freq * random_phase
@@ -186,13 +191,13 @@ class SubstractChannels(torch.nn.Module):
     def __call__(self, x: torch.Tensor) -> torch.Tensor:
         """Apply subtracting other channels to the input signal.
+
         :param x: Input features. (N,C,L)
         :return: Augmented features. (N,C,L)
         """
         if x.shape[1] == 1:
             raise ValueError(
-                "Sequence only has 1 channel. No channels to subtract from each other"
+                "Sequence only has 1 channel. No channels to subtract from each other",
             )
diff --git a/epochalyst/pipeline/model/training/augmentation/utils.py b/epochalyst/pipeline/model/training/augmentation/utils.py
index 21f8a09..af2c644 100644
--- a/epochalyst/pipeline/model/training/augmentation/utils.py
+++ b/epochalyst/pipeline/model/training/augmentation/utils.py
@@ -1,5 +1,4 @@
-"""
-This module provides utility classes for applying augmentations to data.
+"""Module providing utility classes for applying augmentations to data.
 
 Classes:
 - CustomApplyOne: A custom class for applying a single augmentation from a selection based on their probabilities.
@@ -37,7 +37,9 @@ def __post_init__(self) -> None:
     def __call__(
-        self, x: torch.Tensor, y: torch.Tensor
+        self,
+        x: torch.Tensor,
+        y: torch.Tensor,
     ) -> tuple[torch.Tensor, torch.Tensor]:
         """Apply one randomly selected augmentation.
@@ -49,8 +50,10 @@ def __call__( transform = self.all_transforms[ int( torch.multinomial( - self.probabilities_tensor, 1, replacement=False - ).item() + self.probabilities_tensor, + 1, + replacement=False, + ).item(), ) ] if transform in self.x_transforms: @@ -71,16 +74,15 @@ class CustomSequential: xy_transforms: list[Any] = field(default_factory=list) def __call__( - self, x: torch.Tensor, y: torch.Tensor + self, + x: torch.Tensor, + y: torch.Tensor, ) -> tuple[torch.Tensor, torch.Tensor]: """Apply the augmentations sequentially. - Args: - x (torch.Tensor): The input tensor. - y (torch.Tensor): The target tensor. - - Returns: - tuple[torch.Tensor, torch.Tensor]: The augmented input and target tensors. + :param x: Input features. + :param y: input labels. + :return: Augmented features and labels. """ if self.x_transforms is not None: for transform in self.x_transforms: @@ -104,9 +106,11 @@ def __call__(self, x: torch.Tensor) -> torch.Tensor: """Apply the augmentation to the input signal. Args: + ---- x (torch.Tensor): The input signal tensor. Returns: + ------- torch.Tensor: The augmented input signal tensor. """ return x diff --git a/epochalyst/pipeline/model/training/models/timm.py b/epochalyst/pipeline/model/training/models/timm.py index b2723f7..539367b 100644 --- a/epochalyst/pipeline/model/training/models/timm.py +++ b/epochalyst/pipeline/model/training/models/timm.py @@ -1,17 +1,10 @@ -"""Timm model for 2D spectrogram classification.""" +"""Timm model for 2D image classification.""" import torch from torch import nn class Timm(nn.Module): - """Timm model for 2D spectrogram classification.. - - Input: - X: (n_samples, n_channel, n_width, n_height) - Y: (n_samples) - - Output: - out: (n_samples) + """Timm model for 2D image classification. :param in_channels: Number of input channels :param out_channels: Number of output channels