Commit 1442450

Merge branch 'v4' into 215-doc-add-optional-dependencies-description-to-readmemd
Jeffrey-Lim committed Jun 28, 2024
2 parents 9ac4536 + e8a1b9c commit 1442450
Showing 62 changed files with 176 additions and 134 deletions.
13 changes: 0 additions & 13 deletions .github/workflows/main-branch-testing.yml
@@ -7,20 +7,7 @@ on:
branches: [ "main" ]

jobs:
pre-commit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v3
with:
python-version: 3.11
- name: Install pre-commit
run: pip install pre-commit
- name: Run pre-commit
run: pre-commit run --all-files

build:

runs-on: ubuntu-latest
container:
image: python:3.11-slim
22 changes: 22 additions & 0 deletions .github/workflows/static-analysis.yml
@@ -0,0 +1,22 @@
+name: Static Analysis
+
+on:
+  push:
+    branches: [ "main" ]
+  pull_request:
+    branches: [ "main", "v*" ]
+
+jobs:
+  pre-commit:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4.1.6
+        with:
+          fetch-depth: 0
+      - name: Set up Python 3.10.14
+        uses: actions/setup-python@v5.1.0
+        with:
+          python-version: 3.10.14
+      - name: Run pre-commit
+        uses: pre-commit/action@v3.0.1
12 changes: 0 additions & 12 deletions .github/workflows/version-branch-testing.yml
@@ -5,18 +5,6 @@ on:
branches: ["v*"]

jobs:
pre-commit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4.1.6
- uses: actions/setup-python@v5.1.0
with:
python-version: 3.10.14
- name: Install pre-commit
run: pip install pre-commit
- name: Run pre-commit
run: pre-commit run --all-files

pytest:
runs-on: ubuntu-latest
steps:
6 changes: 4 additions & 2 deletions .pre-commit-config.yaml
@@ -1,4 +1,6 @@
 exclude: ^(external/|venv/|.venv/|tests/|.cache)
+ci:
+  skip: [mypy]
 repos:
   - repo: local # Remove this when a new version of pre-commit-hooks (>4.6.0) is released
     hooks:
@@ -68,10 +70,10 @@ repos:
     hooks:
       - id: mypy
         additional_dependencies:
+          - "--extra-index-url=https://download.pytorch.org/whl/cpu"
           - numpy==1.26.4
           - pandas-stubs>=2.2.2.240514
           - matplotlib==3.8.4
-          - torch==2.3.1
+          - torch==2.3.1+cpu
           - dask==2024.6.2
           - typing_extensions==4.9.0
           - annotated-types==0.7.0
13 changes: 8 additions & 5 deletions epochalyst/__init__.py
@@ -1,6 +1,9 @@
-"""The epochalyst package.
+"""The epochalyst package."""
 
-It consists of the following modules:
-- `logging`: The logging module contains the classes and methods to log the pipeline.
-- `pipeline`: The pipeline module contains the classes and methods to create a pipeline for the model.
-"""
+from .ensemble import EnsemblePipeline
+from .model import ModelPipeline
+
+__all__ = [
+    "ModelPipeline",
+    "EnsemblePipeline",
+]
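
With this change, ModelPipeline and EnsemblePipeline are re-exported from the package root. A minimal import sketch, not part of the diff; the old path is taken from the renames shown elsewhere in this commit:

# New flat layout introduced by this commit:
from epochalyst import EnsemblePipeline, ModelPipeline

# The same classes previously lived in nested modules, e.g.:
# from epochalyst.pipeline.model.model import ModelPipeline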
1 change: 0 additions & 1 deletion epochalyst/_core/__init__.py

This file was deleted.

1 change: 0 additions & 1 deletion epochalyst/_core/_caching/__init__.py

This file was deleted.

1 change: 0 additions & 1 deletion epochalyst/_core/_pipeline/__init__.py

This file was deleted.

5 changes: 5 additions & 0 deletions epochalyst/caching/__init__.py
@@ -0,0 +1,5 @@
"""Caching module for epochalyst."""

from .cacher import CacheArgs, Cacher

__all__ = ["Cacher", "CacheArgs"]
epochalyst/_core/_caching/_cacher.py → epochalyst/caching/cacher.py
@@ -1,10 +1,12 @@
"""The cacher module contains the Cacher class."""

import glob
import os
import pickle
import sys
from typing import Any, Literal, TypedDict

from epochalyst.logging.logger import Logger
from epochalyst.logging import Logger

try:
import dask.array as da
@@ -27,7 +29,6 @@
 except ImportError:
     """User doesn't require these packages"""
 
-
 if sys.version_info < (3, 11):  # pragma: no cover (<py311)
     from typing_extensions import NotRequired
 else:  # pragma: no cover (py311+)
@@ -76,7 +77,7 @@ class CacheArgs(TypedDict):
     store_args: NotRequired[dict[str, Any]]
 
 
-class _Cacher(Logger):
+class Cacher(Logger):
     """The cacher is a flexible class that allows for caching of any data.
 
     The cacher uses cache_args to determine if the data is already cached and if so, return the cached data.
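
The class also goes public here: _Cacher becomes Cacher, exported through epochalyst.caching. A short sketch of the new import surface, not part of the diff; the subclass below is hypothetical:

from epochalyst.caching import CacheArgs, Cacher


class MyCachedBlock(Cacher):
    """Hypothetical block: inheriting Cacher provides logging (via Logger)
    plus cache_args-driven caching; which CacheArgs keys are required is
    not visible in this hunk."""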
2 changes: 1 addition & 1 deletion epochalyst/pipeline/ensemble.py → epochalyst/ensemble.py
@@ -4,7 +4,7 @@

 from agogos.training import ParallelTrainingSystem
 
-from epochalyst._core._caching._cacher import CacheArgs
+from epochalyst.caching import CacheArgs
 
 
 class EnsemblePipeline(ParallelTrainingSystem):
6 changes: 5 additions & 1 deletion epochalyst/logging/__init__.py
@@ -1 +1,5 @@
"""Module for core logging functionality."""
"""Logging module, contains Logger class for logging messages to console and file."""

from .logger import Logger

__all__ = ["Logger"]
5 changes: 2 additions & 3 deletions epochalyst/logging/logger.py
@@ -1,9 +1,8 @@
"""Logger base class."""
"""Logger base class for logging methods."""

import logging
import os
from collections.abc import Mapping
from typing import Any
from typing import Any, Mapping


class Logger:
4 changes: 2 additions & 2 deletions epochalyst/pipeline/model/model.py → epochalyst/model.py
@@ -1,10 +1,10 @@
"""ModelPipeline connects multiple transforming and training systems for extended training functionality."""
"""Model module. Contains the ModelPipeline class."""

from typing import Any

from agogos.training import Pipeline

from epochalyst._core._caching._cacher import CacheArgs
from epochalyst.caching import CacheArgs


class ModelPipeline(Pipeline):
1 change: 0 additions & 1 deletion epochalyst/pipeline/__init__.py

This file was deleted.

1 change: 0 additions & 1 deletion epochalyst/pipeline/model/__init__.py

This file was deleted.

1 change: 0 additions & 1 deletion epochalyst/pipeline/model/training/__init__.py

This file was deleted.

This file was deleted.

1 change: 0 additions & 1 deletion epochalyst/pipeline/model/training/models/__init__.py

This file was deleted.

1 change: 0 additions & 1 deletion epochalyst/pipeline/model/transformation/__init__.py

This file was deleted.

14 changes: 14 additions & 0 deletions epochalyst/training/__init__.py
@@ -0,0 +1,14 @@
"""Module containing training functionality for the epochalyst package."""

from .pretrain_block import PretrainBlock
from .torch_trainer import TorchTrainer, TrainValidationDataset
from .training import TrainingPipeline
from .training_block import TrainingBlock

__all__ = [
"PretrainBlock",
"TrainingBlock",
"TorchTrainer",
"TrainingPipeline",
"TrainValidationDataset",
]
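
These re-exports flatten the old epochalyst.pipeline.model.training namespace into epochalyst.training. An illustrative before/after, not part of the diff:

# New flat layout:
from epochalyst.training import PretrainBlock, TorchTrainer, TrainingBlock

# Old nested layout removed by this commit, e.g.:
# from epochalyst.pipeline.model.training.training_block import TrainingBlock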
26 changes: 26 additions & 0 deletions epochalyst/training/augmentation/__init__.py
@@ -0,0 +1,26 @@
"""Module containing implementation for augmentations."""

from epochalyst.training.augmentation.image_augmentations import CutMix, MixUp
from epochalyst.training.augmentation.time_series_augmentations import (
AddBackgroundNoiseWrapper,
CutMix1D,
EnergyCutmix,
Mirror1D,
MixUp1D,
RandomAmplitudeShift,
RandomPhaseShift,
SubtractChannels,
)

__all__ = [
"CutMix",
"MixUp",
"CutMix1D",
"MixUp1D",
"Mirror1D",
"EnergyCutmix",
"RandomPhaseShift",
"RandomAmplitudeShift",
"SubtractChannels",
"AddBackgroundNoiseWrapper",
]
epochalyst/pipeline/model/training/augmentation/time_series_augmentations.py → epochalyst/training/augmentation/time_series_augmentations.py
@@ -6,7 +6,7 @@
 import numpy as np
 import torch
 
-from epochalyst.pipeline.model.training.augmentation.utils import get_audiomentations
+from .utils import get_audiomentations
 
 
 @dataclass
@@ -187,7 +187,7 @@ def __call__(self, x: torch.Tensor) -> torch.Tensor:


 @dataclass
-class SubstractChannels(torch.nn.Module):
+class SubtractChannels(torch.nn.Module):
     """Randomly substract other channels from the current one."""
 
     p: float = 0.5
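
Besides the module move, this hunk renames the misspelled SubstractChannels to SubtractChannels. A minimal usage sketch, not part of the diff; the (batch, channels, samples) input layout is an assumption, as the expected shape is not shown here:

import torch

from epochalyst.training.augmentation.time_series_augmentations import SubtractChannels

aug = SubtractChannels(p=1.0)  # default p is 0.5; p=1.0 always applies it
x = torch.rand(8, 4, 1000)     # assumed (batch, channels, samples) layout
out = aug(x)                   # __call__ maps a torch.Tensor to a torch.Tensor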
epochalyst/pipeline/model/training/augmentation/utils.py → epochalyst/training/augmentation/utils.py
@@ -12,7 +12,7 @@

 import torch
 
-from epochalyst.pipeline.model.training.utils.recursive_repr import recursive_repr
+from epochalyst.training.utils.recursive_repr import recursive_repr
 
 
 def get_audiomentations() -> ModuleType:
7 changes: 7 additions & 0 deletions epochalyst/training/models/__init__.py
@@ -0,0 +1,7 @@
"""Module for reusable models or wrappers."""

from .timm import Timm

__all__ = [
"Timm",
]
File renamed without changes.
epochalyst/pipeline/model/training/pretrain_block.py → epochalyst/training/pretrain_block.py
@@ -6,7 +6,7 @@

 from joblib import hash
 
-from epochalyst.pipeline.model.training.training_block import TrainingBlock
+from .training_block import TrainingBlock
 
 
 @dataclass
epochalyst/pipeline/model/training/torch_trainer.py → epochalyst/training/torch_trainer.py
@@ -19,22 +19,21 @@
 from torch.utils.data import DataLoader, Dataset, TensorDataset
 from tqdm import tqdm
 
-from epochalyst._core._pipeline._custom_data_parallel import _CustomDataParallel
-from epochalyst.pipeline.model.training.training_block import TrainingBlock
-from epochalyst.pipeline.model.training.utils import _get_onnxrt, _get_openvino
-from epochalyst.pipeline.model.training.utils.tensor_functions import batch_to_device
+from ._custom_data_parallel import _CustomDataParallel
+from .training_block import TrainingBlock
+from .utils import _get_onnxrt, _get_openvino, batch_to_device
 
 T = TypeVar("T", bound=Dataset)  # type: ignore[type-arg]
 T_co = TypeVar("T_co", covariant=True)
 
 
-def custom_collate(batch: tuple[Tensor, ...]) -> tuple[Tensor, ...]:
+def custom_collate(batch: list[Tensor]) -> tuple[Tensor, Tensor]:
     """Collate function for the dataloader.
 
     :param batch: The batch to collate.
     :return: Collated batch.
     """
-    X, y = batch
+    X, y = batch[0], batch[1]
     return X, y
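
The new signature matches what the trainer's dataloaders pass when a dataset implements __getitems__: a list whose first two elements are the already-batched X and y tensors. A standalone sketch of the behaviour, mirroring the code above:

import torch


def custom_collate(batch: list[torch.Tensor]) -> tuple[torch.Tensor, torch.Tensor]:
    """Split an already-batched [X, y] list back into an (X, y) pair."""
    return batch[0], batch[1]


# The dataset's __getitems__ has already stacked the batch, so the collate
# function only separates features from labels:
X, y = custom_collate([torch.zeros(32, 10), torch.ones(32, 1)])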


Expand Down Expand Up @@ -167,7 +166,7 @@ def log_to_terminal(self, message: str) -> None:
     epochs: Annotated[int, Gt(0)] = 10
     patience: Annotated[int, Gt(0)] = -1  # Early stopping
     batch_size: Annotated[int, Gt(0)] = 32
-    collate_fn: Callable[[tuple[Tensor, ...]], tuple[Tensor, ...]] = field(default=custom_collate, init=True, repr=False, compare=False)
+    collate_fn: Callable[[list[Tensor]], tuple[Tensor, Tensor]] = field(default=custom_collate, init=True, repr=False, compare=False)
 
     # Checkpointing
     checkpointing_enabled: bool = field(default=True, init=True, repr=False, compare=False)
@@ -379,9 +378,7 @@ def _predict_after_train(
                 concat_dataset,
                 batch_size=self.batch_size,
                 shuffle=False,
-                collate_fn=(
-                    self.collate_fn if hasattr(concat_dataset, "__getitems__") else None  # type: ignore[arg-type]
-                ),
+                collate_fn=(self.collate_fn if hasattr(concat_dataset, "__getitems__") else None),
             )
             return self.predict_on_loader(pred_dataloader), y
         case "validation":
@@ -413,7 +410,7 @@ def custom_predict(self, x: Any, **pred_args: Any) -> npt.NDArray[np.float32]:
             pred_dataset,
             batch_size=curr_batch_size,
             shuffle=False,
-            collate_fn=(self.collate_fn if hasattr(pred_dataset, "__getitems__") else None),  # type: ignore[arg-type]
+            collate_fn=(self.collate_fn if hasattr(pred_dataset, "__getitems__") else None),
         )
 
         # Predict with a single model
@@ -459,9 +456,7 @@ def predict_on_loader(
             loader.dataset,
             batch_size=loader.batch_size,
             shuffle=False,
-            collate_fn=(
-                self.collate_fn if hasattr(loader.dataset, "__getitems__") else None  # type: ignore[arg-type]
-            ),
+            collate_fn=(self.collate_fn if hasattr(loader.dataset, "__getitems__") else None),
             **self.dataloader_args,
         )
         if compile_method is None:
@@ -572,14 +567,14 @@ def create_dataloaders(
         train_dataset,
         batch_size=self.batch_size,
         shuffle=True,
-        collate_fn=(self.collate_fn if hasattr(train_dataset, "__getitems__") else None),  # type: ignore[arg-type]
+        collate_fn=(self.collate_fn if hasattr(train_dataset, "__getitems__") else None),
         **self.dataloader_args,
     )
     validation_loader = DataLoader(
         validation_dataset,
         batch_size=self.batch_size,
         shuffle=False,
-        collate_fn=(self.collate_fn if hasattr(validation_dataset, "__getitems__") else None),  # type: ignore[arg-type]
+        collate_fn=(self.collate_fn if hasattr(validation_dataset, "__getitems__") else None),
         **self.dataloader_args,
     )
     return train_loader, validation_loader