From 53ac91be4faa849b53255aab286189ecf3b4ff2d Mon Sep 17 00:00:00 2001
From: Jasper van Selm <70692744+schobbejak@users.noreply.github.com>
Date: Mon, 22 Apr 2024 14:52:42 +0200
Subject: [PATCH 1/6] Update pyproject.toml

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index c4c26e1..7c9b910 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "epochalyst"
-version = "0.3.2"
+version = "0.3.3"
 authors = [
     { name = "Jasper van Selm", email = "jmvanselm@gmail.com" },
     { name = "Ariel Ebersberger", email = "arielebersberger@gmail.com" },

From c3f62061ebfd1282c8b3ecff8360032411c0ddd4 Mon Sep 17 00:00:00 2001
From: Schobbejak
Date: Wed, 15 May 2024 15:23:01 +0200
Subject: [PATCH 2/6] Add logging for storing cache in pipeline and blocks

---
 .../pipeline/model/training/training.py       |  5 ++-
 .../pipeline/model/training/training_block.py | 34 +++++++++++--------
 .../model/transformation/transformation.py    |  1 +
 .../transformation/transformation_block.py    |  1 +
 4 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/epochalyst/pipeline/model/training/training.py b/epochalyst/pipeline/model/training/training.py
index 8125a1d..8c10504 100644
--- a/epochalyst/pipeline/model/training/training.py
+++ b/epochalyst/pipeline/model/training/training.py
@@ -63,6 +63,7 @@ def train(self, x: Any, y: Any, cache_args: CacheArgs | None = None, **train_arg
         x, y = super().train(x, y, **train_args)
 
         if cache_args:
+            self.log_to_terminal(f"Storing cache for x and y to {cache_args["storage_path"]}")
             self._store_cache(name=self.get_hash() + "x", data=x, cache_args=cache_args)
             self._store_cache(name=self.get_hash() + "y", data=y, cache_args=cache_args)
 
@@ -115,7 +116,9 @@ def predict(self, x: Any, cache_args: CacheArgs | None = None, **pred_args: Any)
 
         x = super().predict(x, **pred_args)
 
-        self._store_cache(self.get_hash() + "p", x, cache_args) if cache_args else None
+        if cache_args:
+            self.log_to_terminal(f"Storing cache for x to {cache_args["storage_path"]}")
+            self._store_cache(self.get_hash() + "p", x, cache_args)
 
         # Set steps to original in case class is called again
         self.steps = self.all_steps
diff --git a/epochalyst/pipeline/model/training/training_block.py b/epochalyst/pipeline/model/training/training_block.py
index cb27831..914bb79 100644
--- a/epochalyst/pipeline/model/training/training_block.py
+++ b/epochalyst/pipeline/model/training/training_block.py
@@ -76,16 +76,18 @@ def train(self, x: Any, y: Any, cache_args: CacheArgs | None = None, **train_arg
 
         x, y = self.custom_train(x, y, **train_args)
 
-        self._store_cache(
-            name=self.get_hash() + "x",
-            data=x,
-            cache_args=cache_args,
-        ) if cache_args else None
-        self._store_cache(
-            name=self.get_hash() + "y",
-            data=y,
-            cache_args=cache_args,
-        ) if cache_args else None
+        if cache_args:
+            self.log_to_terminal(f"Storing cache for x and y to {cache_args["storage_path"]}")
+            self._store_cache(
+                name=self.get_hash() + "x",
+                data=x,
+                cache_args=cache_args,
+            )
+            self._store_cache(
+                name=self.get_hash() + "y",
+                data=y,
+                cache_args=cache_args,
+            )
 
         return x, y
 
@@ -116,11 +118,13 @@ def predict(self, x: Any, cache_args: CacheArgs | None = None, **pred_args: Any)
 
         x = self.custom_predict(x, **pred_args)
 
-        self._store_cache(
-            name=self.get_hash() + "p",
-            data=x,
-            cache_args=cache_args,
-        ) if cache_args else None
+        if cache_args:
+            self.log_to_terminal(f"Store cache for predictions to {cache_args["storage_path"]}")
+            self._store_cache(
+                name=self.get_hash() + "p",
+                data=x,
+                cache_args=cache_args,
+            )
 
         return x
 
diff --git a/epochalyst/pipeline/model/transformation/transformation.py b/epochalyst/pipeline/model/transformation/transformation.py
index 479cc94..623d152 100644
--- a/epochalyst/pipeline/model/transformation/transformation.py
+++ b/epochalyst/pipeline/model/transformation/transformation.py
@@ -105,6 +105,7 @@ def transform(self, data: Any, cache_args: CacheArgs | None = None, **transform_
         data = super().transform(data, **transform_args)
 
         if cache_args:
+            self.log_to_terminal(f"Storing cache for pipeline to {cache_args["storage_path"]}")
             self._store_cache(self.get_hash(), data, cache_args)
 
         # Set steps to original in case class is called again
diff --git a/epochalyst/pipeline/model/transformation/transformation_block.py b/epochalyst/pipeline/model/transformation/transformation_block.py
index 8ec5abe..1f1bf36 100644
--- a/epochalyst/pipeline/model/transformation/transformation_block.py
+++ b/epochalyst/pipeline/model/transformation/transformation_block.py
@@ -73,6 +73,7 @@ def transform(self, data: Any, cache_args: CacheArgs | None = None, **transform_
         data = self.custom_transform(data, **transform_args)
 
         if cache_args:
+            self.log_to_terminal(f"Storing cache to {cache_args["storage_path"]}")
             self._store_cache(name=self.get_hash(), data=data, cache_args=cache_args)
 
         return data

From 8e5a13cf8664e0f4ec36a1421487d2739716f7d7 Mon Sep 17 00:00:00 2001
From: Schobbejak
Date: Wed, 15 May 2024 15:39:58 +0200
Subject: [PATCH 3/6] Fix tests for quotes

---
 epochalyst/pipeline/model/training/training.py             | 4 ++--
 epochalyst/pipeline/model/training/training_block.py       | 4 ++--
 epochalyst/pipeline/model/transformation/transformation.py | 2 +-
 .../pipeline/model/transformation/transformation_block.py  | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/epochalyst/pipeline/model/training/training.py b/epochalyst/pipeline/model/training/training.py
index 8c10504..94a2415 100644
--- a/epochalyst/pipeline/model/training/training.py
+++ b/epochalyst/pipeline/model/training/training.py
@@ -63,7 +63,7 @@ def train(self, x: Any, y: Any, cache_args: CacheArgs | None = None, **train_arg
         x, y = super().train(x, y, **train_args)
 
         if cache_args:
-            self.log_to_terminal(f"Storing cache for x and y to {cache_args["storage_path"]}")
+            self.log_to_terminal(f"Storing cache for x and y to {cache_args['storage_path']}")
             self._store_cache(name=self.get_hash() + "x", data=x, cache_args=cache_args)
             self._store_cache(name=self.get_hash() + "y", data=y, cache_args=cache_args)
 
@@ -117,7 +117,7 @@ def predict(self, x: Any, cache_args: CacheArgs | None = None, **pred_args: Any)
         x = super().predict(x, **pred_args)
 
         if cache_args:
-            self.log_to_terminal(f"Storing cache for x to {cache_args["storage_path"]}")
+            self.log_to_terminal(f"Storing cache for x to {cache_args['storage_path']}")
             self._store_cache(self.get_hash() + "p", x, cache_args)
 
         # Set steps to original in case class is called again
diff --git a/epochalyst/pipeline/model/training/training_block.py b/epochalyst/pipeline/model/training/training_block.py
index 914bb79..6cbdf67 100644
--- a/epochalyst/pipeline/model/training/training_block.py
+++ b/epochalyst/pipeline/model/training/training_block.py
@@ -77,7 +77,7 @@ def train(self, x: Any, y: Any, cache_args: CacheArgs | None = None, **train_arg
         x, y = self.custom_train(x, y, **train_args)
 
         if cache_args:
-            self.log_to_terminal(f"Storing cache for x and y to {cache_args["storage_path"]}")
+            self.log_to_terminal(f"Storing cache for x and y to {cache_args['storage_path']}")
             self._store_cache(
                 name=self.get_hash() + "x",
                 data=x,
@@ -119,7 +119,7 @@ def predict(self, x: Any, cache_args: CacheArgs | None = None, **pred_args: Any)
         x = self.custom_predict(x, **pred_args)
 
         if cache_args:
-            self.log_to_terminal(f"Store cache for predictions to {cache_args["storage_path"]}")
+            self.log_to_terminal(f"Store cache for predictions to {cache_args['storage_path']}")
             self._store_cache(
                 name=self.get_hash() + "p",
                 data=x,
diff --git a/epochalyst/pipeline/model/transformation/transformation.py b/epochalyst/pipeline/model/transformation/transformation.py
index 623d152..ebdb3d1 100644
--- a/epochalyst/pipeline/model/transformation/transformation.py
+++ b/epochalyst/pipeline/model/transformation/transformation.py
@@ -105,7 +105,7 @@ def transform(self, data: Any, cache_args: CacheArgs | None = None, **transform_
         data = super().transform(data, **transform_args)
 
         if cache_args:
-            self.log_to_terminal(f"Storing cache for pipeline to {cache_args["storage_path"]}")
+            self.log_to_terminal(f"Storing cache for pipeline to {cache_args['storage_path']}")
             self._store_cache(self.get_hash(), data, cache_args)
 
         # Set steps to original in case class is called again
diff --git a/epochalyst/pipeline/model/transformation/transformation_block.py b/epochalyst/pipeline/model/transformation/transformation_block.py
index 1f1bf36..dc8b261 100644
--- a/epochalyst/pipeline/model/transformation/transformation_block.py
+++ b/epochalyst/pipeline/model/transformation/transformation_block.py
@@ -73,7 +73,7 @@ def transform(self, data: Any, cache_args: CacheArgs | None = None, **transform_
         data = self.custom_transform(data, **transform_args)
 
         if cache_args:
-            self.log_to_terminal(f"Storing cache to {cache_args["storage_path"]}")
+            self.log_to_terminal(f"Storing cache to {cache_args['storage_path']}")
             self._store_cache(name=self.get_hash(), data=data, cache_args=cache_args)
 
         return data

From a571cdc73aeda7494a6b118d485ec4f3de17c39c Mon Sep 17 00:00:00 2001
From: Schobbejak
Date: Thu, 16 May 2024 10:07:00 +0200
Subject: [PATCH 4/6] Update documentation for dataloader_args

---
 epochalyst/pipeline/model/training/torch_trainer.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/epochalyst/pipeline/model/training/torch_trainer.py b/epochalyst/pipeline/model/training/torch_trainer.py
index 1dc0bfb..adeb326 100644
--- a/epochalyst/pipeline/model/training/torch_trainer.py
+++ b/epochalyst/pipeline/model/training/torch_trainer.py
@@ -43,6 +43,7 @@ class TorchTrainer(TrainingBlock):
     - `model_name` (str): Name of the model
     - `n_folds` (float): Number of folds for cross validation (0 for train full,
    - `fold` (int): Fold number
+    - `dataloader_args` (dict): Arguments for the dataloader
 
     Methods
     -------

From a33ec46add7edf9810b95d703d1a7241aeb8d9ca Mon Sep 17 00:00:00 2001
From: Schobbejak
Date: Thu, 16 May 2024 10:14:38 +0200
Subject: [PATCH 5/6] Fix scheduler step delay in torch trainer

---
 epochalyst/pipeline/model/training/torch_trainer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/epochalyst/pipeline/model/training/torch_trainer.py b/epochalyst/pipeline/model/training/torch_trainer.py
index 99e478f..48b5bbb 100644
--- a/epochalyst/pipeline/model/training/torch_trainer.py
+++ b/epochalyst/pipeline/model/training/torch_trainer.py
@@ -619,7 +619,7 @@ def _train_one_epoch(
 
         # Step the scheduler
         if self.initialized_scheduler is not None:
-            self.initialized_scheduler.step(epoch=epoch)
+            self.initialized_scheduler.step(epoch=epoch + 1)
 
         # Remove the cuda cache
         torch.cuda.empty_cache()

From d002488945d28a2efe8bbf5f8f6f80d3c9ac133a Mon Sep 17 00:00:00 2001
From: Schobbejak
Date: Thu, 16 May 2024 10:50:12 +0200
Subject: [PATCH 6/6] Add option to specify type for tensors

---
 .../pipeline/model/training/torch_trainer.py  | 19 ++++++---
 .../pipeline/model/training/utils/__init__.py |  1 +
 .../model/training/utils/tensor_functions.py  | 40 +++++++++++++++++++
 3 files changed, 55 insertions(+), 5 deletions(-)
 create mode 100644 epochalyst/pipeline/model/training/utils/__init__.py
 create mode 100644 epochalyst/pipeline/model/training/utils/tensor_functions.py

diff --git a/epochalyst/pipeline/model/training/torch_trainer.py b/epochalyst/pipeline/model/training/torch_trainer.py
index adeb326..296f2b8 100644
--- a/epochalyst/pipeline/model/training/torch_trainer.py
+++ b/epochalyst/pipeline/model/training/torch_trainer.py
@@ -20,6 +20,7 @@
 from epochalyst._core._pipeline._custom_data_parallel import _CustomDataParallel
 from epochalyst.logging.section_separator import print_section_separator
 from epochalyst.pipeline.model.training.training_block import TrainingBlock
+from epochalyst.pipeline.model.training.utils.tensor_functions import batch_to_device
 
 T = TypeVar("T", bound=Dataset)  # type: ignore[type-arg]
 T_co = TypeVar("T_co", covariant=True)
@@ -44,6 +45,8 @@ class TorchTrainer(TrainingBlock):
     - `n_folds` (float): Number of folds for cross validation (0 for train full,
     - `fold` (int): Fold number
     - `dataloader_args` (dict): Arguments for the dataloader
+    - `x_tensor_type` (str): Type of x tensor for data
+    - `y_tensor_type` (str): Type of y tensor for labels
 
     Methods
     -------
@@ -145,6 +148,10 @@ def log_to_terminal(self, message: str) -> None:
 
     dataloader_args: dict[str, Any] = field(default_factory=dict, repr=False)
 
+    # Types for tensors
+    x_tensor_type: str = "float"
+    y_tensor_type: str = "float"
+
     def __post_init__(self) -> None:
         """Post init method for the TorchTrainer class."""
         # Make sure to_predict is either "test" or "all" or "none"
@@ -404,7 +411,7 @@ def predict_on_loader(
         )
         with torch.no_grad(), tqdm(loader, unit="batch", disable=False) as tepoch:
             for data in tepoch:
-                X_batch = data[0].to(self.device).float()
+                X_batch = batch_to_device(data[0], self.x_tensor_type, self.device)
 
                 y_pred = self.model(X_batch).squeeze(1).cpu().numpy()
                 predictions.extend(y_pred)
@@ -607,8 +614,9 @@ def _train_one_epoch(
         )
         for batch in pbar:
             X_batch, y_batch = batch
-            X_batch = X_batch.to(self.device).float()
-            y_batch = y_batch.to(self.device).float()
+
+            X_batch = batch_to_device(X_batch, self.x_tensor_type, self.device)
+            y_batch = batch_to_device(y_batch, self.y_tensor_type, self.device)
 
             # Forward pass
             y_pred = self.model(X_batch).squeeze(1)
@@ -650,8 +658,9 @@ def _val_one_epoch(
         with torch.no_grad():
             for batch in pbar:
                 X_batch, y_batch = batch
-                X_batch = X_batch.to(self.device).float()
-                y_batch = y_batch.to(self.device).float()
+
+                X_batch = batch_to_device(X_batch, self.x_tensor_type, self.device)
+                y_batch = batch_to_device(y_batch, self.y_tensor_type, self.device)
 
                 # Forward pass
                 y_pred = self.model(X_batch).squeeze(1)
diff --git a/epochalyst/pipeline/model/training/utils/__init__.py b/epochalyst/pipeline/model/training/utils/__init__.py
new file mode 100644
index 0000000..902f1d9
--- /dev/null
+++ b/epochalyst/pipeline/model/training/utils/__init__.py
@@ -0,0 +1 @@
+"""Module with utility functions for training."""
diff --git a/epochalyst/pipeline/model/training/utils/tensor_functions.py b/epochalyst/pipeline/model/training/utils/tensor_functions.py
new file mode 100644
index 0000000..59a7be6
--- /dev/null
+++ b/epochalyst/pipeline/model/training/utils/tensor_functions.py
@@ -0,0 +1,40 @@
+"""Module with tensor functions."""
+import torch
+from torch import Tensor
+
+
+def batch_to_device(batch: Tensor, tensor_type: str, device: torch.device) -> Tensor:
+    """Move batch to device with certain type.
+
+    :param batch: Batch to move
+    :param tensor_type: Type of the batch
+    :param device: Device to move the batch to
+    :return: The moved tensor
+    """
+    type_conversion = {
+        "float": torch.float32,
+        "float32": torch.float32,
+        "float64": torch.float64,
+        "double": torch.float64,
+        "float16": torch.float16,
+        "half": torch.float16,
+        "int": torch.int32,
+        "int32": torch.int32,
+        "int64": torch.int64,
+        "long": torch.int64,
+        "int16": torch.int16,
+        "short": torch.int16,
+        "uint8": torch.uint8,
+        "byte": torch.uint8,
+        "int8": torch.int8,
+        "bfloat16": torch.bfloat16,
+        "bool": torch.bool,
+    }
+
+    if tensor_type in type_conversion:
+        dtype = type_conversion[tensor_type]
+        batch = batch.to(device, dtype=dtype)
+    else:
+        raise ValueError(f"Unsupported tensor type: {tensor_type}")
+
+    return batch
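
The batch_to_device helper added in PATCH 6/6 can be exercised on its own to check which torch dtype each type string selects; passing "long" for targets is what setting y_tensor_type="long" on a TorchTrainer block feeds into _train_one_epoch and _val_one_epoch. A minimal sketch, assuming torch is installed and epochalyst includes these patches; the tensors and device choice below are illustrative only, not part of the patches:

    import torch

    from epochalyst.pipeline.model.training.utils.tensor_functions import batch_to_device

    # The helper only needs a torch.device; pick whatever is available.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # "float" is the default for x_tensor_type and maps to torch.float32.
    x = batch_to_device(torch.rand(4, 3), "float", device)

    # "long" maps to torch.int64, e.g. for integer class labels.
    y = batch_to_device(torch.tensor([0, 2, 1, 1]), "long", device)

    print(x.dtype, y.dtype)  # torch.float32 torch.int64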