add docs and tests

AlexanderVNikitin · Sep 23, 2023 · 63933b4 · 63933b4
1 parent bbb74d0
commit 63933b4
Show file tree

Hide file tree

Showing 3 changed files with 100 additions and 4 deletions.
diff --git a/setup.py b/setup.py
@@ -66,7 +66,7 @@ def read_file(filename: str) -> str:
           "seaborn",
           "scikit-learn",
           "prettytable",
-          "yfinance",
+          "yfinance==0.2.28",
           "tqdm",
           "dtaidistance >= 2.3.10",
           "tensorflow",

diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -2,6 +2,8 @@
 
 import functools
 import numpy as np
+import random
+import tensorflow as tf
 import sklearn.metrics.pairwise
 
 import tsgm
@@ -201,3 +203,15 @@ def test_get_wafer():
 
     assert X_test.shape == (6164, 152)
     assert y_test.shape == (6164,)
+
+
+def test_fix_random_seeds():
+    assert random.random() != 0.6394267984578837
+    assert np.random.random() != 0.3745401188473625
+    assert float(tf.random.uniform([1])[0]) != 0.6645621061325073
+    # The unseeded draws above must differ from the seeded first draws asserted below.
+    tsgm.utils.fix_seeds()
+    # After seeding with the default seed, each RNG must return the first draw pinned here.
+    assert random.random() == 0.6394267984578837
+    assert np.random.random() == 0.3745401188473625
+    assert float(tf.random.uniform([1])[0]) == 0.6645621061325073
diff --git a/tsgm/utils/utils.py b/tsgm/utils/utils.py
@@ -1,9 +1,56 @@
 import random
 import numpy as np
+import numpy.typing as npt
 import tensorflow as tf
 
 
-def reconstruction_loss_by_axis(original, reconstructed, axis=0):
+def reconstruction_loss_by_axis(original: tf.Tensor, reconstructed: tf.Tensor, axis: int = 0) -> tf.Tensor:
+    """
+    Calculate the reconstruction loss based on a specified axis.
+
+    This function computes the reconstruction loss between the original data and
+    the reconstructed data along a specified axis. The loss can be computed in
+    two ways depending on the chosen axis:
+
+    - When `axis` is 0, it computes the loss as the sum of squared differences
+      between the original and reconstructed data for all elements.
+    - When `axis` is 1 or 2, it computes the mean squared error (MSE) between the
+      mean values along the chosen axis for the original and reconstructed data.
+
+    Parameters:
+    ----------
+    original : tf.Tensor
+        The original data tensor.
+
+    reconstructed : tf.Tensor
+        The reconstructed data tensor, typically produced by an autoencoder.
+
+    axis : int, optional (default=0)
+        The axis along which to compute the reconstruction loss:
+        - 0: All elements (sum of squared differences).
+        - 1: Along features (MSE).
+        - 2: Along time steps (MSE).
+
+    Returns:
+    -------
+    tf.Tensor
+        The computed reconstruction loss as a TensorFlow tensor.
+
+    Example:
+    --------
+    >>> original = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
+    >>> reconstructed = tf.constant([[1.1, 2.2, 2.9], [3.9, 4.8, 6.1]])
+    >>> loss = reconstruction_loss_by_axis(original, reconstructed, axis=0)
+    >>> print(loss.numpy())
+
+    Notes:
+    ------
+    - This function is commonly used in the context of autoencoders and other
+      reconstruction-based models to assess the quality of the reconstruction.
+    - The choice of `axis` determines how the loss is calculated, and it should
+      align with the data's structure.
+    """
+
     # axis=0 all (sum of squared diffs)
     # axis=1 features (MSE)
     # axis=2 times (MSE)
@@ -13,15 +60,50 @@ def reconstruction_loss_by_axis(original, reconstructed, axis=0):
         return tf.losses.mean_squared_error(tf.reduce_mean(original, axis=axis), tf.reduce_mean(reconstructed, axis=axis))
 
 
-def generate_slices(X, slice_len=10):
+def generate_slices(X: npt.NDArray, slice_len: int = 10) -> npt.NDArray:
+    """
+    Generate slices of a time series dataset.
+
+    Each sequence in X is cut into consecutive, non-overlapping slices of length
+    `slice_len`, taken at start offsets 0, slice_len, 2 * slice_len, ...
+
+    Parameters:
+    ----------
+    X : list or numpy.ndarray
+        The input list or array of sequences to be sliced.
+
+    slice_len : int, optional (default=10)
+        The length of every slice; also the step between consecutive slice starts.
+
+    Returns:
+    -------
+    numpy.ndarray
+        The collected slices as one array. A slice starting at offset i is kept only if
+        i < len(sequence) - slice_len, so trailing items (up to slice_len) are never included.
+    """
     new_X = []
     for el in X:
         for i in range(0, len(el) - slice_len, slice_len):
             new_X.append(el[i : i + slice_len])
     return np.array(new_X)
 
 
-def fix_seeds(seed_value=42):
+def fix_seeds(seed_value: int = 42) -> None:
+    """
+    Fix random number generator seeds for reproducibility.
+
+    Parameters:
+    ----------
+    seed_value : int, optional (default=42)
+        The seed passed to Python's `random` module, NumPy's global
+        random state and TensorFlow's global random seed.
+
+    Returns:
+    -------
+    None
+        This function does not return a value; it only re-seeds the three
+        generators listed above as a side effect.
+    """
     random.seed(seed_value)
     np.random.seed(seed_value)
     tf.random.set_seed(seed_value)