Skip to content

Commit

Permalink
add docs and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexanderVNikitin committed Sep 23, 2023
1 parent bbb74d0 commit 63933b4
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 4 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def read_file(filename: str) -> str:
"seaborn",
"scikit-learn",
"prettytable",
"yfinance",
"yfinance==0.2.28",
"tqdm",
"dtaidistance >= 2.3.10",
"tensorflow",
Expand Down
14 changes: 14 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import functools
import numpy as np
import random
import tensorflow as tf
import sklearn.metrics.pairwise

import tsgm
Expand Down Expand Up @@ -201,3 +203,15 @@ def test_get_wafer():

assert X_test.shape == (6164, 152)
assert y_test.shape == (6164,)


def test_fix_random_seeds():
    """
    Check that `tsgm.utils.fix_seeds` makes the first draw of each RNG deterministic.

    The expected values are the first numbers produced by Python's `random`,
    NumPy and TensorFlow after seeding with the default seed of `fix_seeds`.
    """
    # One reference value per generator, shared by the "before" and "after"
    # checks so both sides of the test compare against the same number.
    expected_python = 0.6394267984578837
    expected_numpy = 0.3745401188473625
    expected_tensorflow = 0.6645621061325073

    # Before seeding, the generators should not already yield the seeded values.
    assert random.random() != expected_python
    assert np.random.random() != expected_numpy
    assert float(tf.random.uniform([1])[0]) != expected_tensorflow

    tsgm.utils.fix_seeds()

    # After seeding, the first draw from every generator is fixed.
    assert random.random() == expected_python
    assert np.random.random() == expected_numpy
    assert float(tf.random.uniform([1])[0]) == expected_tensorflow
88 changes: 85 additions & 3 deletions tsgm/utils/utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,56 @@
import random
import numpy as np
import numpy.typing as npt
import tensorflow as tf


def reconstruction_loss_by_axis(original, reconstructed, axis=0):
def reconstruction_loss_by_axis(original: tf.Tensor, reconstructed: tf.Tensor, axis: int = 0) -> tf.Tensor:
"""
Calculate the reconstruction loss based on a specified axis.
This function computes the reconstruction loss between the original data and
the reconstructed data along a specified axis. The loss can be computed in
two ways depending on the chosen axis:
- When `axis` is 0, it computes the loss as the sum of squared differences
between the original and reconstructed data for all elements.
- When `axis` is 1 or 2, it computes the mean squared error (MSE) between the
mean values along the chosen axis for the original and reconstructed data.
Parameters:
----------
original : tf.Tensor
The original data tensor.
reconstructed : tf.Tensor
The reconstructed data tensor, typically produced by an autoencoder.
axis : int, optional (default=0)
The axis along which to compute the reconstruction loss:
- 0: All elements (sum of squared differences).
- 1: Along features (MSE).
- 2: Along time steps (MSE).
Returns:
-------
tf.Tensor
The computed reconstruction loss as a TensorFlow tensor.
Example:
--------
>>> original = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
>>> reconstructed = tf.constant([[1.1, 2.2, 2.9], [3.9, 4.8, 6.1]])
>>> loss = reconstruction_loss_by_axis(original, reconstructed, axis=0)
>>> print(loss.numpy())
Notes:
------
- This function is commonly used in the context of autoencoders and other
reconstruction-based models to assess the quality of the reconstruction.
- The choice of `axis` determines how the loss is calculated, and it should
align with the data's structure.
"""

# axis=0 all (sum of squared diffs)
# axis=1 features (MSE)
# axis=2 times (MSE)
Expand All @@ -13,15 +60,50 @@ def reconstruction_loss_by_axis(original, reconstructed, axis=0):
return tf.losses.mean_squared_error(tf.reduce_mean(original, axis=axis), tf.reduce_mean(reconstructed, axis=axis))


def generate_slices(X, slice_len=10):
def generate_slices(X: npt.NDArray, slice_len: int = 10) -> npt.NDArray:
    """
    Generate non-overlapping slices of a time series dataset.

    Every sequence in `X` is cut into consecutive windows of `slice_len`
    items starting at offsets 0, slice_len, 2 * slice_len, ... Only full
    windows are kept: a trailing remainder shorter than `slice_len` is
    dropped, and a sequence shorter than `slice_len` contributes no slices.

    Parameters:
    ----------
    X : list or numpy.ndarray
        The input list or array of sequences to be sliced.

    slice_len : int, optional (default=10)
        The desired slice length for each sequence.

    Returns:
    -------
    numpy.ndarray
        A numpy array built from the collected slices, in input order.
        Each row of the array represents a slice of a sequence from the input.

    Example:
    --------
    >>> generate_slices(np.arange(20).reshape(1, 20), slice_len=10).shape
    (2, 10)
    """
    new_X = []
    for el in X:
        # Largest offset at which a full window still fits into `el`.
        last_start = len(el) - slice_len
        # `last_start + 1` makes the bound inclusive, so the final full window
        # is kept when len(el) is an exact multiple of slice_len.
        new_X.extend(el[i : i + slice_len] for i in range(0, last_start + 1, slice_len))
    return np.array(new_X)


def fix_seeds(seed_value=42):
def fix_seeds(seed_value: int = 42) -> None:
    """
    Seed the global random number generators for reproducibility.

    The same value is passed, in this order, to Python's `random.seed`,
    NumPy's `np.random.seed` and TensorFlow's `tf.random.set_seed`.

    Parameters:
    ----------
    seed_value : int, optional (default=42)
        The seed handed to each of the three random number generators.

    Returns:
    -------
    None
        Nothing is returned; the generators are seeded in-place.
    """
    seeders = (random.seed, np.random.seed, tf.random.set_seed)
    for apply_seed in seeders:
        apply_seed(seed_value)

0 comments on commit 63933b4

Please sign in to comment.