add tests for zoo
AlexanderVNikitin committed Sep 23, 2023
1 parent f52c62e commit ad2d08d
Showing 9 changed files with 213 additions and 98 deletions.
61 changes: 52 additions & 9 deletions tests/test_utils.py
@@ -49,13 +49,6 @@ def test_sine_generator():
    assert np.max(ts) <= 2 and np.min(ts) >= -2


def test_reconstruction_loss():
    original = np.array([[[0, 2], [1, 0], [1, 2]]])
    reconstructed = np.array([[[0.1, 1.5], [1.1, 0.1], [1, 2]]])

    # TODO finalize


def test_switch_generator():
    Xs, ys = tsgm.utils.gen_sine_const_switch_dataset(10, 100, 20)

@@ -193,9 +186,34 @@ def test_mmd_3_test():
    assert pvalue < 1e-10  # the null hypothesis is rejected


@pytest.mark.parametrize("dataset_name", [
    "beef",
    "coffee",
    "ecg200",
    "electric",
    "freezer",
    "gunpoint",
    "insect",
    "mixed_shapes",
    "starlight",
    "wafer"
])
def test_ucr_loadable(dataset_name):
    ucr_data_manager = tsgm.utils.UCRDataManager(ds=dataset_name)
    X_train, y_train, X_test, y_test = ucr_data_manager.get()
    assert X_train.shape[0] == y_train.shape[0]
    assert X_test.shape[0] == y_test.shape[0]


def test_ucr_raises():
    with pytest.raises(ValueError) as excinfo:
        ucr_data_manager = tsgm.utils.UCRDataManager(ds="does not exist")
    assert "ds should be in" in str(excinfo.value)


def test_get_wafer():
    DATASET = "wafer"
    ucr_data_manager = tsgm.utils.UCRDataManager(ds=DATASET)
    dataset = "wafer"
    ucr_data_manager = tsgm.utils.UCRDataManager(ds=dataset)
    assert ucr_data_manager.summary() is None
    X_train, y_train, X_test, y_test = ucr_data_manager.get()
    assert X_train.shape == (1000, 152)
@@ -215,3 +233,28 @@ def test_fix_random_seeds():
    assert random.random() == 0.6394267984578837
    assert np.random.random() == 0.3745401188473625
    assert float(tf.random.uniform([1])[0]) == 0.6645621061325073


def test_reconstruction_loss_by_axis():
    eps = 1e-8
    original = tf.constant([[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]])
    reconstructed = tf.constant([[[1.1, 2.2, 2.9], [3.9, 4.8, 6.1]]])
    loss = tsgm.utils.reconstruction_loss_by_axis(original, reconstructed)
    assert abs(loss.numpy() - 0.119999886) < eps
    loss = tsgm.utils.reconstruction_loss_by_axis(original, reconstructed, axis=1)
    assert abs(loss.numpy()) < eps
    loss = tsgm.utils.reconstruction_loss_by_axis(original, reconstructed, axis=2)
    assert abs(loss.numpy() - 0.00444442) < eps


def test_get_physionet2012():
    train_X, train_y, test_X, test_y, val_X, val_y = tsgm.utils.get_physionet2012()

    assert train_X.shape == (1757980, 4)
    assert train_y.shape == (4000, 6)

    assert test_X.shape == (1762535, 4)
    assert test_y.shape == (4000, 6)

    assert val_X.shape == (1765303, 4)
    assert val_y.shape == (4000, 6)
44 changes: 40 additions & 4 deletions tests/test_vae.py
@@ -6,6 +6,34 @@
from tensorflow import keras


def test_vae():
    seq_len = 256
    feat_dim = 1
    latent_dim = 4

    model_type = tsgm.models.architectures.zoo["vae_conv5"]
    architecture = model_type(seq_len=seq_len, feat_dim=feat_dim, latent_dim=latent_dim)

    encoder, decoder = architecture.encoder, architecture.decoder

    X = tsgm.utils.gen_sine_dataset(100, seq_len, feat_dim, max_value=20)

    scaler = tsgm.utils.TSFeatureWiseScaler((0, 1))
    X = scaler.fit_transform(X).astype(np.float64)

    vae = tsgm.models.cvae.BetaVAE(encoder, decoder)
    vae.compile(optimizer=keras.optimizers.Adam(0.0003))
    vae.fit(X, epochs=1, batch_size=128)
    x_decoded = vae.predict([X])
    assert x_decoded.shape == X.shape

    x_samples = vae.generate(7)
    assert x_samples.shape == (7, seq_len, feat_dim)

    x_decoded = vae([X])
    assert x_decoded.shape == X.shape


def test_cvae():
    seq_len = 256
    feat_dim = 1
@@ -15,22 +43,30 @@ def test_cvae():
    model_type = tsgm.models.architectures.zoo["cvae_conv5"]
    architecture = model_type(seq_len=seq_len, feat_dim=feat_dim, latent_dim=latent_dim, output_dim=2)

    encoder, decoder = architecture._encoder, architecture._decoder
    encoder, decoder = architecture.encoder, architecture.decoder

    X, y_i = tsgm.utils.gen_sine_vs_const_dataset(100, seq_len, feat_dim, max_value=20, const=10)

    scaler = tsgm.utils.TSFeatureWiseScaler((0, 1))
    X = scaler.fit_transform(X).astype(np.float64)
    y = keras.utils.to_categorical(y_i, output_dim).astype(np.float64)

    cbk = tsgm.models.monitors.VAEMonitor(
        num_samples=1, latent_dim=latent_dim, output_dim=2)

    vae = tsgm.models.cvae.cBetaVAE(encoder, decoder, latent_dim=latent_dim, temporal=False)
    vae.compile(optimizer=keras.optimizers.Adam(0.0003))

    vae.fit(X, y, epochs=1, batch_size=128)

    vae.fit(X, y, epochs=1, batch_size=128, callbacks=[cbk])
    x_decoded = vae.predict([X, y])
    assert x_decoded.shape == X.shape

    x_samples, y_samples = vae.generate(y[:7])
    assert x_samples.shape == (7, seq_len, feat_dim)

    x_decoded = vae([X, y])
    assert x_decoded.shape == X.shape


def test_temp_cvae():
    seq_len = 256
@@ -53,7 +89,7 @@ def test_temp_cvae():
    dataset = tf.data.Dataset.from_tensor_slices((X_train, y))
    dataset = dataset.shuffle(buffer_size=1024).batch(batch_size)

    encoder, decoder = architecture._encoder, architecture._decoder
    encoder, decoder = architecture.encoder, architecture.decoder

    vae = tsgm.models.cvae.cBetaVAE(encoder, decoder, latent_dim=latent_dim, temporal=True)
    vae.compile(optimizer=keras.optimizers.Adam(0.0003))
34 changes: 28 additions & 6 deletions tests/test_visualizations.py
@@ -9,7 +9,10 @@ def test_visualize_dataset():
    tsgm.utils.visualize_dataset(Xs)


def test_visualize_tsne_unlabeled():
@pytest.mark.parametrize("feature_averaging", [
    True, False
])
def test_visualize_tsne_unlabeled(feature_averaging):
    Xs = np.array([
        [[1, 2, 3], [3, 4, 5]],
        [[1, 2, 3], [3, 4, 5]],
@@ -19,7 +22,7 @@ def test_visualize_tsne_unlabeled():
    ])
    Xgen = Xs
    ys = np.ones((Xs.shape[0], 1))
    tsgm.utils.visualize_tsne_unlabeled(Xs, Xgen, perplexity=2)
    tsgm.utils.visualize_tsne_unlabeled(Xs, Xgen, perplexity=2, feature_averaging=feature_averaging)


def test_visualize_tsne():
@@ -41,11 +44,30 @@ def test_visualize_ts():
    tsgm.utils.visualize_ts(Xs, num=1)


def test_visualize_ts_lineplot():
@pytest.mark.parametrize("unite_features", [
    True, False
])
def test_visualize_ts_lineplot(unite_features):
    Xs = np.array([[[1, 2, 3], [3, 4, 5]]])
    tsgm.utils.visualize_ts_lineplot(Xs, num=1)
    tsgm.utils.visualize_ts_lineplot(Xs, num=1, unite_features=unite_features)

    ys = np.array([1, 2])
    tsgm.utils.visualize_ts_lineplot(Xs, ys, num=1, unite_features=unite_features)

def visualize_training_loss():
    loss = np.array([10, 9, 8, 7])

def test_visualize_training_loss():
    loss = np.array([[10.0], [9.0], [8.0], [7.0]])
    tsgm.utils.visualize_training_loss(loss)


def test_visualize_original_and_reconst_ts():
    original = np.array([
        [[1, 2, 3], [3, 4, 5]],
        [[1, 2, 3], [3, 4, 5]],
        [[1, 2, 3], [3, 4, 5]],
        [[1, 2, 3], [3, 4, 5]],
        [[1, 2, 3], [3, 4, 5]]
    ])
    reconstructed = original
    tsgm.utils.visualize_original_and_reconst_ts(original, reconstructed)

82 changes: 66 additions & 16 deletions tests/test_zoo.py
@@ -1,29 +1,79 @@
import pytest

import functools
import numpy as np
import random
import tensorflow as tf
from tensorflow.keras import layers

import sklearn.metrics.pairwise

import tsgm


def test_zoo():
    assert isinstance(tsgm.models.zoo, tsgm.models.architectures.Zoo)
    assert len(tsgm.models.zoo.keys()) == len(tsgm.models.zoo.values())
@pytest.mark.parametrize("model_type", [
    tsgm.models.architectures.zoo["cvae_conv5"],
])
def test_zoo_cvae(model_type):
    seq_len = 10
    feat_dim = 2
    latent_dim = 1
    output_dim = 1

    arch = model_type(seq_len=seq_len, feat_dim=feat_dim, latent_dim=latent_dim, output_dim=output_dim)
    arch_dict = arch.get()

    assert arch.encoder == arch_dict["encoder"] and arch.decoder == arch_dict["decoder"]


    assert tsgm.models.zoo.summary() is None
@pytest.mark.parametrize("model_type", [
    tsgm.models.architectures.zoo["cgan_base_c4_l1"],
    tsgm.models.architectures.zoo["cgan_lstm_n"]
])
def test_zoo_cgan(model_type):
    seq_len = 10
    feat_dim = 2
    latent_dim = 1
    output_dim = 1

    arch = model_type(
        seq_len=seq_len, feat_dim=feat_dim,
        latent_dim=latent_dim, output_dim=output_dim)
    arch_dict = arch.get()

    assert isinstance(tsgm.models.zoo, dict)
    assert arch.generator == arch_dict["generator"] and arch.discriminator == arch_dict["discriminator"]

    with pytest.raises(TypeError):
        result = tsgm.models.architectures.BaseGANArchitecture()
    with pytest.raises(TypeError):
        result = tsgm.models.architectures.BaseVAEArchitecture()

@pytest.mark.parametrize("model_type_name", [
"clf_cn",
"clf_cl_n",
"clf_block"],
)
def test_zoo_clf(model_type_name):
seq_len = 10
feat_dim = 2
output_dim = 1
model_type = tsgm.models.architectures.zoo[model_type_name]
if model_type_name == "clf_block":
arch = model_type(
seq_len=seq_len, feat_dim=feat_dim, output_dim=output_dim, blocks=[layers.Conv1D(filters=64, kernel_size=3, activation="relu")])
else:
arch = model_type(
seq_len=seq_len, feat_dim=feat_dim, output_dim=output_dim)
arch_dict = arch.get()

def test_sampling():
input_sampling = [0.0, 1.0]
result = tsgm.models.architectures.Sampling()(input_sampling)
assert isinstance(result, tf.Tensor)
assert arch.model == arch_dict["model"]


def test_dict_types():
for k, v in tsgm.models.zoo.items():
assert issubclass(v, tsgm.models.architectures.Architecture)
def test_basic_rec():
seq_len = 10
feat_dim = 2
output_dim = 1

arch = tsgm.models.zoo["recurrent"](
hidden_dim=2,
output_dim=output_dim,
n_layers=1,
network_type="gru")
model = arch.build()
assert model is not None
5 changes: 2 additions & 3 deletions tsgm/models/cvae.py
@@ -100,8 +100,7 @@ def generate(self, n: int) -> tsgm.types.Tensor:


class cBetaVAE(keras.Model):
    # TODO: allow using architecture or encoder & decoder
    def __init__(self, encoder, decoder, latent_dim, temporal, beta=1.0, **kwargs):
    def __init__(self, encoder, decoder, latent_dim, temporal: bool, beta=1.0, **kwargs):
        super(cBetaVAE, self).__init__(**kwargs)
        self.beta = beta
        self.encoder = encoder
@@ -137,7 +136,7 @@ def generate(self, labels: tsgm.types.Tensor) -> tuple:

        :returns: a tuple of synthetically generated data and labels.
        """
        batch_size = tf.shape(labels)[0]
        z = tf.random.normal((batch_size, self._seq_len, self.latent_dim))
        z = tf.random.normal((batch_size, self._seq_len, self.latent_dim), dtype=labels.dtype)
        decoder_input = self._get_decoder_input(z, labels)
        return (self.decoder(decoder_input), labels)
12 changes: 5 additions & 7 deletions tsgm/models/monitors.py
@@ -17,7 +17,7 @@

class GANMonitor(keras.callbacks.Callback):
    def __init__(self, num_samples: int, latent_dim: int, labels: tsgm.types.Tensor,
                 save: bool = True, save_path: typing.Optional[str] = None, mode: str = "clf"):
                 save: bool = True, save_path: typing.Optional[str] = None, mode: str = "clf") -> None:
        self._num_samples = num_samples
        self._latent_dim = latent_dim
        self._save = save
@@ -37,7 +37,7 @@ def __init__(self, num_samples: int, latent_dim: int, labels: tsgm.types.Tensor,
            logger.warning("save_path is specified, but save is False.")
        os.makedirs(self._save_path, exist_ok=True)

    def on_epoch_end(self, epoch, logs=None):
    def on_epoch_end(self, epoch, logs=None) -> None:
        if self._mode in ["clf", "reg"]:
            random_latent_vectors = tf.random.normal(shape=(self._num_samples, self._latent_dim))
        elif self._mode == "temporal":
@@ -64,14 +64,12 @@ def on_epoch_end(self, epoch, logs=None):


class VAEMonitor(keras.callbacks.Callback):
    def __init__(self, num_samples=6, latent_dim=128, output_dim=2):
    def __init__(self, num_samples: int = 6, latent_dim: int = 128, output_dim: int = 2) -> None:
        self._num_samples = num_samples
        self._latent_dim = latent_dim
        self._output_dim = output_dim

    def on_epoch_end(self, epoch, logs=None):
        random_latent_vectors = tf.random.normal(shape=(self._output_dim * self._num_samples, self._latent_dim))

    def on_epoch_end(self, epoch, logs=None) -> None:
        labels = []
        for i in range(self._output_dim):
            if not len(labels):
@@ -80,7 +78,7 @@ def on_epoch_end(self, epoch, logs=None):
                labels = tf.concat((labels, keras.utils.to_categorical([i], self._output_dim)), 0)

        labels = tf.repeat(labels, self._num_samples, axis=0)
        generated_images = self.model.decoder(tf.concat([random_latent_vectors, labels], 1))
        generated_images, _ = self.model.generate(labels)

        for i in range(self._output_dim * self._num_samples):
            sns.lineplot(x=range(0, generated_images[i].shape[0]), y=tf.squeeze(generated_images[i]))