add tests for zoo
AlexanderVNikitin committed Sep 23, 2023
1 parent f52c62e commit ad2d08d
Showing 9 changed files with 213 additions and 98 deletions.
61 changes: 52 additions & 9 deletions tests/test_utils.py
@@ -49,13 +49,6 @@ def test_sine_generator():
    assert np.max(ts) <= 2 and np.min(ts) >= -2


def test_reconstruction_loss():
    original = np.array([[[0, 2], [1, 0], [1, 2]]])
    reconstructed = np.array([[[0.1, 1.5], [1.1, 0.1], [1, 2]]])

    # TODO finalize


def test_switch_generator():
    Xs, ys = tsgm.utils.gen_sine_const_switch_dataset(10, 100, 20)

@@ -193,9 +186,34 @@ def test_mmd_3_test():
    assert pvalue < 1e-10  # the null hypothesis is rejected


@pytest.mark.parametrize("dataset_name", [
    "beef",
    "coffee",
    "ecg200",
    "electric",
    "freezer",
    "gunpoint",
    "insect",
    "mixed_shapes",
    "starlight",
    "wafer"
])
def test_ucr_loadable(dataset_name):
    ucr_data_manager = tsgm.utils.UCRDataManager(ds=dataset_name)
    X_train, y_train, X_test, y_test = ucr_data_manager.get()
    assert X_train.shape[0] == y_train.shape[0]
    assert X_test.shape[0] == y_test.shape[0]


def test_ucr_raises():
    with pytest.raises(ValueError) as excinfo:
        ucr_data_manager = tsgm.utils.UCRDataManager(ds="does not exist")
    assert "ds should be in" in str(excinfo.value)


def test_get_wafer():
    DATASET = "wafer"
    ucr_data_manager = tsgm.utils.UCRDataManager(ds=DATASET)
    dataset = "wafer"
    ucr_data_manager = tsgm.utils.UCRDataManager(ds=dataset)
    assert ucr_data_manager.summary() is None
    X_train, y_train, X_test, y_test = ucr_data_manager.get()
    assert X_train.shape == (1000, 152)
@@ -215,3 +233,28 @@ def test_fix_random_seeds():
    assert random.random() == 0.6394267984578837
    assert np.random.random() == 0.3745401188473625
    assert float(tf.random.uniform([1])[0]) == 0.6645621061325073


def test_reconstruction_loss_by_axis():
    eps = 1e-8
    original = tf.constant([[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]])
    reconstructed = tf.constant([[[1.1, 2.2, 2.9], [3.9, 4.8, 6.1]]])
    loss = tsgm.utils.reconstruction_loss_by_axis(original, reconstructed)
    assert abs(loss.numpy() - 0.119999886) < eps
    loss = tsgm.utils.reconstruction_loss_by_axis(original, reconstructed, axis=1)
    assert abs(loss.numpy()) < eps
    loss = tsgm.utils.reconstruction_loss_by_axis(original, reconstructed, axis=2)
    assert abs(loss.numpy() - 0.00444442) < eps


def test_get_physionet2012():
    train_X, train_y, test_X, test_y, val_X, val_y = tsgm.utils.get_physionet2012()

    assert train_X.shape == (1757980, 4)
    assert train_y.shape == (4000, 6)

    assert test_X.shape == (1762535, 4)
    assert test_y.shape == (4000, 6)

    assert val_X.shape == (1765303, 4)
    assert val_y.shape == (4000, 6)
44 changes: 40 additions & 4 deletions tests/test_vae.py
@@ -6,6 +6,34 @@
from tensorflow import keras


def test_vae():
    seq_len = 256
    feat_dim = 1
    latent_dim = 4

    model_type = tsgm.models.architectures.zoo["vae_conv5"]
    architecture = model_type(seq_len=seq_len, feat_dim=feat_dim, latent_dim=latent_dim)

    encoder, decoder = architecture.encoder, architecture.decoder

    X = tsgm.utils.gen_sine_dataset(100, seq_len, feat_dim, max_value=20)

    scaler = tsgm.utils.TSFeatureWiseScaler((0, 1))
    X = scaler.fit_transform(X).astype(np.float64)

    vae = tsgm.models.cvae.BetaVAE(encoder, decoder)
    vae.compile(optimizer=keras.optimizers.Adam(0.0003))
    vae.fit(X, epochs=1, batch_size=128)
    x_decoded = vae.predict([X])
    assert x_decoded.shape == X.shape

    x_samples = vae.generate(7)
    assert x_samples.shape == (7, seq_len, feat_dim)

    x_decoded = vae([X])
    assert x_decoded.shape == X.shape


def test_cvae():
    seq_len = 256
    feat_dim = 1
@@ -15,22 +43,30 @@ def test_cvae():
    model_type = tsgm.models.architectures.zoo["cvae_conv5"]
    architecture = model_type(seq_len=seq_len, feat_dim=feat_dim, latent_dim=latent_dim, output_dim=2)

    encoder, decoder = architecture._encoder, architecture._decoder
    encoder, decoder = architecture.encoder, architecture.decoder

    X, y_i = tsgm.utils.gen_sine_vs_const_dataset(100, seq_len, feat_dim, max_value=20, const=10)

    scaler = tsgm.utils.TSFeatureWiseScaler((0, 1))
    X = scaler.fit_transform(X).astype(np.float64)
    y = keras.utils.to_categorical(y_i, output_dim).astype(np.float64)

    cbk = tsgm.models.monitors.VAEMonitor(
        num_samples=1, latent_dim=latent_dim, output_dim=2)

    vae = tsgm.models.cvae.cBetaVAE(encoder, decoder, latent_dim=latent_dim, temporal=False)
    vae.compile(optimizer=keras.optimizers.Adam(0.0003))

    vae.fit(X, y, epochs=1, batch_size=128)

    vae.fit(X, y, epochs=1, batch_size=128, callbacks=[cbk])
    x_decoded = vae.predict([X, y])
    assert x_decoded.shape == X.shape

    x_samples, y_samples = vae.generate(y[:7])
    assert x_samples.shape == (7, seq_len, feat_dim)

    x_decoded = vae([X, y])
    assert x_decoded.shape == X.shape


def test_temp_cvae():
    seq_len = 256
@@ -53,7 +89,7 @@ def test_temp_cvae():
    dataset = tf.data.Dataset.from_tensor_slices((X_train, y))
    dataset = dataset.shuffle(buffer_size=1024).batch(batch_size)

    encoder, decoder = architecture._encoder, architecture._decoder
    encoder, decoder = architecture.encoder, architecture.decoder

    vae = tsgm.models.cvae.cBetaVAE(encoder, decoder, latent_dim=latent_dim, temporal=True)
    vae.compile(optimizer=keras.optimizers.Adam(0.0003))
34 changes: 28 additions & 6 deletions tests/test_visualizations.py
@@ -9,7 +9,10 @@ def test_visualize_dataset():
    tsgm.utils.visualize_dataset(Xs)


def test_visualize_tsne_unlabeled():
@pytest.mark.parametrize("feature_averaging", [
    True, False
])
def test_visualize_tsne_unlabeled(feature_averaging):
    Xs = np.array([
        [[1, 2, 3], [3, 4, 5]],
        [[1, 2, 3], [3, 4, 5]],
@@ -19,7 +22,7 @@ def test_visualize_tsne_unlabeled():
    ])
    Xgen = Xs
    ys = np.ones((Xs.shape[0], 1))
    tsgm.utils.visualize_tsne_unlabeled(Xs, Xgen, perplexity=2)
    tsgm.utils.visualize_tsne_unlabeled(Xs, Xgen, perplexity=2, feature_averaging=feature_averaging)


def test_visualize_tsne():
@@ -41,11 +44,30 @@ def test_visualize_ts():
    tsgm.utils.visualize_ts(Xs, num=1)


def test_visualize_ts_lineplot():
@pytest.mark.parametrize("unite_features", [
    True, False
])
def test_visualize_ts_lineplot(unite_features):
    Xs = np.array([[[1, 2, 3], [3, 4, 5]]])
    tsgm.utils.visualize_ts_lineplot(Xs, num=1)
    tsgm.utils.visualize_ts_lineplot(Xs, num=1, unite_features=unite_features)

    ys = np.array([1, 2])
    tsgm.utils.visualize_ts_lineplot(Xs, ys, num=1, unite_features=unite_features)

def visualize_training_loss():
    loss = np.array([10, 9, 8, 7])

def test_visualize_training_loss():
    loss = np.array([[10.0], [9.0], [8.0], [7.0]])
    tsgm.utils.visualize_training_loss(loss)


def test_visualize_original_and_reconst_ts():
    original = np.array([
        [[1, 2, 3], [3, 4, 5]],
        [[1, 2, 3], [3, 4, 5]],
        [[1, 2, 3], [3, 4, 5]],
        [[1, 2, 3], [3, 4, 5]],
        [[1, 2, 3], [3, 4, 5]]
    ])
    reconstructed = original
    tsgm.utils.visualize_original_and_reconst_ts(original, reconstructed)

82 changes: 66 additions & 16 deletions tests/test_zoo.py
@@ -1,29 +1,79 @@
import pytest

import functools
import numpy as np
import random
import tensorflow as tf
from tensorflow.keras import layers

import sklearn.metrics.pairwise

import tsgm


def test_zoo():
    assert isinstance(tsgm.models.zoo, tsgm.models.architectures.Zoo)
    assert len(tsgm.models.zoo.keys()) == len(tsgm.models.zoo.values())
@pytest.mark.parametrize("model_type", [
    tsgm.models.architectures.zoo["cvae_conv5"],
])
def test_zoo_cvae(model_type):
    seq_len = 10
    feat_dim = 2
    latent_dim = 1
    output_dim = 1

    arch = model_type(seq_len=seq_len, feat_dim=feat_dim, latent_dim=latent_dim, output_dim=output_dim)
    arch_dict = arch.get()

    assert arch.encoder == arch_dict["encoder"] and arch.decoder == arch_dict["decoder"]


    assert tsgm.models.zoo.summary() is None
@pytest.mark.parametrize("model_type", [
    tsgm.models.architectures.zoo["cgan_base_c4_l1"],
    tsgm.models.architectures.zoo["cgan_lstm_n"]
])
def test_zoo_cgan(model_type):
    seq_len = 10
    feat_dim = 2
    latent_dim = 1
    output_dim = 1

    arch = model_type(
        seq_len=seq_len, feat_dim=feat_dim,
        latent_dim=latent_dim, output_dim=output_dim)
    arch_dict = arch.get()

    assert isinstance(tsgm.models.zoo, dict)
    assert arch.generator == arch_dict["generator"] and arch.discriminator == arch_dict["discriminator"]

    with pytest.raises(TypeError):
        result = tsgm.models.architectures.BaseGANArchitecture()
    with pytest.raises(TypeError):
        result = tsgm.models.architectures.BaseVAEArchitecture()

@pytest.mark.parametrize("model_type_name", [
"clf_cn",
"clf_cl_n",
"clf_block"],
)
def test_zoo_clf(model_type_name):
seq_len = 10
feat_dim = 2
output_dim = 1
model_type = tsgm.models.architectures.zoo[model_type_name]
if model_type_name == "clf_block":
arch = model_type(
seq_len=seq_len, feat_dim=feat_dim, output_dim=output_dim, blocks=[layers.Conv1D(filters=64, kernel_size=3, activation="relu")])
else:
arch = model_type(
seq_len=seq_len, feat_dim=feat_dim, output_dim=output_dim)
arch_dict = arch.get()

def test_sampling():
input_sampling = [0.0, 1.0]
result = tsgm.models.architectures.Sampling()(input_sampling)
assert isinstance(result, tf.Tensor)
assert arch.model == arch_dict["model"]


def test_dict_types():
for k, v in tsgm.models.zoo.items():
assert issubclass(v, tsgm.models.architectures.Architecture)
def test_basic_rec():
seq_len = 10
feat_dim = 2
output_dim = 1

arch = tsgm.models.zoo["recurrent"](
hidden_dim=2,
output_dim=output_dim,
n_layers=1,
network_type="gru")
model = arch.build()
assert model is not None
5 changes: 2 additions & 3 deletions tsgm/models/cvae.py
@@ -100,8 +100,7 @@ def generate(self, n: int) -> tsgm.types.Tensor:


class cBetaVAE(keras.Model):
    # TODO: allow using architecture or encoder & decoder
    def __init__(self, encoder, decoder, latent_dim, temporal, beta=1.0, **kwargs):
    def __init__(self, encoder, decoder, latent_dim, temporal: bool, beta=1.0, **kwargs):
        super(cBetaVAE, self).__init__(**kwargs)
        self.beta = beta
        self.encoder = encoder
@@ -137,7 +136,7 @@ def generate(self, labels: tsgm.types.Tensor) -> tuple:

        :returns: a tuple of synthetically generated data and labels.
        """
        batch_size = tf.shape(labels)[0]
        z = tf.random.normal((batch_size, self._seq_len, self.latent_dim))
        z = tf.random.normal((batch_size, self._seq_len, self.latent_dim), dtype=labels.dtype)
        decoder_input = self._get_decoder_input(z, labels)
        return (self.decoder(decoder_input), labels)
12 changes: 5 additions & 7 deletions tsgm/models/monitors.py
@@ -17,7 +17,7 @@

class GANMonitor(keras.callbacks.Callback):
    def __init__(self, num_samples: int, latent_dim: int, labels: tsgm.types.Tensor,
                 save: bool = True, save_path: typing.Optional[str] = None, mode: str = "clf"):
                 save: bool = True, save_path: typing.Optional[str] = None, mode: str = "clf") -> None:
        self._num_samples = num_samples
        self._latent_dim = latent_dim
        self._save = save
@@ -37,7 +37,7 @@ def __init__(self, num_samples: int, latent_dim: int, labels: tsgm.types.Tensor,
            logger.warning("save_path is specified, but save is False.")
        os.makedirs(self._save_path, exist_ok=True)

    def on_epoch_end(self, epoch, logs=None):
    def on_epoch_end(self, epoch, logs=None) -> None:
        if self._mode in ["clf", "reg"]:
            random_latent_vectors = tf.random.normal(shape=(self._num_samples, self._latent_dim))
        elif self._mode == "temporal":
@@ -64,14 +64,12 @@ def on_epoch_end(self, epoch, logs=None):


class VAEMonitor(keras.callbacks.Callback):
    def __init__(self, num_samples=6, latent_dim=128, output_dim=2):
    def __init__(self, num_samples: int = 6, latent_dim: int = 128, output_dim: int = 2) -> None:
        self._num_samples = num_samples
        self._latent_dim = latent_dim
        self._output_dim = output_dim

    def on_epoch_end(self, epoch, logs=None):
        random_latent_vectors = tf.random.normal(shape=(self._output_dim * self._num_samples, self._latent_dim))

    def on_epoch_end(self, epoch, logs=None) -> None:
        labels = []
        for i in range(self._output_dim):
            if not len(labels):
@@ -80,7 +78,7 @@ def on_epoch_end(self, epoch, logs=None):
                labels = tf.concat((labels, keras.utils.to_categorical([i], self._output_dim)), 0)

        labels = tf.repeat(labels, self._num_samples, axis=0)
        generated_images = self.model.decoder(tf.concat([random_latent_vectors, labels], 1))
        generated_images, _ = self.model.generate(labels)

        for i in range(self._output_dim * self._num_samples):
            sns.lineplot(x=range(0, generated_images[i].shape[0]), y=tf.squeeze(generated_images[i]))