From 1acb49f57104663a021e9c0b4b492c3d0d1734a1 Mon Sep 17 00:00:00 2001 From: Michiel Date: Mon, 11 Jul 2022 10:49:50 +0200 Subject: [PATCH 01/16] ALAD method implemented --- examples/alad_example.py | 67 ++++++ pyod/models/alad.py | 490 +++++++++++++++++++++++++++++++++++++++ pyod/test/test_alad.py | 142 ++++++++++++ 3 files changed, 699 insertions(+) create mode 100644 examples/alad_example.py create mode 100644 pyod/models/alad.py create mode 100644 pyod/test/test_alad.py diff --git a/examples/alad_example.py b/examples/alad_example.py new file mode 100644 index 000000000..f02acf741 --- /dev/null +++ b/examples/alad_example.py @@ -0,0 +1,67 @@ +# -*- coding: utf-8 -*- +"""Example of using Adversarially Learned Anomaly Detection(ALAD) for outlier detection +""" +from __future__ import division +from __future__ import print_function + +import os +import sys + +# temporary solution for relative imports in case pyod is not installed +# if pyod is installed, no need to use the following line +sys.path.append( + os.path.abspath(os.path.join(os.path.dirname("__file__"), '..'))) + +from pyod.models.alad import ALAD +from pyod.utils.data import generate_data + +from pyod.utils.data import evaluate_print +from pyod.utils.example import visualize + +if __name__ == "__main__": + contamination = 0.1 # percentage of outliers + n_train = 500 # number of training points + n_test = 200 # number of testing points + + # Generate sample data + X_train, y_train, X_test, y_test = \ + generate_data(n_train=n_train, + n_test=n_test, + n_features=2, + contamination=contamination, + random_state=42) + + # train ALAD detector + clf_name = 'ALAD' + clf = ALAD( epochs = 100, latent_dim = 2, + learning_rate_disc = 0.0001, + learning_rate_gen = 0.0001, + dropout_rate = 0.2, + add_recon_loss = False, + lambda_recon_loss= 0.05, + add_disc_zz_loss = True, + dec_layers=[ 75, 100 ], + enc_layers=[ 100, 75 ], + disc_layers= [ 100, 75 ], + activation_hidden_disc = 'tanh', activation_hidden_gen = 'tanh' , + preprocessing=True, batch_size = 200, contamination = contamination) + + clf.fit(X_train) + + # get the prediction labels and outlier scores of the training data + y_train_pred = clf.labels_ # binary labels (0: inliers, 1: outliers) + y_train_scores = clf.decision_scores_ # raw outlier scores + + # get the prediction on the test data + y_test_pred = clf.predict(X_test) # outlier labels (0 or 1) + y_test_scores = clf.decision_function(X_test) # outlier scores + + # evaluate and print the results + print("\nOn Training Data:") + evaluate_print(clf_name, y_train, y_train_scores) + print("\nOn Test Data:") + evaluate_print(clf_name, y_test, y_test_scores) + + # visualize the results + visualize(clf_name, X_train, y_train, X_test, y_test, y_train_pred, + y_test_pred, show_figure=True, save_figure=False) diff --git a/pyod/models/alad.py b/pyod/models/alad.py new file mode 100644 index 000000000..8ccbe0f4c --- /dev/null +++ b/pyod/models/alad.py @@ -0,0 +1,490 @@ +# -*- coding: utf-8 -*- +"""Using Adversarially Learned Anomaly Detection +""" +# Author: Yue Zhao +# License: BSD 2 clause + +from __future__ import division +from __future__ import print_function + +import numpy as np +from matplotlib import pyplot as plt +import pandas as pd +from sklearn.preprocessing import StandardScaler +from sklearn.utils import check_array +from sklearn.utils.validation import check_is_fitted + +from ..utils.utility import check_parameter +from ..utils.stat_models import pairwise_distances_no_broadcast + +from .base import BaseDetector +from 
.base_dl import _get_tensorflow_version
+
+# if tensorflow 2, import from tf directly
+if _get_tensorflow_version() == 1:
+    raise NotImplementedError('Model not implemented for Tensorflow version 1')
+else:
+    import tensorflow as tf
+    from tensorflow.keras.models import Model
+    from tensorflow.keras.layers import Input, Dense, Dropout
+    from tensorflow.keras.optimizers import Adam
+    import tensorflow_addons as tfa
+
+
+
+
+class ALAD(BaseDetector):
+    """Adversarially Learned Anomaly Detection (ALAD).
+    Paper: https://arxiv.org/pdf/1812.02288.pdf
+
+    Parameters
+    ----------
+    output_activation : str, optional (default=None)
+        Activation function to use for output layers for encoder and decoder.
+        See https://keras.io/activations/
+    activation_hidden_disc : str, optional (default='tanh')
+        Activation function to use for hidden layers in discriminators.
+        See https://keras.io/activations/
+    activation_hidden_gen : str, optional (default='tanh')
+        Activation function to use for hidden layers in encoder and decoder (i.e. generator).
+        See https://keras.io/activations/
+    epochs : int, optional (default=200)
+        Number of epochs to train the model.
+    batch_size : int, optional (default=32)
+        Number of samples per gradient update.
+    dropout_rate : float in (0., 1), optional (default=0.2)
+        The dropout to be used across all layers.
+    dec_layers : list, optional (default=[5,10,25])
+        List that indicates the number of nodes per hidden layer for the decoder network.
+        Thus, [10,10] indicates 2 hidden layers having each 10 nodes.
+    enc_layers : list, optional (default=[25,10,5])
+        List that indicates the number of nodes per hidden layer for the encoder network.
+        Thus, [10,10] indicates 2 hidden layers having each 10 nodes.
+    disc_layers : list, optional (default=[25,10,5])
+        List that indicates the number of nodes per hidden layer for the discrimator networks.
+        Thus, [10,10] indicates 2 hidden layers having each 10 nodes.
+    learning_rate_gen: float in (0., 1), optional (default=0.001)
+        learning rate of training the the encoder and decoder
+    learning_rate_disc: float in (0., 1), optional (default=0.001)
+        learning rate of training the discriminators
+    add_recon_loss: bool, optional (default=False)
+        add an extra loss for encoder and decoder based on the reconstruction error
+    lambda_recon_loss: float in (0., 1), optional (default=0.1)
+        if ``add_recon_loss= True``, the reconstruction loss gets multiplied by ``lambda_recon_loss``
+        and added to the total loss for the generator (i.e. encoder and decoder).
+
+    preprocessing : bool, optional (default=False)
+        If True, apply standardization on the data.
+    verbose : int, optional (default=0)
+        Verbosity mode.
+        - 0 = silent
+        - 1 = progress bar
+    contamination : float in (0., 0.5), optional (default=0.1)
+        The amount of contamination of the data set, i.e.
+        the proportion of outliers in the data set. When fitting this is used
+        to define the threshold on the decision function.
+    Attributes
+    ----------
+    decision_scores_ : numpy array of shape (n_samples,)
+        The outlier scores of the training data [0,1].
+        The higher, the more abnormal. Outliers tend to have higher
+        scores. This value is available once the detector is
+        fitted.
+    threshold_ : float
+        The threshold is based on ``contamination``. It is the
+        ``n_samples * contamination`` most abnormal samples in
+        ``decision_scores_``. The threshold is calculated for generating
+        binary outlier labels.
+    labels_ : int, either 0 or 1
+        The binary labels of the training data.
0 stands for inliers + and 1 for outliers/anomalies. It is generated by applying + ``threshold_`` on ``decision_scores_``. + """ + + def __init__(self, activation_hidden_gen='tanh', + activation_hidden_disc='tanh', + output_activation = None, + dropout_rate=0.2, + latent_dim=2, + dec_layers=[5, 10, 25], + enc_layers=[25, 10, 5], + disc_layers=[25, 10, 5], + learning_rate_gen = 0.0001, learning_rate_disc = 0.0001, + add_recon_loss = False, lambda_recon_loss = 0.1, + epochs = 200, + verbose = 0, + preprocessing = False, + add_disc_zz_loss = True, + batch_size = 32, contamination=0.1): + super(ALAD, self).__init__(contamination=contamination) + + self.activation_hidden_disc = activation_hidden_disc + self.activation_hidden_gen = activation_hidden_gen + self.dropout_rate = dropout_rate + self.latent_dim = latent_dim + self.dec_layers = dec_layers + self.enc_layers = enc_layers + self.disc_layers = disc_layers + self.add_recon_loss = add_recon_loss + self.lambda_recon_loss = lambda_recon_loss + self.add_disc_zz_loss = add_disc_zz_loss + + self.output_activation = output_activation + self.contamination = contamination + self.epochs = epochs + self.learning_rate_gen = learning_rate_gen + self.learning_rate_disc = learning_rate_disc + self.preprocessing = preprocessing + self.batch_size = batch_size + self.verbose = verbose + + check_parameter(dropout_rate, 0, 1, param_name='dropout_rate', include_left=True) + + def _build_model(self): + + + #### Decoder ##### + dec_in = Input(shape=(self.latent_dim,), name='I1') + dec_1 = Dropout(self.dropout_rate)(dec_in) + last_layer = dec_1 + + dec_hl_dict = {} + for i, l_dim in enumerate(self.dec_layers): + layer_name = 'hl_{}'.format(i) + dec_hl_dict[layer_name] = Dropout(self.dropout_rate)( + Dense(l_dim, activation=self.activation_hidden_gen)(last_layer)) + last_layer = dec_hl_dict[layer_name] + + dec_out = Dense(self.n_features_, activation=self.output_activation)(last_layer) + + self.dec = Model(inputs=(dec_in), outputs=[dec_out]) + self.hist_loss_dec = [] + + + #### Encoder ##### + enc_in = Input(shape=(self.n_features_,), name='I1') + enc_1 = Dropout(self.dropout_rate)(enc_in) + last_layer = enc_1 + + enc_hl_dict = {} + for i, l_dim in enumerate(self.enc_layers): + layer_name = 'hl_{}'.format(i) + enc_hl_dict[layer_name] = Dropout(self.dropout_rate)( + Dense(l_dim, activation=self.activation_hidden_gen)(last_layer)) + last_layer = enc_hl_dict[layer_name] + + enc_out = Dense(self.latent_dim, activation=self.output_activation)(last_layer) + + self.enc = Model(inputs=(enc_in), outputs=[enc_out]) + self.hist_loss_enc = [] + + + + + #### Discriminator_xz ##### + disc_xz_in_x = Input(shape=(self.n_features_,), name='I1') + disc_xz_in_z = Input(shape=(self.latent_dim,), name='I2') + disc_xz_in = tf.concat([disc_xz_in_x, disc_xz_in_z], axis = 1 ) + + disc_xz_1 = Dropout(self.dropout_rate )(disc_xz_in) + last_layer = disc_xz_1 + + disc_xz_hl_dict = {} + for i, l_dim in enumerate(self.disc_layers): + layer_name = 'hl_{}'.format(i) + disc_xz_hl_dict[layer_name] = Dropout(self.dropout_rate)( + tfa.layers.SpectralNormalization( Dense(l_dim, activation=self.activation_hidden_disc) )(last_layer) ) + last_layer = disc_xz_hl_dict[layer_name] + + disc_xz_out = Dense(1, activation= 'sigmoid' )(last_layer) + self.disc_xz = Model(inputs=(disc_xz_in_x, disc_xz_in_z), outputs=[disc_xz_out] ) + # self.hist_loss_disc_xz = [] + + + #### Discriminator_xx ##### + disc_xx_in_x = Input(shape=(self.n_features_,), name='I1') + disc_xx_in_x_hat = Input(shape=(self.n_features_,), 
name='I2') + disc_xx_in = tf.concat([disc_xx_in_x, disc_xx_in_x_hat], axis = 1 ) + + disc_xx_1 = Dropout(self.dropout_rate, input_shape=(self.n_features_,))(disc_xx_in) + last_layer = disc_xx_1 + + disc_xx_hl_dict = {} + for i, l_dim in enumerate(self.disc_layers): + layer_name = 'hl_{}'.format(i) + disc_xx_hl_dict[layer_name] = Dropout(self.dropout_rate)( + tfa.layers.SpectralNormalization( Dense(l_dim, activation=self.activation_hidden_disc) )(last_layer) ) + last_layer = disc_xx_hl_dict[layer_name] + + disc_xx_out = Dense(1, activation= 'sigmoid' )(last_layer) + self.disc_xx = Model(inputs=(disc_xx_in_x, disc_xx_in_x_hat), outputs=[disc_xx_out, last_layer]) + # self.hist_loss_disc_xx = [] + + + + #### Discriminator_zz ##### + disc_zz_in_z = Input(shape=(self.latent_dim,), name='I1') + disc_zz_in_z_prime = Input(shape=(self.latent_dim,), name='I2') + disc_zz_in = tf.concat([disc_zz_in_z, disc_zz_in_z_prime], axis = 1 ) + + disc_zz_1 = Dropout(self.dropout_rate, input_shape=(self.n_features_,))(disc_zz_in) + last_layer = disc_zz_1 + + disc_zz_hl_dict = {} + for i, l_dim in enumerate(self.disc_layers): + layer_name = 'hl_{}'.format(i) + disc_zz_hl_dict[layer_name] = Dropout(self.dropout_rate)( + tfa.layers.SpectralNormalization( Dense(l_dim, activation=self.activation_hidden_disc) )(last_layer) ) + last_layer = disc_zz_hl_dict[layer_name] + + disc_zz_out = Dense(1, activation= 'sigmoid' )(last_layer) + self.disc_zz = Model(inputs=(disc_zz_in_z, disc_zz_in_z_prime), outputs=[disc_zz_out]) + # self.hist_loss_disc_zz = [] + + + # Set optimizer + opt_gen = Adam(learning_rate=self.learning_rate_gen) + opt_disc = Adam(learning_rate=self.learning_rate_disc) + + self.dec.compile(optimizer = opt_gen ) + self.enc.compile(optimizer = opt_gen ) + self.disc_xz.compile(optimizer = opt_disc ) + self.disc_xx.compile(optimizer = opt_disc ) + self.disc_zz.compile(optimizer = opt_disc ) + + self.hist_loss_disc = [] + self.hist_loss_gen = [] + + + + def train_step(self, data ): + cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=False) + + x_real, z_real = data + + def get_losses(): + y_true = tf.ones_like(x_real[:,[0]]) + y_fake = tf.zeros_like(x_real[:,[0]]) + + + # Generator + x_gen = self.dec({'I1': z_real}, training=True) + + # Encoder + z_gen = self.enc({'I1': x_real}, training=True) + + + #Discriminatorxz + out_truexz = self.disc_xz({'I1': x_real, 'I2': z_gen}, training=True) + out_fakexz = self.disc_xz({'I1': x_gen, 'I2': z_real}, training=True) + + #Discriminatorzz + if( self.add_disc_zz_loss == True): + out_truezz = self.disc_zz({'I1': z_real, 'I2': z_real}, training=True) + out_fakezz = self.disc_zz({'I1': z_real, 'I2': self.enc({'I1': self.dec({'I1': z_real }, training=True) }) }, training=True) + + #Discriminatorxx + out_truexx, _ = self.disc_xx({'I1': x_real, 'I2': x_real}, training=True) #self.Dxx(x_real, x_real) + out_fakexx, _ = self.disc_xx({'I1': x_real, 'I2': self.dec({'I1': self.enc({'I1': x_real }, training=True) }) }, training=True) + + + #Losses + loss_dxz = cross_entropy(y_true, out_truexz) + cross_entropy( y_fake,out_fakexz) + loss_dxx = cross_entropy(y_true, out_truexx) + cross_entropy( y_fake,out_fakexx) + if( self.add_disc_zz_loss == True): + loss_dzz = cross_entropy(y_true, out_truezz) + cross_entropy( y_fake,out_fakezz) + loss_disc = loss_dxz + loss_dzz + loss_dxx + else: + loss_disc = loss_dxz + loss_dxx + + loss_gexz = cross_entropy( y_true,out_fakexz) + cross_entropy( y_fake,out_truexz) + loss_gexx = cross_entropy( y_true,out_fakexx) + cross_entropy( 
y_fake,out_truexx) + if( self.add_disc_zz_loss == True): + loss_gezz = cross_entropy( y_true,out_fakezz) + cross_entropy( y_fake,out_truezz) + cycle_consistency = loss_gezz + loss_gexx + loss_gen = loss_gexz + cycle_consistency + else: + cycle_consistency = loss_gexx + loss_gen = loss_gexz + cycle_consistency + + + if( self.add_recon_loss == True): + # Extra recon loss + x_recon = self.dec({'I1': self.enc({'I1': x_real }, training=True ) }) + loss_recon = tf.reduce_mean( ( x_real - x_recon )**2 ) + loss_gen += loss_recon * self.lambda_recon_loss + + return loss_disc,loss_gen + + + with tf.GradientTape() as enc_tape, tf.GradientTape() as dec_tape, tf.GradientTape() as disc_xx_tape, tf.GradientTape() as disc_xz_tape, tf.GradientTape() as disc_zz_tape: + loss_disc, loss_gen = get_losses() + + + self.hist_loss_disc.append( np.float64(loss_disc.numpy()) ) + self.hist_loss_gen.append( np.float64(loss_gen.numpy()) ) + + gradients_dec = dec_tape.gradient(loss_gen, self.dec.trainable_variables) + self.dec.optimizer.apply_gradients(zip(gradients_dec, self.dec.trainable_variables)) + + + gradients_enc = enc_tape.gradient(loss_gen, self.enc.trainable_variables) + self.enc.optimizer.apply_gradients(zip(gradients_enc, self.enc.trainable_variables)) + + + gradients_disc_xx = disc_xx_tape.gradient(loss_disc, self.disc_xx.trainable_variables) + self.disc_xx.optimizer.apply_gradients(zip(gradients_disc_xx, self.disc_xx.trainable_variables)) + + if( self.add_disc_zz_loss == True): + gradients_disc_zz = disc_zz_tape.gradient(loss_disc, self.disc_zz.trainable_variables) + self.disc_zz.optimizer.apply_gradients(zip(gradients_disc_zz, self.disc_zz.trainable_variables)) + + + gradients_disc_xz = disc_xz_tape.gradient(loss_disc, self.disc_xz.trainable_variables) + self.disc_xz.optimizer.apply_gradients(zip(gradients_disc_xz, self.disc_xz.trainable_variables)) + + + def plot_learning_curves(self, start_ind=0, window_smoothening=10): + fig = plt.figure(figsize=(12, 5)) + + l_gen = pd.Series(self.hist_loss_gen[start_ind:]).rolling(window_smoothening).mean() + l_disc = pd.Series(self.hist_loss_disc[start_ind:]).rolling(window_smoothening).mean() + + ax = fig.add_subplot(1, 2, 1) + ax.plot(range(len(l_gen)), l_gen, ) + ax.set_title('Generator') + ax.set_ylabel('Loss') + ax.set_ylabel('Iter') + + ax = fig.add_subplot(1, 2, 2) + ax.plot(range(len(l_disc)), l_disc) + ax.set_title('Discriminator(s)') + ax.set_ylabel('Loss') + ax.set_xlabel('Iter') + + plt.show() + + + + def fit(self, X, y=None, noise_std= 0.1): + """Fit detector. y is ignored in unsupervised methods. + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The input samples. + y : Ignored + Not used, present for API consistency by convention. + Returns + ------- + self : object + Fitted estimator. 
+ """ + # validate inputs X and y (optional) + X = check_array(X) + self._set_n_classes(y) + + # Verify and construct the hidden units + self.n_samples_, self.n_features_ = X.shape[0], X.shape[1] + self._build_model() + + # Standardize data for better performance + if self.preprocessing: + self.scaler_ = StandardScaler() + X_norm = self.scaler_.fit_transform(X) + else: + X_norm = np.copy(X) + + + for n in range(self.epochs): + if ((n % 50 == 0) and (n != 0) and (self.verbose == 1)): + print('Train iter:{}'.format(n)) + + # Shuffle train + np.random.shuffle(X_norm) + + X_train_sel = X_norm[0: min(self.batch_size, self.n_samples_), :] + latent_noise = np.random.normal(0, 1, (X_train_sel.shape[0], self.latent_dim)) + X_train_sel += np.random.normal(0,noise_std, size = X_train_sel.shape) + self.train_step( ( np.float32(X_train_sel), np.float32(latent_noise) ) ) + + + + # Predict on X itself and calculate the reconstruction error as + # the outlier scores. Noted X_norm was shuffled has to recreate + if self.preprocessing: + X_norm = self.scaler_.transform(X) + else: + X_norm = np.copy(X) + + + pred_scores = self.get_outlier_scores(X_norm) + self.decision_scores_ = pred_scores + self._process_decision_scores() + return self + + + + def train_more(self, X, epochs=100, noise_std = 0.1 ): + + # Standardize data for better performance + if self.preprocessing: + X_norm = self.scaler_.transform(X) + else: + X_norm = np.copy(X) + + for n in range(epochs): + if ((n % 50 == 0) and (n != 0) and (self.verbose == 1)): + print('Train iter:{}'.format(n)) + + # Shuffle train + np.random.shuffle(X_norm) + + X_train_sel = X_norm[0: min(self.batch_size, self.n_samples_), :] + latent_noise = np.random.normal(0, 1, (X_train_sel.shape[0], self.latent_dim)) + X_train_sel += np.random.normal(0,noise_std, size = X_train_sel.shape) + self.train_step( ( np.float32(X_train_sel), np.float32(latent_noise) ) ) + + + def get_outlier_scores(self, X_norm): + + X_enc = self.enc({'I1': X_norm }).numpy() + X_enc_gen = self.dec({'I1':X_enc }).numpy() + + _, act_layer_xx = self.disc_xx({'I1': X_norm, 'I2': X_norm}, training=False) + act_layer_xx = act_layer_xx.numpy() + _, act_layer_xx_enc_gen = self.disc_xx({'I1': X_norm, 'I2': X_enc_gen}, training=False) + act_layer_xx_enc_gen = act_layer_xx_enc_gen.numpy() + outlier_scores = np.mean( np.abs( (act_layer_xx - act_layer_xx_enc_gen)**2 ) ,axis=1) + + return outlier_scores + + + def decision_function(self, X): + """Predict raw anomaly score of X using the fitted detector. + The anomaly score of an input sample is computed based on different + detector algorithms. For consistency, outliers are assigned with + larger anomaly scores. + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The training input samples. Sparse matrices are accepted only + if they are supported by the base estimator. + Returns + ------- + anomaly_scores : numpy array of shape (n_samples,) + The anomaly score of the input samples. 
+ """ + check_is_fitted(self, ['decision_scores_']) + X = check_array(X) + + if self.preprocessing: + X_norm = self.scaler_.transform(X) + else: + X_norm = np.copy(X) + + # Predict on X + pred_scores = self.get_outlier_scores(X_norm) + return pred_scores + + diff --git a/pyod/test/test_alad.py b/pyod/test/test_alad.py new file mode 100644 index 000000000..2044d45a2 --- /dev/null +++ b/pyod/test/test_alad.py @@ -0,0 +1,142 @@ +# -*- coding: utf-8 -*- +from __future__ import division +from __future__ import print_function + +import os +import sys + +import unittest +# noinspection PyProtectedMember +from numpy.testing import assert_equal +from numpy.testing import assert_raises + +from sklearn.metrics import roc_auc_score +from sklearn.base import clone + +# temporary solution for relative imports in case pyod is not installed +# if pyod is installed, no need to use the following line +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) + +from pyod.models.alad import ALAD +from pyod.utils.data import generate_data + + + + +class TestALAD(unittest.TestCase): + def setUp(self): + self.n_train = 500 + self.n_test = 200 + self.n_features = 2 + self.contamination = 0.1 + self.roc_floor = 0.8 + self.X_train, self.y_train, self.X_test, self.y_test = generate_data( + n_train=self.n_train, n_test=self.n_test, + n_features=self.n_features, contamination=self.contamination, + random_state=42) + + self.clf = ALAD( epochs = 100, latent_dim = 2, + learning_rate_disc = 0.0001, + learning_rate_gen = 0.0001, + dropout_rate = 0.2, + add_recon_loss = False, + lambda_recon_loss= 0.05, # only important when add_recon_loss = True + add_disc_zz_loss = True, + dec_layers=[ 75, 100 ], + enc_layers=[ 100, 75 ], + disc_layers= [ 100, 75 ], + activation_hidden_disc = 'tanh', activation_hidden_gen = 'tanh' , + preprocessing=True, batch_size = 200, contamination = contamination) + + self.clf.fit(self.X_train) + + def test_parameters(self): + assert (hasattr(self.clf, 'decision_scores_') and + self.clf.decision_scores_ is not None) + assert (hasattr(self.clf, 'labels_') and + self.clf.labels_ is not None) + assert (hasattr(self.clf, 'threshold_') and + self.clf.threshold_ is not None) + assert (hasattr(self.clf, '_mu') and + self.clf._mu is not None) + assert (hasattr(self.clf, '_sigma') and + self.clf._sigma is not None) + + + def test_train_scores(self): + assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0]) + + def test_prediction_scores(self): + pred_scores = self.clf.decision_function(self.X_test) + + # check score shapes + assert_equal(pred_scores.shape[0], self.X_test.shape[0]) + + # check performance + assert (roc_auc_score(self.y_test, pred_scores) >= self.roc_floor) + + def test_prediction_labels(self): + pred_labels = self.clf.predict(self.X_test) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_proba(self): + pred_proba = self.clf.predict_proba(self.X_test) + assert (pred_proba.min() >= 0) + assert (pred_proba.max() <= 1) + + def test_prediction_proba_linear(self): + pred_proba = self.clf.predict_proba(self.X_test, method='linear') + assert (pred_proba.min() >= 0) + assert (pred_proba.max() <= 1) + + def test_prediction_proba_unify(self): + pred_proba = self.clf.predict_proba(self.X_test, method='unify') + assert (pred_proba.min() >= 0) + assert (pred_proba.max() <= 1) + + def test_prediction_proba_parameter(self): + with assert_raises(ValueError): + self.clf.predict_proba(self.X_test, method='something') + + def 
test_prediction_labels_confidence(self): + pred_labels, confidence = self.clf.predict(self.X_test, + return_confidence=True) + assert_equal(pred_labels.shape, self.y_test.shape) + assert_equal(confidence.shape, self.y_test.shape) + assert (confidence.min() >= 0) + assert (confidence.max() <= 1) + + def test_prediction_proba_linear_confidence(self): + pred_proba, confidence = self.clf.predict_proba(self.X_test, + method='linear', + return_confidence=True) + assert (pred_proba.min() >= 0) + assert (pred_proba.max() <= 1) + + assert_equal(confidence.shape, self.y_test.shape) + assert (confidence.min() >= 0) + assert (confidence.max() <= 1) + + def test_fit_predict(self): + pred_labels = self.clf.fit_predict(self.X_train) + assert_equal(pred_labels.shape, self.y_train.shape) + + def test_fit_predict_score(self): + self.clf.fit_predict_score(self.X_test, self.y_test) + self.clf.fit_predict_score(self.X_test, self.y_test, + scoring='roc_auc_score') + self.clf.fit_predict_score(self.X_test, self.y_test, + scoring='prc_n_score') + with assert_raises(NotImplementedError): + self.clf.fit_predict_score(self.X_test, self.y_test, + scoring='something') + def test_model_clone(self): + # for deep models this may not apply + clone_clf = clone(self.clf) + + def tearDown(self): + pass + + +if __name__ == '__main__': + unittest.main() From e17553abcd196da24e58f1b78c3b09b85ffa3b5c Mon Sep 17 00:00:00 2001 From: Michiel Date: Mon, 11 Jul 2022 11:42:30 +0200 Subject: [PATCH 02/16] Spectral normalization options, and only when tensorflow_addons is installed --- examples/alad_example.py | 2 +- pyod/models/alad.py | 38 ++++++++++++++++++++++++++++++-------- pyod/test/test_alad.py | 2 +- 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/examples/alad_example.py b/examples/alad_example.py index f02acf741..ae41a960a 100644 --- a/examples/alad_example.py +++ b/examples/alad_example.py @@ -42,7 +42,7 @@ add_disc_zz_loss = True, dec_layers=[ 75, 100 ], enc_layers=[ 100, 75 ], - disc_layers= [ 100, 75 ], + disc_layers= [ 100, 75 ], spectral_normalization = False, activation_hidden_disc = 'tanh', activation_hidden_gen = 'tanh' , preprocessing=True, batch_size = 200, contamination = contamination) diff --git a/pyod/models/alad.py b/pyod/models/alad.py index 8ccbe0f4c..a0a5eb695 100644 --- a/pyod/models/alad.py +++ b/pyod/models/alad.py @@ -28,7 +28,7 @@ from tensorflow.keras.models import Model from tensorflow.keras.layers import Input, Dense, Dropout from tensorflow.keras.optimizers import Adam - import tensorflow_addons as tfa + @@ -115,7 +115,7 @@ def __init__(self, activation_hidden_gen='tanh', epochs = 200, verbose = 0, preprocessing = False, - add_disc_zz_loss = True, + add_disc_zz_loss = True, spectral_normalization = False, batch_size = 32, contamination=0.1): super(ALAD, self).__init__(contamination=contamination) @@ -138,6 +138,16 @@ def __init__(self, activation_hidden_gen='tanh', self.preprocessing = preprocessing self.batch_size = batch_size self.verbose = verbose + self.spectral_normalization = spectral_normalization + + if( self.spectral_normalization == True): + try: + import tensorflow_addons as tfa + except ModuleNotFoundError: + # Error handling + print('tensorflow_addons not found, cannot use spectral normalization. 
Install tensorflow_addons first.') + self.spectral_normalization = False + check_parameter(dropout_rate, 0, 1, param_name='dropout_rate', include_left=True) @@ -193,8 +203,12 @@ def _build_model(self): disc_xz_hl_dict = {} for i, l_dim in enumerate(self.disc_layers): layer_name = 'hl_{}'.format(i) - disc_xz_hl_dict[layer_name] = Dropout(self.dropout_rate)( - tfa.layers.SpectralNormalization( Dense(l_dim, activation=self.activation_hidden_disc) )(last_layer) ) + + if( self.spectral_normalization == True): + disc_xz_hl_dict[layer_name] = Dropout(self.dropout_rate)(tfa.layers.SpectralNormalization( Dense(l_dim, activation=self.activation_hidden_disc) )(last_layer) ) + else: + disc_xz_hl_dict[layer_name] = Dropout(self.dropout_rate)( Dense(l_dim, activation=self.activation_hidden_disc)(last_layer) ) + last_layer = disc_xz_hl_dict[layer_name] disc_xz_out = Dense(1, activation= 'sigmoid' )(last_layer) @@ -213,8 +227,12 @@ def _build_model(self): disc_xx_hl_dict = {} for i, l_dim in enumerate(self.disc_layers): layer_name = 'hl_{}'.format(i) - disc_xx_hl_dict[layer_name] = Dropout(self.dropout_rate)( - tfa.layers.SpectralNormalization( Dense(l_dim, activation=self.activation_hidden_disc) )(last_layer) ) + + if( self.spectral_normalization == True): + disc_xx_hl_dict[layer_name] = Dropout(self.dropout_rate)(tfa.layers.SpectralNormalization( Dense(l_dim, activation=self.activation_hidden_disc) )(last_layer) ) + else: + disc_xx_hl_dict[layer_name] = Dropout(self.dropout_rate)( Dense(l_dim, activation=self.activation_hidden_disc)(last_layer) ) + last_layer = disc_xx_hl_dict[layer_name] disc_xx_out = Dense(1, activation= 'sigmoid' )(last_layer) @@ -234,8 +252,12 @@ def _build_model(self): disc_zz_hl_dict = {} for i, l_dim in enumerate(self.disc_layers): layer_name = 'hl_{}'.format(i) - disc_zz_hl_dict[layer_name] = Dropout(self.dropout_rate)( - tfa.layers.SpectralNormalization( Dense(l_dim, activation=self.activation_hidden_disc) )(last_layer) ) + + if( self.spectral_normalization == True): + disc_zz_hl_dict[layer_name] = Dropout(self.dropout_rate)(tfa.layers.SpectralNormalization( Dense(l_dim, activation=self.activation_hidden_disc) )(last_layer) ) + else: + disc_zz_hl_dict[layer_name] = Dropout(self.dropout_rate)( Dense(l_dim, activation=self.activation_hidden_disc)(last_layer) ) + last_layer = disc_zz_hl_dict[layer_name] disc_zz_out = Dense(1, activation= 'sigmoid' )(last_layer) diff --git a/pyod/test/test_alad.py b/pyod/test/test_alad.py index 2044d45a2..7417ee08f 100644 --- a/pyod/test/test_alad.py +++ b/pyod/test/test_alad.py @@ -44,7 +44,7 @@ def setUp(self): add_disc_zz_loss = True, dec_layers=[ 75, 100 ], enc_layers=[ 100, 75 ], - disc_layers= [ 100, 75 ], + disc_layers= [ 100, 75 ], spectral_normalization = False, activation_hidden_disc = 'tanh', activation_hidden_gen = 'tanh' , preprocessing=True, batch_size = 200, contamination = contamination) From 38744c7abdf43de53fc809cf6ce9ebfe3c7b7750 Mon Sep 17 00:00:00 2001 From: Michiel Date: Mon, 11 Jul 2022 12:11:09 +0200 Subject: [PATCH 03/16] self.contamination missing --- pyod/test/test_alad.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyod/test/test_alad.py b/pyod/test/test_alad.py index 7417ee08f..0961e8753 100644 --- a/pyod/test/test_alad.py +++ b/pyod/test/test_alad.py @@ -46,7 +46,7 @@ def setUp(self): enc_layers=[ 100, 75 ], disc_layers= [ 100, 75 ], spectral_normalization = False, activation_hidden_disc = 'tanh', activation_hidden_gen = 'tanh' , - preprocessing=True, batch_size = 200, contamination = 
contamination) + preprocessing=True, batch_size = 200, contamination = self.contamination) self.clf.fit(self.X_train) From 395e574c1cc7679e1d85b6e36bc416ef5517064f Mon Sep 17 00:00:00 2001 From: Michiel Date: Mon, 11 Jul 2022 12:43:47 +0200 Subject: [PATCH 04/16] changed order of train and test X,y --- examples/alad_example.py | 2 +- pyod/test/test_alad.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/alad_example.py b/examples/alad_example.py index ae41a960a..5585c8976 100644 --- a/examples/alad_example.py +++ b/examples/alad_example.py @@ -24,7 +24,7 @@ n_test = 200 # number of testing points # Generate sample data - X_train, y_train, X_test, y_test = \ + X_train, X_test, y_train, y_test = \ generate_data(n_train=n_train, n_test=n_test, n_features=2, diff --git a/pyod/test/test_alad.py b/pyod/test/test_alad.py index 0961e8753..36e002bd3 100644 --- a/pyod/test/test_alad.py +++ b/pyod/test/test_alad.py @@ -30,7 +30,7 @@ def setUp(self): self.n_features = 2 self.contamination = 0.1 self.roc_floor = 0.8 - self.X_train, self.y_train, self.X_test, self.y_test = generate_data( + self.X_train, self.X_test, self.y_train, self.y_test = generate_data( n_train=self.n_train, n_test=self.n_test, n_features=self.n_features, contamination=self.contamination, random_state=42) From bc0cadaf6bbbfbada7c3810159c88728f8bff308 Mon Sep 17 00:00:00 2001 From: Michiel Date: Tue, 12 Jul 2022 07:32:58 +0200 Subject: [PATCH 05/16] Updated comments in code + train_more() also recalculates scores --- pyod/models/alad.py | 45 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/pyod/models/alad.py b/pyod/models/alad.py index a0a5eb695..43fa7139d 100644 --- a/pyod/models/alad.py +++ b/pyod/models/alad.py @@ -1,8 +1,7 @@ # -*- coding: utf-8 -*- """Using Adversarially Learned Anomaly Detection """ -# Author: Yue Zhao -# License: BSD 2 clause +# Author: Michiel Bongaerts (but not author of the ALAD method) from __future__ import division from __future__ import print_function @@ -153,12 +152,12 @@ def __init__(self, activation_hidden_gen='tanh', def _build_model(self): - #### Decoder ##### dec_in = Input(shape=(self.latent_dim,), name='I1') dec_1 = Dropout(self.dropout_rate)(dec_in) last_layer = dec_1 + # Store all hidden layers in dict dec_hl_dict = {} for i, l_dim in enumerate(self.dec_layers): layer_name = 'hl_{}'.format(i) @@ -177,6 +176,7 @@ def _build_model(self): enc_1 = Dropout(self.dropout_rate)(enc_in) last_layer = enc_1 + # Store all hidden layers in dict enc_hl_dict = {} for i, l_dim in enumerate(self.enc_layers): layer_name = 'hl_{}'.format(i) @@ -200,6 +200,7 @@ def _build_model(self): disc_xz_1 = Dropout(self.dropout_rate )(disc_xz_in) last_layer = disc_xz_1 + # Store all hidden layers in dict disc_xz_hl_dict = {} for i, l_dim in enumerate(self.disc_layers): layer_name = 'hl_{}'.format(i) @@ -224,6 +225,7 @@ def _build_model(self): disc_xx_1 = Dropout(self.dropout_rate, input_shape=(self.n_features_,))(disc_xx_in) last_layer = disc_xx_1 + # Store all hidden layers in dict disc_xx_hl_dict = {} for i, l_dim in enumerate(self.disc_layers): layer_name = 'hl_{}'.format(i) @@ -249,6 +251,7 @@ def _build_model(self): disc_zz_1 = Dropout(self.dropout_rate, input_shape=(self.n_features_,))(disc_zz_in) last_layer = disc_zz_1 + # Store all hidden layers in dict disc_zz_hl_dict = {} for i, l_dim in enumerate(self.disc_layers): layer_name = 'hl_{}'.format(i) @@ -311,7 +314,7 @@ def get_losses(): out_fakexx, _ = self.disc_xx({'I1': 
x_real, 'I2': self.dec({'I1': self.enc({'I1': x_real }, training=True) }) }, training=True) - #Losses + #Losses for discriminators loss_dxz = cross_entropy(y_true, out_truexz) + cross_entropy( y_fake,out_fakexz) loss_dxx = cross_entropy(y_true, out_truexx) + cross_entropy( y_fake,out_fakexx) if( self.add_disc_zz_loss == True): @@ -320,6 +323,7 @@ def get_losses(): else: loss_disc = loss_dxz + loss_dxx + #Losses for generator loss_gexz = cross_entropy( y_true,out_fakexz) + cross_entropy( y_fake,out_truexz) loss_gexx = cross_entropy( y_true,out_fakexx) + cross_entropy( y_fake,out_truexx) if( self.add_disc_zz_loss == True): @@ -406,11 +410,11 @@ def fit(self, X, y=None, noise_std= 0.1): X = check_array(X) self._set_n_classes(y) - # Verify and construct the hidden units + # Get number of sampels and features from train set self.n_samples_, self.n_features_ = X.shape[0], X.shape[1] self._build_model() - # Standardize data for better performance + # Apply data scaling or not if self.preprocessing: self.scaler_ = StandardScaler() X_norm = self.scaler_.fit_transform(X) @@ -432,8 +436,8 @@ def fit(self, X, y=None, noise_std= 0.1): - # Predict on X itself and calculate the reconstruction error as - # the outlier scores. Noted X_norm was shuffled has to recreate + # Predict on X itself and calculate the the outlier scores. + # Note, X_norm was shuffled and needs to be recreated if self.preprocessing: X_norm = self.scaler_.transform(X) else: @@ -448,8 +452,14 @@ def fit(self, X, y=None, noise_std= 0.1): def train_more(self, X, epochs=100, noise_std = 0.1 ): + """This function allows the researcher to perform extra training instead of the fixed number determined + by the fit() function. + """ - # Standardize data for better performance + # fit() should have been called first + check_is_fitted(self, ['decision_scores_']) + + # Apply data scaling or not if self.preprocessing: X_norm = self.scaler_.transform(X) else: @@ -465,7 +475,22 @@ def train_more(self, X, epochs=100, noise_std = 0.1 ): X_train_sel = X_norm[0: min(self.batch_size, self.n_samples_), :] latent_noise = np.random.normal(0, 1, (X_train_sel.shape[0], self.latent_dim)) X_train_sel += np.random.normal(0,noise_std, size = X_train_sel.shape) - self.train_step( ( np.float32(X_train_sel), np.float32(latent_noise) ) ) + self.train_step( ( np.float32(X_train_sel), np.float32(latent_noise) ) ) + + + + # Predict on X itself and calculate the the outlier scores. 
+ # Note, X_norm was shuffled and needs to be recreated + if self.preprocessing: + X_norm = self.scaler_.transform(X) + else: + X_norm = np.copy(X) + + pred_scores = self.get_outlier_scores(X_norm) + self.decision_scores_ = pred_scores + self._process_decision_scores() + return self + def get_outlier_scores(self, X_norm): From 97136bd765470cfab6419a6bd51111f476ce2641 Mon Sep 17 00:00:00 2001 From: Michiel Date: Mon, 18 Jul 2022 10:39:29 +0200 Subject: [PATCH 06/16] Seperate arguments for number of hidden layers for Dxx, Dzz and Dxz --- examples/alad_example.py | 5 ++++- pyod/models/alad.py | 26 +++++++++++++++++++------- pyod/test/test_alad.py | 5 ++++- 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/examples/alad_example.py b/examples/alad_example.py index 5585c8976..660634fd0 100644 --- a/examples/alad_example.py +++ b/examples/alad_example.py @@ -42,7 +42,10 @@ add_disc_zz_loss = True, dec_layers=[ 75, 100 ], enc_layers=[ 100, 75 ], - disc_layers= [ 100, 75 ], spectral_normalization = False, + disc_xx_layers= [ 100, 75 ], + disc_zz_layers= [ 25, 25 ], + disc_xz_layers= [ 100, 75 ], + spectral_normalization = False, activation_hidden_disc = 'tanh', activation_hidden_gen = 'tanh' , preprocessing=True, batch_size = 200, contamination = contamination) diff --git a/pyod/models/alad.py b/pyod/models/alad.py index 43fa7139d..00ff805be 100644 --- a/pyod/models/alad.py +++ b/pyod/models/alad.py @@ -60,8 +60,14 @@ class ALAD(BaseDetector): enc_layers : list, optional (default=[25,10,5]) List that indicates the number of nodes per hidden layer for the encoder network. Thus, [10,10] indicates 2 hidden layers having each 10 nodes. - disc_layers : list, optional (default=[25,10,5]) - List that indicates the number of nodes per hidden layer for the discrimator networks. + disc_xx_layers : list, optional (default=[25,10,5]) + List that indicates the number of nodes per hidden layer for discrimator_xx. + Thus, [10,10] indicates 2 hidden layers having each 10 nodes. + disc_zz_layers : list, optional (default=[25,10,5]) + List that indicates the number of nodes per hidden layer for discrimator_zz. + Thus, [10,10] indicates 2 hidden layers having each 10 nodes. + disc_xz_layers : list, optional (default=[25,10,5]) + List that indicates the number of nodes per hidden layer for discrimator_xz. Thus, [10,10] indicates 2 hidden layers having each 10 nodes. 
learning_rate_gen: float in (0., 1), optional (default=0.001) learning rate of training the the encoder and decoder @@ -108,7 +114,9 @@ def __init__(self, activation_hidden_gen='tanh', latent_dim=2, dec_layers=[5, 10, 25], enc_layers=[25, 10, 5], - disc_layers=[25, 10, 5], + disc_xx_layers=[25, 10, 5], + disc_zz_layers=[25, 10, 5], + disc_xz_layers=[25, 10, 5], learning_rate_gen = 0.0001, learning_rate_disc = 0.0001, add_recon_loss = False, lambda_recon_loss = 0.1, epochs = 200, @@ -124,7 +132,11 @@ def __init__(self, activation_hidden_gen='tanh', self.latent_dim = latent_dim self.dec_layers = dec_layers self.enc_layers = enc_layers - self.disc_layers = disc_layers + + self.disc_xx_layers = disc_xx_layers + self.disc_zz_layers = disc_zz_layers + self.disc_xz_layers = disc_xz_layers + self.add_recon_loss = add_recon_loss self.lambda_recon_loss = lambda_recon_loss self.add_disc_zz_loss = add_disc_zz_loss @@ -202,7 +214,7 @@ def _build_model(self): # Store all hidden layers in dict disc_xz_hl_dict = {} - for i, l_dim in enumerate(self.disc_layers): + for i, l_dim in enumerate(self.disc_xz_layers): layer_name = 'hl_{}'.format(i) if( self.spectral_normalization == True): @@ -227,7 +239,7 @@ def _build_model(self): # Store all hidden layers in dict disc_xx_hl_dict = {} - for i, l_dim in enumerate(self.disc_layers): + for i, l_dim in enumerate(self.disc_xx_layers): layer_name = 'hl_{}'.format(i) if( self.spectral_normalization == True): @@ -253,7 +265,7 @@ def _build_model(self): # Store all hidden layers in dict disc_zz_hl_dict = {} - for i, l_dim in enumerate(self.disc_layers): + for i, l_dim in enumerate(self.disc_zz_layers): layer_name = 'hl_{}'.format(i) if( self.spectral_normalization == True): diff --git a/pyod/test/test_alad.py b/pyod/test/test_alad.py index 36e002bd3..f7a62fe1a 100644 --- a/pyod/test/test_alad.py +++ b/pyod/test/test_alad.py @@ -44,7 +44,10 @@ def setUp(self): add_disc_zz_loss = True, dec_layers=[ 75, 100 ], enc_layers=[ 100, 75 ], - disc_layers= [ 100, 75 ], spectral_normalization = False, + disc_xx_layers= [ 100, 75 ], + disc_zz_layers= [ 25, 25 ], + disc_xz_layers= [ 100, 75 ], + spectral_normalization = False, activation_hidden_disc = 'tanh', activation_hidden_gen = 'tanh' , preprocessing=True, batch_size = 200, contamination = self.contamination) From e5a1545b329b1330cd8b557fa4249e9e09788261 Mon Sep 17 00:00:00 2001 From: Michiel Date: Mon, 25 Jul 2022 08:42:07 +0200 Subject: [PATCH 07/16] tfa needs to be global in order to work --- pyod/models/alad.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pyod/models/alad.py b/pyod/models/alad.py index 00ff805be..da6c58242 100644 --- a/pyod/models/alad.py +++ b/pyod/models/alad.py @@ -153,6 +153,7 @@ def __init__(self, activation_hidden_gen='tanh', if( self.spectral_normalization == True): try: + global tfa import tensorflow_addons as tfa except ModuleNotFoundError: # Error handling From 8e6218e031419981ba201dedd1d70e73510a698a Mon Sep 17 00:00:00 2001 From: yzhao062 Date: Fri, 29 Jul 2022 20:58:51 -0400 Subject: [PATCH 08/16] optimize import --- pyod/models/abod.py | 1 + pyod/models/anogan.py | 6 ++---- pyod/models/auto_encoder.py | 5 ++--- pyod/models/auto_encoder_torch.py | 7 +++---- pyod/models/base.py | 10 ++++------ pyod/models/base_dl.py | 1 + pyod/models/cblof.py | 7 ++++--- pyod/models/cd.py | 4 ++-- pyod/models/copod.py | 8 ++++---- pyod/models/deep_svdd.py | 3 +-- pyod/models/ecod.py | 8 ++++---- pyod/models/feature_bagging.py | 14 ++++---------- pyod/models/gmm.py | 5 +++-- pyod/models/hbos.py | 5 ++--- 
pyod/models/iforest.py | 5 ++--- pyod/models/inne.py | 2 +- pyod/models/kde.py | 2 +- pyod/models/knn.py | 2 +- pyod/models/loci.py | 2 +- pyod/models/loda.py | 3 ++- pyod/models/lof.py | 5 +++-- pyod/models/lscp.py | 3 +-- pyod/models/lunar.py | 12 ++++++------ pyod/models/mcd.py | 2 +- pyod/models/mo_gaal.py | 3 +-- pyod/models/ocsvm.py | 2 +- pyod/models/pca.py | 2 +- pyod/models/rgraph.py | 12 ++++++------ pyod/models/rod.py | 3 ++- pyod/models/sklearn_base.py | 2 +- pyod/models/so_gaal.py | 3 +-- pyod/models/sod.py | 4 ++-- pyod/models/vae.py | 12 +++++------- pyod/models/xgbod.py | 2 +- pyod/test/test_abod.py | 7 +++---- pyod/test/test_auto_encoder.py | 5 ++--- pyod/test/test_auto_encoder_torch.py | 7 ++----- pyod/test/test_base.py | 5 ++--- pyod/test/test_cblof.py | 7 +++---- pyod/test/test_cd.py | 6 ++---- pyod/test/test_cof.py | 6 ++---- pyod/test/test_combination.py | 5 ++--- pyod/test/test_copod.py | 7 +++---- pyod/test/test_copod_parallel.py | 7 +++---- pyod/test/test_data.py | 5 ++--- pyod/test/test_deepsvdd.py | 7 ++----- pyod/test/test_ecod.py | 7 +++---- pyod/test/test_ecod_parallel.py | 7 +++---- pyod/test/test_feature_bagging.py | 7 +++---- pyod/test/test_gmm.py | 8 ++++---- pyod/test/test_hbos.py | 9 +++------ pyod/test/test_iforest.py | 9 +++------ pyod/test/test_inne.py | 7 +++---- pyod/test/test_kde.py | 5 +++-- pyod/test/test_knn.py | 7 +++---- pyod/test/test_lmdd.py | 7 ++----- pyod/test/test_loci.py | 7 +++---- pyod/test/test_loda.py | 8 ++------ pyod/test/test_lof.py | 7 +++---- pyod/test/test_lscp.py | 13 +++++-------- pyod/test/test_lunar.py | 8 ++------ pyod/test/test_mad.py | 6 ++---- pyod/test/test_mcd.py | 7 +++---- pyod/test/test_mo_gaal.py | 4 +--- pyod/test/test_ocsvm.py | 7 +++---- pyod/test/test_pca.py | 7 +++---- pyod/test/test_rgraph.py | 5 ++--- pyod/test/test_rod.py | 5 ++--- pyod/test/test_sampling.py | 6 +++--- pyod/test/test_so_gaal.py | 7 +------ pyod/test/test_sod.py | 5 ++--- pyod/test/test_sos.py | 5 +---- pyod/test/test_stat_models.py | 6 ++---- pyod/test/test_suod.py | 14 ++++---------- pyod/test/test_utility.py | 4 +--- pyod/test/test_vae.py | 7 ++----- pyod/test/test_xgbod.py | 9 ++++----- pyod/utils/__init__.py | 20 ++++++++++---------- pyod/utils/data.py | 9 +++++---- pyod/utils/example.py | 1 + pyod/utils/torch_utility.py | 2 -- pyod/utils/utility.py | 8 +++----- 82 files changed, 205 insertions(+), 296 deletions(-) diff --git a/pyod/models/abod.py b/pyod/models/abod.py index 27092685f..5898b65f8 100644 --- a/pyod/models/abod.py +++ b/pyod/models/abod.py @@ -16,6 +16,7 @@ from sklearn.neighbors import NearestNeighbors from sklearn.utils import check_array from sklearn.utils.validation import check_is_fitted + from .base import BaseDetector from ..utils.utility import check_parameter diff --git a/pyod/models/anogan.py b/pyod/models/anogan.py index aa1a704c9..ca8105a82 100644 --- a/pyod/models/anogan.py +++ b/pyod/models/anogan.py @@ -7,18 +7,16 @@ # License: BSD 2 clause +import matplotlib.pyplot as plt import numpy as np import pandas as pd from sklearn.preprocessing import StandardScaler from sklearn.utils import check_array from sklearn.utils.validation import check_is_fitted -from ..utils.utility import check_parameter - from .base import BaseDetector from .base_dl import _get_tensorflow_version - -import matplotlib.pyplot as plt +from ..utils.utility import check_parameter # if tensorflow 2, import from tf directly if _get_tensorflow_version() == 1: diff --git a/pyod/models/auto_encoder.py b/pyod/models/auto_encoder.py index 
1ed264dd9..2efdaaf5a 100644 --- a/pyod/models/auto_encoder.py +++ b/pyod/models/auto_encoder.py @@ -12,11 +12,10 @@ from sklearn.utils import check_array from sklearn.utils.validation import check_is_fitted -from ..utils.utility import check_parameter -from ..utils.stat_models import pairwise_distances_no_broadcast - from .base import BaseDetector from .base_dl import _get_tensorflow_version +from ..utils.stat_models import pairwise_distances_no_broadcast +from ..utils.utility import check_parameter # if tensorflow 2, import from tf directly if _get_tensorflow_version() == 1: diff --git a/pyod/models/auto_encoder_torch.py b/pyod/models/auto_encoder_torch.py index a79aad8ed..af5575e8c 100644 --- a/pyod/models/auto_encoder_torch.py +++ b/pyod/models/auto_encoder_torch.py @@ -7,16 +7,15 @@ from __future__ import division from __future__ import print_function -import torch -from torch import nn - import numpy as np +import torch from sklearn.utils import check_array from sklearn.utils.validation import check_is_fitted +from torch import nn from .base import BaseDetector -from ..utils.torch_utility import get_activation_by_name from ..utils.stat_models import pairwise_distances_no_broadcast +from ..utils.torch_utility import get_activation_by_name class PyODDataset(torch.utils.data.Dataset): diff --git a/pyod/models/base.py b/pyod/models/base.py index 228392dab..785fb726b 100644 --- a/pyod/models/base.py +++ b/pyod/models/base.py @@ -7,23 +7,21 @@ from __future__ import division from __future__ import print_function +import abc import warnings from collections import defaultdict - from inspect import signature -import abc -import six - import numpy as np +import six from numpy import percentile from scipy.special import erf from scipy.stats import binom -from sklearn.preprocessing import MinMaxScaler from sklearn.metrics import roc_auc_score +from sklearn.preprocessing import MinMaxScaler from sklearn.utils import deprecated -from sklearn.utils.validation import check_is_fitted from sklearn.utils.multiclass import check_classification_targets +from sklearn.utils.validation import check_is_fitted from .sklearn_base import _pprint from ..utils.utility import precision_n_scores diff --git a/pyod/models/base_dl.py b/pyod/models/base_dl.py index d6fcdd8a9..7267e4019 100644 --- a/pyod/models/base_dl.py +++ b/pyod/models/base_dl.py @@ -9,6 +9,7 @@ import tensorflow + def _get_tensorflow_version(): # pragma: no cover """ Utility function to decide the version of tensorflow, which will affect how to import keras models. 
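
A side note on the ALAD patches earlier in this series: PATCH 02 makes spectral normalization an option that is only enabled when ``tensorflow_addons`` is installed, and PATCH 07's one-line change declares the name ``global`` so that the import performed inside ``ALAD.__init__`` is still visible when ``_build_model`` later wraps its ``Dense`` layers. A minimal sketch of that pattern is shown below; the helper name is illustrative and not part of the patch, the real logic lives in ``ALAD.__init__`` and ``_build_model``.

# Sketch of the optional tensorflow_addons import used for spectral
# normalization (PATCH 02 + PATCH 07); '_try_import_tfa' is an illustrative
# name only.
def _try_import_tfa():
    """Return True if spectral normalization can be used."""
    try:
        # 'global' rebinds the module-level name, so other methods (e.g.
        # _build_model) can later call tfa.layers.SpectralNormalization;
        # a plain local import inside __init__ would be invisible there,
        # which is exactly what PATCH 07 fixes.
        global tfa
        import tensorflow_addons as tfa
        return True
    except ModuleNotFoundError:
        print('tensorflow_addons not found, cannot use spectral '
              'normalization. Install tensorflow_addons first.')
        return False
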
diff --git a/pyod/models/cblof.py b/pyod/models/cblof.py index 75a24a722..6fd48b1d9 100644 --- a/pyod/models/cblof.py +++ b/pyod/models/cblof.py @@ -9,16 +9,17 @@ from __future__ import print_function import warnings + import numpy as np from scipy.spatial.distance import cdist from sklearn.cluster import KMeans -from sklearn.utils.validation import check_is_fitted -from sklearn.utils.validation import check_array from sklearn.utils.estimator_checks import check_estimator +from sklearn.utils.validation import check_array +from sklearn.utils.validation import check_is_fitted from .base import BaseDetector -from ..utils.utility import check_parameter from ..utils.stat_models import pairwise_distances_no_broadcast +from ..utils.utility import check_parameter __all__ = ['CBLOF'] diff --git a/pyod/models/cd.py b/pyod/models/cd.py index c6e2cfb97..6a630ab69 100644 --- a/pyod/models/cd.py +++ b/pyod/models/cd.py @@ -9,13 +9,13 @@ from __future__ import print_function import numpy as np -from sklearn.linear_model import LinearRegression from sklearn.decomposition import PCA +from sklearn.linear_model import LinearRegression from sklearn.utils import check_array from sklearn.utils.validation import check_is_fitted from .base import BaseDetector -from ..utils.utility import check_parameter + def whiten_data(X, pca): diff --git a/pyod/models/copod.py b/pyod/models/copod.py index d58c4dbe3..62f5fd2c9 100644 --- a/pyod/models/copod.py +++ b/pyod/models/copod.py @@ -8,13 +8,13 @@ from __future__ import print_function import warnings -import numpy as np -from statsmodels.distributions.empirical_distribution import ECDF +import matplotlib.pyplot as plt +import numpy as np +from joblib import Parallel, delayed from scipy.stats import skew from sklearn.utils import check_array -from joblib import Parallel, delayed, effective_n_jobs -import matplotlib.pyplot as plt +from statsmodels.distributions.empirical_distribution import ECDF from .base import BaseDetector from .sklearn_base import _partition_estimators diff --git a/pyod/models/deep_svdd.py b/pyod/models/deep_svdd.py index eeada45f1..75a9b7359 100644 --- a/pyod/models/deep_svdd.py +++ b/pyod/models/deep_svdd.py @@ -12,10 +12,9 @@ from sklearn.utils import check_array from sklearn.utils.validation import check_is_fitted -from ..utils.utility import check_parameter - from .base import BaseDetector from .base_dl import _get_tensorflow_version +from ..utils.utility import check_parameter # if tensorflow 2, import from tf directly if _get_tensorflow_version() == 2: diff --git a/pyod/models/ecod.py b/pyod/models/ecod.py index 649edc0b2..ef7d00f0d 100644 --- a/pyod/models/ecod.py +++ b/pyod/models/ecod.py @@ -9,13 +9,13 @@ from __future__ import print_function import warnings -import numpy as np -from statsmodels.distributions.empirical_distribution import ECDF +import matplotlib.pyplot as plt +import numpy as np +from joblib import Parallel, delayed from scipy.stats import skew from sklearn.utils import check_array -from joblib import Parallel, delayed, effective_n_jobs -import matplotlib.pyplot as plt +from statsmodels.distributions.empirical_distribution import ECDF from .base import BaseDetector from .sklearn_base import _partition_estimators diff --git a/pyod/models/feature_bagging.py b/pyod/models/feature_bagging.py index 8f11e8839..e1f28292d 100644 --- a/pyod/models/feature_bagging.py +++ b/pyod/models/feature_bagging.py @@ -6,26 +6,20 @@ from __future__ import division from __future__ import print_function -import numpy as np import numbers -from 
joblib import Parallel -from joblib.parallel import delayed - +import numpy as np from sklearn.base import clone -from sklearn.utils import check_random_state from sklearn.utils import check_array +from sklearn.utils import check_random_state from sklearn.utils.validation import check_is_fitted -from sklearn.utils.random import sample_without_replacement -from .lof import LOF from .base import BaseDetector -from .sklearn_base import _partition_estimators from .combination import average, maximization +from .lof import LOF +from ..utils.utility import check_detector from ..utils.utility import check_parameter -from ..utils.utility import generate_indices from ..utils.utility import generate_bagging_indices -from ..utils.utility import check_detector MAX_INT = np.iinfo(np.int32).max diff --git a/pyod/models/gmm.py b/pyod/models/gmm.py index 651a836c7..e1c14e53d 100644 --- a/pyod/models/gmm.py +++ b/pyod/models/gmm.py @@ -7,12 +7,13 @@ from __future__ import division, print_function -from pyod.models.base import BaseDetector -from pyod.utils.utility import invert_order from sklearn.mixture import GaussianMixture from sklearn.utils import check_array from sklearn.utils.validation import check_is_fitted +from pyod.models.base import BaseDetector +from pyod.utils.utility import invert_order + class GMM(BaseDetector): """Wrapper of scikit-learn Gaussian Mixture Model with more functionalities. diff --git a/pyod/models/hbos.py b/pyod/models/hbos.py index 9217b78bc..9e39f247d 100644 --- a/pyod/models/hbos.py +++ b/pyod/models/hbos.py @@ -12,11 +12,10 @@ from sklearn.utils import check_array from sklearn.utils.validation import check_is_fitted +from .base import BaseDetector from ..utils.utility import check_parameter -from ..utils.utility import invert_order from ..utils.utility import get_optimal_n_bins - -from .base import BaseDetector +from ..utils.utility import invert_order class HBOS(BaseDetector): diff --git a/pyod/models/iforest.py b/pyod/models/iforest.py index 0d356debf..f45c69f40 100644 --- a/pyod/models/iforest.py +++ b/pyod/models/iforest.py @@ -10,15 +10,14 @@ import numpy as np from joblib import Parallel from joblib.parallel import delayed - from sklearn.ensemble import IsolationForest -from sklearn.utils.validation import check_is_fitted from sklearn.utils import check_array +from sklearn.utils.validation import check_is_fitted from .base import BaseDetector -from ..utils.utility import invert_order # noinspection PyProtectedMember from ..utils.utility import _get_sklearn_version +from ..utils.utility import invert_order # TODO: behavior of Isolation Forest will change in sklearn 0.22. See below. 
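
Returning once more to the detector added in PATCH 01: none of the commit messages spell out how ALAD actually scores a sample, so the following is a short paraphrase of ``get_outlier_scores()`` from the new ``pyod/models/alad.py`` (``enc``, ``dec`` and ``disc_xx`` are the fitted Keras models from that file; the function name and ``X`` are illustrative). Each sample is encoded to the latent space and decoded back, and the score is the mean squared distance between the xx-discriminator's last hidden-layer activations for the pair (x, x) and for (x, x_hat); poorly reconstructed samples, i.e. outliers, end up with larger distances.

import numpy as np

# Paraphrase of ALAD's scoring step (PATCH 01); not the library API itself.
def alad_scores_sketch(enc, dec, disc_xx, X):
    z = enc({'I1': X}).numpy()          # x -> z
    x_hat = dec({'I1': z}).numpy()      # z -> x_hat (reconstruction)

    # disc_xx returns (probability, last hidden activations); the activations
    # act as a learned feature space for a feature-matching distance.
    _, feats_real = disc_xx({'I1': X, 'I2': X}, training=False)
    _, feats_recon = disc_xx({'I1': X, 'I2': x_hat}, training=False)

    return np.mean((feats_real.numpy() - feats_recon.numpy()) ** 2, axis=1)
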
diff --git a/pyod/models/inne.py b/pyod/models/inne.py index e69bbc520..8400b9b07 100644 --- a/pyod/models/inne.py +++ b/pyod/models/inne.py @@ -12,9 +12,9 @@ from warnings import warn import numpy as np +from sklearn.metrics import euclidean_distances from sklearn.utils import check_array from sklearn.utils.validation import check_is_fitted, check_random_state -from sklearn.metrics import euclidean_distances from .base import BaseDetector from ..utils.utility import MAX_INT, invert_order diff --git a/pyod/models/kde.py b/pyod/models/kde.py index 845c9cc3a..6fc3e54c3 100644 --- a/pyod/models/kde.py +++ b/pyod/models/kde.py @@ -12,8 +12,8 @@ from sklearn.utils import check_array from sklearn.utils.validation import check_is_fitted -from ..utils.utility import invert_order from .base import BaseDetector +from ..utils.utility import invert_order class KDE(BaseDetector): diff --git a/pyod/models/knn.py b/pyod/models/knn.py index e81134eb6..c94cdde3a 100644 --- a/pyod/models/knn.py +++ b/pyod/models/knn.py @@ -9,8 +9,8 @@ from warnings import warn import numpy as np -from sklearn.neighbors import NearestNeighbors from sklearn.neighbors import BallTree +from sklearn.neighbors import NearestNeighbors from sklearn.utils import check_array from sklearn.utils.validation import check_is_fitted diff --git a/pyod/models/loci.py b/pyod/models/loci.py index db80ec268..e13616728 100644 --- a/pyod/models/loci.py +++ b/pyod/models/loci.py @@ -10,9 +10,9 @@ import numpy as np from numba import njit +from scipy.spatial.distance import pdist, squareform from sklearn.utils import check_array from sklearn.utils.validation import check_is_fitted -from scipy.spatial.distance import pdist, squareform from .base import BaseDetector diff --git a/pyod/models/loda.py b/pyod/models/loda.py index 1dc4591b3..ee995d66b 100644 --- a/pyod/models/loda.py +++ b/pyod/models/loda.py @@ -9,9 +9,10 @@ from __future__ import print_function import numbers + import numpy as np -from sklearn.utils.validation import check_is_fitted from sklearn.utils import check_array +from sklearn.utils.validation import check_is_fitted from .base import BaseDetector from ..utils.utility import get_optimal_n_bins diff --git a/pyod/models/lof.py b/pyod/models/lof.py index a7c85afcc..0d26ea83b 100644 --- a/pyod/models/lof.py +++ b/pyod/models/lof.py @@ -8,13 +8,14 @@ from __future__ import print_function from sklearn.neighbors import LocalOutlierFactor -from sklearn.utils.validation import check_is_fitted from sklearn.utils.validation import check_array +from sklearn.utils.validation import check_is_fitted from .base import BaseDetector from ..utils.utility import invert_order + + # noinspection PyProtectedMember -from ..utils.utility import _get_sklearn_version class LOF(BaseDetector): diff --git a/pyod/models/lscp.py b/pyod/models/lscp.py index 64bf4522d..1274d9010 100644 --- a/pyod/models/lscp.py +++ b/pyod/models/lscp.py @@ -10,7 +10,6 @@ # numpy import numpy as np - # sklearn imports from sklearn.neighbors import KDTree from sklearn.utils import check_array @@ -21,9 +20,9 @@ from pyod.models.base import BaseDetector from pyod.utils.stat_models import pearsonr from pyod.utils.utility import argmaxn +from pyod.utils.utility import check_detector from pyod.utils.utility import generate_bagging_indices from pyod.utils.utility import standardizer -from pyod.utils.utility import check_detector # TODO: find random state that is causing runtime warning in pearson diff --git a/pyod/models/lunar.py b/pyod/models/lunar.py index db2b16065..ee486cbc8 100644 
--- a/pyod/models/lunar.py +++ b/pyod/models/lunar.py @@ -5,18 +5,18 @@ # from copy import deepcopy -from sklearn.metrics import roc_auc_score + import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +from sklearn.metrics import roc_auc_score from sklearn.model_selection import train_test_split from sklearn.neighbors import NearestNeighbors from sklearn.preprocessing import MinMaxScaler from sklearn.utils.validation import check_is_fitted -import torch -import torch.nn as nn -import torch.optim as optim -import torch.nn.functional as F - from .base import BaseDetector diff --git a/pyod/models/mcd.py b/pyod/models/mcd.py index d358271d5..337fdbdba 100644 --- a/pyod/models/mcd.py +++ b/pyod/models/mcd.py @@ -8,8 +8,8 @@ from __future__ import print_function from sklearn.covariance import MinCovDet -from sklearn.utils.validation import check_is_fitted from sklearn.utils.validation import check_array +from sklearn.utils.validation import check_is_fitted from .base import BaseDetector diff --git a/pyod/models/mo_gaal.py b/pyod/models/mo_gaal.py index c54929da8..6a44ca1ae 100644 --- a/pyod/models/mo_gaal.py +++ b/pyod/models/mo_gaal.py @@ -12,14 +12,13 @@ from collections import defaultdict import numpy as np - from sklearn.utils import check_array from sklearn.utils.validation import check_is_fitted from .base import BaseDetector +from .base_dl import _get_tensorflow_version from .gaal_base import create_discriminator from .gaal_base import create_generator -from .base_dl import _get_tensorflow_version # if tensorflow 2, import from tf directly if _get_tensorflow_version() == 1: diff --git a/pyod/models/ocsvm.py b/pyod/models/ocsvm.py index 4739780eb..62610fd34 100644 --- a/pyod/models/ocsvm.py +++ b/pyod/models/ocsvm.py @@ -8,8 +8,8 @@ from __future__ import print_function from sklearn.svm import OneClassSVM -from sklearn.utils.validation import check_is_fitted from sklearn.utils import check_array +from sklearn.utils.validation import check_is_fitted from .base import BaseDetector from ..utils.utility import invert_order diff --git a/pyod/models/pca.py b/pyod/models/pca.py index ffaad3477..ad545c126 100644 --- a/pyod/models/pca.py +++ b/pyod/models/pca.py @@ -10,8 +10,8 @@ import numpy as np from scipy.spatial.distance import cdist from sklearn.decomposition import PCA as sklearn_PCA -from sklearn.utils.validation import check_is_fitted from sklearn.utils.validation import check_array +from sklearn.utils.validation import check_is_fitted from .base import BaseDetector from ..utils.utility import check_parameter diff --git a/pyod/models/rgraph.py b/pyod/models/rgraph.py index 41b7f0280..004071343 100644 --- a/pyod/models/rgraph.py +++ b/pyod/models/rgraph.py @@ -8,18 +8,18 @@ from __future__ import division from __future__ import print_function +import warnings + import numpy as np +from scipy import sparse +from sklearn.decomposition import sparse_encode +from sklearn.linear_model import LinearRegression from sklearn.preprocessing import StandardScaler +from sklearn.preprocessing import normalize from sklearn.utils import check_array from .base import BaseDetector -from scipy import sparse -from sklearn.decomposition import sparse_encode -from sklearn.preprocessing import normalize -from sklearn.linear_model import LinearRegression -import warnings - class RGraph(BaseDetector): """ Outlier Detection via R-graph. 
Paper: https://openaccess.thecvf.com/content_cvpr_2017/papers/You_Provable_Self-Representation_Based_CVPR_2017_paper.pdf diff --git a/pyod/models/rod.py b/pyod/models/rod.py index 0d18a1994..cc5bfe8d4 100644 --- a/pyod/models/rod.py +++ b/pyod/models/rod.py @@ -6,9 +6,10 @@ from __future__ import division from __future__ import print_function +import multiprocessing from itertools import combinations as com from multiprocessing import Pool -import multiprocessing + import numba import numpy as np from sklearn.preprocessing import MinMaxScaler, RobustScaler diff --git a/pyod/models/sklearn_base.py b/pyod/models/sklearn_base.py index fff44c894..0aaf6a4e7 100644 --- a/pyod/models/sklearn_base.py +++ b/pyod/models/sklearn_base.py @@ -7,8 +7,8 @@ from __future__ import division from __future__ import print_function -import six import numpy as np +import six from joblib.parallel import cpu_count diff --git a/pyod/models/so_gaal.py b/pyod/models/so_gaal.py index 5cfd6f59a..349ec5984 100644 --- a/pyod/models/so_gaal.py +++ b/pyod/models/so_gaal.py @@ -12,14 +12,13 @@ from collections import defaultdict import numpy as np - from sklearn.utils import check_array from sklearn.utils.validation import check_is_fitted from .base import BaseDetector +from .base_dl import _get_tensorflow_version from .gaal_base import create_discriminator from .gaal_base import create_generator -from .base_dl import _get_tensorflow_version # if tensorflow 2, import from tf directly if _get_tensorflow_version() == 1: diff --git a/pyod/models/sod.py b/pyod/models/sod.py index 790db5f79..f3600e465 100644 --- a/pyod/models/sod.py +++ b/pyod/models/sod.py @@ -4,13 +4,13 @@ # Author: Yahya Almardeny # License: BSD 2 clause -import numpy as np import numba as nb +import numpy as np from sklearn.neighbors import NearestNeighbors from sklearn.utils import check_array -from ..utils.utility import check_parameter from .base import BaseDetector +from ..utils.utility import check_parameter @nb.njit(parallel=True) diff --git a/pyod/models/vae.py b/pyod/models/vae.py index eb8bbaf4c..4938cbc8c 100644 --- a/pyod/models/vae.py +++ b/pyod/models/vae.py @@ -16,34 +16,32 @@ # Author: Andrij Vasylenko # License: BSD 2 clause +from __future__ import absolute_import from __future__ import division from __future__ import print_function -from __future__ import absolute_import import numpy as np - from sklearn.preprocessing import StandardScaler from sklearn.utils import check_array from sklearn.utils.validation import check_is_fitted -from ..utils.utility import check_parameter -from ..utils.stat_models import pairwise_distances_no_broadcast - from .base import BaseDetector from .base_dl import _get_tensorflow_version +from ..utils.stat_models import pairwise_distances_no_broadcast +from ..utils.utility import check_parameter # if tensorflow 2, import from tf directly if _get_tensorflow_version() == 1: from keras.models import Model from keras.layers import Lambda, Input, Dense, Dropout from keras.regularizers import l2 - from keras.losses import mse, binary_crossentropy + from keras.losses import mse from keras import backend as K else: from tensorflow.keras.models import Model from tensorflow.keras.layers import Lambda, Input, Dense, Dropout from tensorflow.keras.regularizers import l2 - from tensorflow.keras.losses import mse, binary_crossentropy + from tensorflow.keras.losses import mse from tensorflow.keras import backend as K diff --git a/pyod/models/xgbod.py b/pyod/models/xgbod.py index 79a2e8f91..6a5f07ee8 100644 --- 
a/pyod/models/xgbod.py +++ b/pyod/models/xgbod.py @@ -10,8 +10,8 @@ import numpy as np from sklearn.metrics import roc_auc_score from sklearn.utils import check_array -from sklearn.utils.validation import check_is_fitted from sklearn.utils.validation import check_X_y +from sklearn.utils.validation import check_is_fitted try: import xgboost diff --git a/pyod/test/test_abod.py b/pyod/test/test_abod.py index 4316b8e3b..f007b12d2 100644 --- a/pyod/test/test_abod.py +++ b/pyod/test/test_abod.py @@ -4,17 +4,16 @@ import os import sys - import unittest + # noinspection PyProtectedMember from numpy.testing import assert_allclose from numpy.testing import assert_array_less from numpy.testing import assert_equal from numpy.testing import assert_raises - -from sklearn.metrics import roc_auc_score -from sklearn.base import clone from scipy.stats import rankdata +from sklearn.base import clone +from sklearn.metrics import roc_auc_score # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line diff --git a/pyod/test/test_auto_encoder.py b/pyod/test/test_auto_encoder.py index 5b830fec9..8295f9bb6 100644 --- a/pyod/test/test_auto_encoder.py +++ b/pyod/test/test_auto_encoder.py @@ -4,14 +4,13 @@ import os import sys - import unittest + # noinspection PyProtectedMember from numpy.testing import assert_equal from numpy.testing import assert_raises - -from sklearn.metrics import roc_auc_score from sklearn.base import clone +from sklearn.metrics import roc_auc_score # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line diff --git a/pyod/test/test_auto_encoder_torch.py b/pyod/test/test_auto_encoder_torch.py index 389b150ba..61c797897 100644 --- a/pyod/test/test_auto_encoder_torch.py +++ b/pyod/test/test_auto_encoder_torch.py @@ -4,18 +4,15 @@ import os import sys +import unittest import numpy as np import torch - -import unittest +from numpy.testing import assert_almost_equal # noinspection PyProtectedMember from numpy.testing import assert_equal -from numpy.testing import assert_almost_equal from numpy.testing import assert_raises - from sklearn.metrics import roc_auc_score -from sklearn.base import clone # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line diff --git a/pyod/test/test_base.py b/pyod/test/test_base.py index 3bd35f9bc..4b5968d20 100644 --- a/pyod/test/test_base.py +++ b/pyod/test/test_base.py @@ -4,12 +4,11 @@ import os import sys - import unittest -from numpy.testing import assert_equal -from numpy.testing import assert_raises import numpy as np +from numpy.testing import assert_equal +from numpy.testing import assert_raises # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line diff --git a/pyod/test/test_cblof.py b/pyod/test/test_cblof.py index c891f3e17..e66bbdfa4 100644 --- a/pyod/test/test_cblof.py +++ b/pyod/test/test_cblof.py @@ -4,17 +4,16 @@ import os import sys - import unittest + # noinspection PyProtectedMember from numpy.testing import assert_allclose from numpy.testing import assert_array_less from numpy.testing import assert_equal from numpy.testing import assert_raises - -from sklearn.metrics import roc_auc_score -from sklearn.base import clone from scipy.stats import rankdata +from sklearn.base import clone +from sklearn.metrics import roc_auc_score # temporary solution for relative 
imports in case pyod is not installed # if pyod is installed, no need to use the following line diff --git a/pyod/test/test_cd.py b/pyod/test/test_cd.py index 9eb1227a1..998c655e4 100644 --- a/pyod/test/test_cd.py +++ b/pyod/test/test_cd.py @@ -4,14 +4,12 @@ import os import sys - import unittest + +import numpy as np # noinspection PyProtectedMember -from numpy.testing import assert_allclose -from numpy.testing import assert_array_less from numpy.testing import assert_equal from numpy.testing import assert_raises -import numpy as np from sklearn.base import clone # temporary solution for relative imports in case pyod is not installed diff --git a/pyod/test/test_cof.py b/pyod/test/test_cof.py index 868b65c87..4b6b3f4b2 100644 --- a/pyod/test/test_cof.py +++ b/pyod/test/test_cof.py @@ -4,17 +4,15 @@ import os import sys - import unittest + # noinspection PyProtectedMember from numpy.testing import assert_allclose from numpy.testing import assert_array_less from numpy.testing import assert_equal from numpy.testing import assert_raises - -from sklearn.metrics import roc_auc_score -from sklearn.base import clone from scipy.stats import rankdata +from sklearn.metrics import roc_auc_score # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line diff --git a/pyod/test/test_combination.py b/pyod/test/test_combination.py index 9c0de5c2e..6a750ff8e 100644 --- a/pyod/test/test_combination.py +++ b/pyod/test/test_combination.py @@ -5,14 +5,13 @@ import os import sys - import unittest + +import numpy as np from numpy.testing import assert_allclose from numpy.testing import assert_array_equal from numpy.testing import assert_equal from numpy.testing import assert_raises - -import numpy as np from sklearn.utils import shuffle # temporary solution for relative imports in case pyod is not installed diff --git a/pyod/test/test_copod.py b/pyod/test/test_copod.py index 4f4ee6fde..ba154a3b0 100644 --- a/pyod/test/test_copod.py +++ b/pyod/test/test_copod.py @@ -4,17 +4,16 @@ import os import sys - import unittest + # noinspection PyProtectedMember from numpy.testing import assert_allclose from numpy.testing import assert_array_less from numpy.testing import assert_equal from numpy.testing import assert_raises - -from sklearn.metrics import roc_auc_score -from sklearn.base import clone from scipy.stats import rankdata +from sklearn.base import clone +from sklearn.metrics import roc_auc_score # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line diff --git a/pyod/test/test_copod_parallel.py b/pyod/test/test_copod_parallel.py index 3c5ab41af..e37d29a2c 100644 --- a/pyod/test/test_copod_parallel.py +++ b/pyod/test/test_copod_parallel.py @@ -4,17 +4,16 @@ import os import sys - import unittest + # noinspection PyProtectedMember from numpy.testing import assert_allclose from numpy.testing import assert_array_less from numpy.testing import assert_equal from numpy.testing import assert_raises - -from sklearn.metrics import roc_auc_score -from sklearn.base import clone from scipy.stats import rankdata +from sklearn.base import clone +from sklearn.metrics import roc_auc_score # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line diff --git a/pyod/test/test_data.py b/pyod/test/test_data.py index 42747c85b..7810168b8 100644 --- a/pyod/test/test_data.py +++ b/pyod/test/test_data.py @@ -5,15 +5,14 @@ 
import os import sys - import unittest + +import numpy as np # noinspection PyProtectedMember from numpy.testing import assert_allclose from numpy.testing import assert_equal from numpy.testing import assert_raises -import numpy as np - # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line from pyod.utils.data import generate_data_categorical diff --git a/pyod/test/test_deepsvdd.py b/pyod/test/test_deepsvdd.py index d9cab2f9b..fb0133219 100644 --- a/pyod/test/test_deepsvdd.py +++ b/pyod/test/test_deepsvdd.py @@ -4,16 +4,13 @@ import os import sys - import unittest + # noinspection PyProtectedMember -from numpy.testing import assert_allclose -from numpy.testing import assert_array_less from numpy.testing import assert_equal from numpy.testing import assert_raises - -from sklearn.metrics import roc_auc_score from sklearn.base import clone +from sklearn.metrics import roc_auc_score # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line diff --git a/pyod/test/test_ecod.py b/pyod/test/test_ecod.py index 61a90ba91..d3222d6ac 100644 --- a/pyod/test/test_ecod.py +++ b/pyod/test/test_ecod.py @@ -4,17 +4,16 @@ import os import sys - import unittest + # noinspection PyProtectedMember from numpy.testing import assert_allclose from numpy.testing import assert_array_less from numpy.testing import assert_equal from numpy.testing import assert_raises - -from sklearn.metrics import roc_auc_score -from sklearn.base import clone from scipy.stats import rankdata +from sklearn.base import clone +from sklearn.metrics import roc_auc_score # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line diff --git a/pyod/test/test_ecod_parallel.py b/pyod/test/test_ecod_parallel.py index efeddf787..b6ea64574 100644 --- a/pyod/test/test_ecod_parallel.py +++ b/pyod/test/test_ecod_parallel.py @@ -4,17 +4,16 @@ import os import sys - import unittest + # noinspection PyProtectedMember from numpy.testing import assert_allclose from numpy.testing import assert_array_less from numpy.testing import assert_equal from numpy.testing import assert_raises - -from sklearn.metrics import roc_auc_score -from sklearn.base import clone from scipy.stats import rankdata +from sklearn.base import clone +from sklearn.metrics import roc_auc_score # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line diff --git a/pyod/test/test_feature_bagging.py b/pyod/test/test_feature_bagging.py index 4379862d0..141128beb 100644 --- a/pyod/test/test_feature_bagging.py +++ b/pyod/test/test_feature_bagging.py @@ -5,17 +5,16 @@ import os import sys - import unittest + # noinspection PyProtectedMember from numpy.testing import assert_allclose from numpy.testing import assert_array_less from numpy.testing import assert_equal from numpy.testing import assert_raises - -from sklearn.metrics import roc_auc_score -from sklearn.base import clone from scipy.stats import rankdata +from sklearn.base import clone +from sklearn.metrics import roc_auc_score # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line diff --git a/pyod/test/test_gmm.py b/pyod/test/test_gmm.py index 7bfe6ff0d..54e798bdb 100644 --- a/pyod/test/test_gmm.py +++ b/pyod/test/test_gmm.py @@ -6,14 +6,14 @@ import unittest # noinspection 
PyProtectedMember -from numpy.testing import (assert_allclose, assert_array_less, assert_equal, +from numpy.testing import (assert_array_less, assert_equal, assert_raises) -from pyod.models.gmm import GMM -from pyod.utils.data import generate_data_clusters -from scipy.stats import rankdata from sklearn.base import clone from sklearn.metrics import roc_auc_score +from pyod.models.gmm import GMM +from pyod.utils.data import generate_data_clusters + # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) diff --git a/pyod/test/test_hbos.py b/pyod/test/test_hbos.py index eb7888c2a..e5ca93761 100644 --- a/pyod/test/test_hbos.py +++ b/pyod/test/test_hbos.py @@ -4,19 +4,16 @@ import os import sys - import unittest + # noinspection PyProtectedMember from numpy.testing import assert_allclose from numpy.testing import assert_array_less from numpy.testing import assert_equal from numpy.testing import assert_raises - -from sklearn.utils.estimator_checks import check_estimator - -from sklearn.metrics import roc_auc_score -from sklearn.base import clone from scipy.stats import rankdata +from sklearn.base import clone +from sklearn.metrics import roc_auc_score # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line diff --git a/pyod/test/test_iforest.py b/pyod/test/test_iforest.py index d7c630f33..042e937a9 100644 --- a/pyod/test/test_iforest.py +++ b/pyod/test/test_iforest.py @@ -5,19 +5,16 @@ import os import sys - import unittest + # noinspection PyProtectedMember from numpy.testing import assert_allclose from numpy.testing import assert_array_less from numpy.testing import assert_equal from numpy.testing import assert_raises - -from sklearn.utils.estimator_checks import check_estimator - -from sklearn.metrics import roc_auc_score -from sklearn.base import clone from scipy.stats import rankdata +from sklearn.base import clone +from sklearn.metrics import roc_auc_score # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line diff --git a/pyod/test/test_inne.py b/pyod/test/test_inne.py index ab1dabd12..091c1938d 100644 --- a/pyod/test/test_inne.py +++ b/pyod/test/test_inne.py @@ -5,17 +5,16 @@ import os import sys - import unittest + # noinspection PyProtectedMember from numpy.testing import assert_allclose from numpy.testing import assert_array_less from numpy.testing import assert_equal from numpy.testing import assert_raises - -from sklearn.metrics import roc_auc_score -from sklearn.base import clone from scipy.stats import rankdata +from sklearn.base import clone +from sklearn.metrics import roc_auc_score # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line diff --git a/pyod/test/test_kde.py b/pyod/test/test_kde.py index 9fef00dbc..5d509aee9 100644 --- a/pyod/test/test_kde.py +++ b/pyod/test/test_kde.py @@ -8,12 +8,13 @@ # noinspection PyProtectedMember from numpy.testing import (assert_allclose, assert_array_less, assert_equal, assert_raises) -from pyod.models.kde import KDE -from pyod.utils.data import generate_data from scipy.stats import rankdata from sklearn.base import clone from sklearn.metrics import roc_auc_score +from pyod.models.kde import KDE +from pyod.utils.data import generate_data + # temporary solution for 
relative imports in case pyod is not installed # if pyod is installed, no need to use the following line sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) diff --git a/pyod/test/test_knn.py b/pyod/test/test_knn.py index 3f23df445..e2f1b9f19 100644 --- a/pyod/test/test_knn.py +++ b/pyod/test/test_knn.py @@ -5,18 +5,17 @@ import os import sys +import unittest import numpy as np -import unittest # noinspection PyProtectedMember from numpy.testing import assert_allclose from numpy.testing import assert_array_less from numpy.testing import assert_equal from numpy.testing import assert_raises - -from sklearn.metrics import roc_auc_score -from sklearn.base import clone from scipy.stats import rankdata +from sklearn.base import clone +from sklearn.metrics import roc_auc_score # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line diff --git a/pyod/test/test_lmdd.py b/pyod/test/test_lmdd.py index 5a03a748e..1a7625088 100644 --- a/pyod/test/test_lmdd.py +++ b/pyod/test/test_lmdd.py @@ -4,16 +4,13 @@ import os import sys - import unittest + # noinspection PyProtectedMember -from numpy.testing import assert_allclose -from numpy.testing import assert_array_less from numpy.testing import assert_equal from numpy.testing import assert_raises - -from sklearn.metrics import roc_auc_score from sklearn.base import clone +from sklearn.metrics import roc_auc_score # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line diff --git a/pyod/test/test_loci.py b/pyod/test/test_loci.py index 947b7f5db..4c98be6f6 100644 --- a/pyod/test/test_loci.py +++ b/pyod/test/test_loci.py @@ -4,17 +4,16 @@ import os import sys - import unittest + # noinspection PyProtectedMember from numpy.testing import assert_allclose from numpy.testing import assert_array_less from numpy.testing import assert_equal from numpy.testing import assert_raises - -from sklearn.metrics import roc_auc_score -from sklearn.base import clone from scipy.stats import rankdata +from sklearn.base import clone +from sklearn.metrics import roc_auc_score # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line diff --git a/pyod/test/test_loda.py b/pyod/test/test_loda.py index 44f637ac0..7badffc46 100644 --- a/pyod/test/test_loda.py +++ b/pyod/test/test_loda.py @@ -4,17 +4,13 @@ import os import sys - import unittest + # noinspection PyProtectedMember -from numpy.testing import assert_allclose -from numpy.testing import assert_array_less from numpy.testing import assert_equal from numpy.testing import assert_raises - -from sklearn.metrics import roc_auc_score from sklearn.base import clone -from scipy.stats import rankdata +from sklearn.metrics import roc_auc_score # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line diff --git a/pyod/test/test_lof.py b/pyod/test/test_lof.py index 0130f2ca0..f67206bc6 100644 --- a/pyod/test/test_lof.py +++ b/pyod/test/test_lof.py @@ -4,17 +4,16 @@ import os import sys - import unittest + # noinspection PyProtectedMember from numpy.testing import assert_allclose from numpy.testing import assert_array_less from numpy.testing import assert_equal from numpy.testing import assert_raises - -from sklearn.metrics import roc_auc_score -from sklearn.base import clone from scipy.stats import rankdata +from sklearn.base 
import clone +from sklearn.metrics import roc_auc_score # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line diff --git a/pyod/test/test_lscp.py b/pyod/test/test_lscp.py index 4afaf9b34..c7eb4c10c 100644 --- a/pyod/test/test_lscp.py +++ b/pyod/test/test_lscp.py @@ -5,22 +5,20 @@ import os import sys +import unittest from os import path -import unittest # noinspection PyProtectedMember from numpy.testing import assert_allclose from numpy.testing import assert_array_less from numpy.testing import assert_equal from numpy.testing import assert_raises - -from sklearn.model_selection import train_test_split -from sklearn.utils.validation import check_X_y from scipy.io import loadmat - -from sklearn.metrics import roc_auc_score -from sklearn.base import clone from scipy.stats import rankdata +from sklearn.base import clone +from sklearn.metrics import roc_auc_score +from sklearn.model_selection import train_test_split +from sklearn.utils.validation import check_X_y # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line @@ -28,7 +26,6 @@ from pyod.models.lscp import LSCP from pyod.models.lof import LOF -from pyod.utils.utility import standardizer from pyod.utils.data import generate_data diff --git a/pyod/test/test_lunar.py b/pyod/test/test_lunar.py index 13a1eb508..0407bb089 100644 --- a/pyod/test/test_lunar.py +++ b/pyod/test/test_lunar.py @@ -5,18 +5,14 @@ import os import sys - -import numpy as np import unittest + # noinspection PyProtectedMember -from numpy.testing import assert_allclose from numpy.testing import assert_array_less from numpy.testing import assert_equal from numpy.testing import assert_raises - -from sklearn.metrics import roc_auc_score from sklearn.base import clone -from scipy.stats import rankdata +from sklearn.metrics import roc_auc_score # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line diff --git a/pyod/test/test_mad.py b/pyod/test/test_mad.py index 95da394b6..3a935dc43 100644 --- a/pyod/test/test_mad.py +++ b/pyod/test/test_mad.py @@ -4,17 +4,15 @@ import os import sys - import unittest + # noinspection PyProtectedMember from numpy.testing import assert_allclose from numpy.testing import assert_array_less from numpy.testing import assert_equal from numpy.testing import assert_raises - -from sklearn.metrics import roc_auc_score -from sklearn.base import clone from scipy.stats import rankdata +from sklearn.metrics import roc_auc_score # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line diff --git a/pyod/test/test_mcd.py b/pyod/test/test_mcd.py index d90638964..fd27b2d66 100644 --- a/pyod/test/test_mcd.py +++ b/pyod/test/test_mcd.py @@ -4,17 +4,16 @@ import os import sys - import unittest + # noinspection PyProtectedMember from numpy.testing import assert_allclose from numpy.testing import assert_array_less from numpy.testing import assert_equal from numpy.testing import assert_raises - -from sklearn.metrics import roc_auc_score -from sklearn.base import clone from scipy.stats import rankdata +from sklearn.base import clone +from sklearn.metrics import roc_auc_score # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line diff --git a/pyod/test/test_mo_gaal.py b/pyod/test/test_mo_gaal.py index 
7b3415bb9..f3732b678 100644 --- a/pyod/test/test_mo_gaal.py +++ b/pyod/test/test_mo_gaal.py @@ -4,11 +4,9 @@ import os import sys - import unittest + # noinspection PyProtectedMember -from numpy.testing import assert_allclose -from numpy.testing import assert_array_less from numpy.testing import assert_equal from numpy.testing import assert_raises from sklearn.base import clone diff --git a/pyod/test/test_ocsvm.py b/pyod/test/test_ocsvm.py index ca1af97cc..c8028a6a6 100644 --- a/pyod/test/test_ocsvm.py +++ b/pyod/test/test_ocsvm.py @@ -5,17 +5,16 @@ import os import sys - import unittest + # noinspection PyProtectedMember from numpy.testing import assert_allclose from numpy.testing import assert_array_less from numpy.testing import assert_equal from numpy.testing import assert_raises - -from sklearn.metrics import roc_auc_score -from sklearn.base import clone from scipy.stats import rankdata +from sklearn.base import clone +from sklearn.metrics import roc_auc_score # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line diff --git a/pyod/test/test_pca.py b/pyod/test/test_pca.py index 992679a63..42fb8942b 100644 --- a/pyod/test/test_pca.py +++ b/pyod/test/test_pca.py @@ -4,17 +4,16 @@ import os import sys - import unittest + # noinspection PyProtectedMember from numpy.testing import assert_allclose from numpy.testing import assert_array_less from numpy.testing import assert_equal from numpy.testing import assert_raises - -from sklearn.metrics import roc_auc_score -from sklearn.base import clone from scipy.stats import rankdata +from sklearn.base import clone +from sklearn.metrics import roc_auc_score # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line diff --git a/pyod/test/test_rgraph.py b/pyod/test/test_rgraph.py index 62ab24fa5..cb3b22a3d 100644 --- a/pyod/test/test_rgraph.py +++ b/pyod/test/test_rgraph.py @@ -4,14 +4,13 @@ import os import sys - import unittest + # noinspection PyProtectedMember from numpy.testing import assert_equal from numpy.testing import assert_raises - -from sklearn.metrics import roc_auc_score from sklearn.base import clone +from sklearn.metrics import roc_auc_score # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line diff --git a/pyod/test/test_rod.py b/pyod/test/test_rod.py index 3c8604375..0b07dc1ab 100644 --- a/pyod/test/test_rod.py +++ b/pyod/test/test_rod.py @@ -4,15 +4,14 @@ import os import sys -import numpy as np import unittest + +import numpy as np # noinspection PyProtectedMember from numpy.testing import * from numpy.testing import assert_array_less from numpy.testing import assert_equal from numpy.testing import assert_raises - -from sklearn.metrics import roc_auc_score from scipy.stats import rankdata # temporary solution for relative imports in case pyod is not installed diff --git a/pyod/test/test_sampling.py b/pyod/test/test_sampling.py index 1f5521196..c55c8aa3a 100644 --- a/pyod/test/test_sampling.py +++ b/pyod/test/test_sampling.py @@ -6,7 +6,6 @@ import unittest import numpy as np - # noinspection PyProtectedMember from numpy.testing import ( assert_allclose, @@ -14,12 +13,13 @@ assert_equal, assert_raises, ) -from pyod.models.sampling import Sampling -from pyod.utils.data import generate_data from scipy.stats import rankdata from sklearn.base import clone from sklearn.metrics import roc_auc_score +from 
pyod.models.sampling import Sampling +from pyod.utils.data import generate_data + # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) diff --git a/pyod/test/test_so_gaal.py b/pyod/test/test_so_gaal.py index fd0f04a23..c2da63f5c 100644 --- a/pyod/test/test_so_gaal.py +++ b/pyod/test/test_so_gaal.py @@ -4,15 +4,11 @@ import os import sys - import unittest + # noinspection PyProtectedMember -from numpy.testing import assert_allclose -from numpy.testing import assert_array_less from numpy.testing import assert_equal from numpy.testing import assert_raises - -from sklearn.metrics import roc_auc_score from sklearn.base import clone # temporary solution for relative imports in case pyod is not installed @@ -21,7 +17,6 @@ from pyod.models.so_gaal import SO_GAAL from pyod.utils.data import generate_data -from pyod.utils.data import evaluate_print class TestSO_GAAL(unittest.TestCase): diff --git a/pyod/test/test_sod.py b/pyod/test/test_sod.py index a5d3f1d90..acfc5c44a 100644 --- a/pyod/test/test_sod.py +++ b/pyod/test/test_sod.py @@ -6,15 +6,14 @@ import os import sys import unittest + # noinspection PyProtectedMember from numpy.testing import assert_allclose from numpy.testing import assert_array_less from numpy.testing import assert_equal from numpy.testing import assert_raises - -from sklearn.metrics import roc_auc_score -from sklearn.base import clone from scipy.stats import rankdata +from sklearn.metrics import roc_auc_score # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line diff --git a/pyod/test/test_sos.py b/pyod/test/test_sos.py index c7457a3fd..51bba6660 100644 --- a/pyod/test/test_sos.py +++ b/pyod/test/test_sos.py @@ -4,16 +4,13 @@ import os import sys - import unittest + # noinspection PyProtectedMember from numpy.testing import assert_allclose from numpy.testing import assert_array_less from numpy.testing import assert_equal from numpy.testing import assert_raises - -from sklearn.metrics import roc_auc_score -from sklearn.base import clone from scipy.stats import rankdata # temporary solution for relative imports in case pyod is not installed diff --git a/pyod/test/test_stat_models.py b/pyod/test/test_stat_models.py index 2afefade5..193707d81 100644 --- a/pyod/test/test_stat_models.py +++ b/pyod/test/test_stat_models.py @@ -5,16 +5,14 @@ import os import sys - import unittest + +import numpy as np # noinspection PyProtectedMember from numpy.testing import assert_allclose -from numpy.testing import assert_array_less from numpy.testing import assert_equal from numpy.testing import assert_raises -import numpy as np - # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) diff --git a/pyod/test/test_suod.py b/pyod/test/test_suod.py index 5e7d0f7b8..a50e4c892 100644 --- a/pyod/test/test_suod.py +++ b/pyod/test/test_suod.py @@ -5,22 +5,17 @@ import os import sys +import unittest from os import path -import unittest # noinspection PyProtectedMember -from numpy.testing import assert_allclose -from numpy.testing import assert_array_less from numpy.testing import assert_equal from numpy.testing import assert_raises - -from sklearn.model_selection import train_test_split -from sklearn.utils.validation import 
check_X_y from scipy.io import loadmat - -from sklearn.metrics import roc_auc_score from sklearn.base import clone -from scipy.stats import rankdata +from sklearn.metrics import roc_auc_score +from sklearn.model_selection import train_test_split +from sklearn.utils.validation import check_X_y # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line @@ -30,7 +25,6 @@ from pyod.models.lof import LOF from pyod.models.iforest import IForest from pyod.models.copod import COPOD -from pyod.utils.utility import standardizer from pyod.utils.data import generate_data diff --git a/pyod/test/test_utility.py b/pyod/test/test_utility.py index 249e3c087..1c3d5b326 100644 --- a/pyod/test/test_utility.py +++ b/pyod/test/test_utility.py @@ -5,9 +5,9 @@ import os import sys - import unittest +import numpy as np # noinspection PyProtectedMember from numpy.testing import assert_allclose from numpy.testing import assert_equal @@ -15,8 +15,6 @@ from sklearn.metrics import precision_score from sklearn.utils import check_random_state -import numpy as np - # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) diff --git a/pyod/test/test_vae.py b/pyod/test/test_vae.py index 16e3d6606..54bb532ea 100644 --- a/pyod/test/test_vae.py +++ b/pyod/test/test_vae.py @@ -4,16 +4,13 @@ import os import sys - import unittest + # noinspection PyProtectedMember -from numpy.testing import assert_allclose -from numpy.testing import assert_array_less from numpy.testing import assert_equal from numpy.testing import assert_raises - -from sklearn.metrics import roc_auc_score from sklearn.base import clone +from sklearn.metrics import roc_auc_score # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line diff --git a/pyod/test/test_xgbod.py b/pyod/test/test_xgbod.py index 1c33d6f52..c95f3c0de 100644 --- a/pyod/test/test_xgbod.py +++ b/pyod/test/test_xgbod.py @@ -5,21 +5,20 @@ import os import sys +import unittest from os import path -import unittest # noinspection PyProtectedMember from numpy.testing import assert_allclose from numpy.testing import assert_array_less from numpy.testing import assert_equal from numpy.testing import assert_raises - -from sklearn.metrics import roc_auc_score +from scipy.io import loadmat +from scipy.stats import rankdata from sklearn.base import clone +from sklearn.metrics import roc_auc_score from sklearn.model_selection import train_test_split from sklearn.utils.validation import check_X_y -from scipy.io import loadmat -from scipy.stats import rankdata # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line diff --git a/pyod/utils/__init__.py b/pyod/utils/__init__.py index 3f5917c71..6cb1558c6 100644 --- a/pyod/utils/__init__.py +++ b/pyod/utils/__init__.py @@ -1,17 +1,17 @@ # -*- coding: utf-8 -*- -from .utility import check_parameter -from .utility import standardizer -from .utility import score_to_label -from .utility import precision_n_scores -from .utility import get_label_n -from .utility import argmaxn -from .utility import invert_order -from .utility import get_optimal_n_bins -from .data import generate_data from .data import evaluate_print +from .data import generate_data from .stat_models import pairwise_distances_no_broadcast 
-from .stat_models import wpearsonr from .stat_models import pearsonr_mat +from .stat_models import wpearsonr +from .utility import argmaxn +from .utility import check_parameter +from .utility import get_label_n +from .utility import get_optimal_n_bins +from .utility import invert_order +from .utility import precision_n_scores +from .utility import score_to_label +from .utility import standardizer __all__ = ['check_parameter', 'standardizer', diff --git a/pyod/utils/data.py b/pyod/utils/data.py index 7aaba039f..d3e29a005 100644 --- a/pyod/utils/data.py +++ b/pyod/utils/data.py @@ -9,17 +9,18 @@ from __future__ import print_function from warnings import warn + import numpy as np from sklearn.datasets import make_blobs +from sklearn.metrics import roc_auc_score from sklearn.model_selection import train_test_split -from sklearn.utils import column_or_1d from sklearn.utils import check_X_y -from sklearn.utils import check_random_state from sklearn.utils import check_consistent_length -from sklearn.metrics import roc_auc_score +from sklearn.utils import check_random_state +from sklearn.utils import column_or_1d -from .utility import precision_n_scores from .utility import check_parameter +from .utility import precision_n_scores MAX_INT = np.iinfo(np.int32).max diff --git a/pyod/utils/example.py b/pyod/utils/example.py index 2f2ffece7..1d96f814f 100644 --- a/pyod/utils/example.py +++ b/pyod/utils/example.py @@ -9,6 +9,7 @@ from __future__ import print_function import matplotlib.pyplot as plt + from .data import check_consistent_shape from .data import get_outliers_inliers diff --git a/pyod/utils/torch_utility.py b/pyod/utils/torch_utility.py index f17fd9b5b..643e43339 100644 --- a/pyod/utils/torch_utility.py +++ b/pyod/utils/torch_utility.py @@ -1,5 +1,3 @@ -import torch - import torch.nn as nn diff --git a/pyod/utils/utility.py b/pyod/utils/utility.py index 6998eb058..8f08c6e16 100644 --- a/pyod/utils/utility.py +++ b/pyod/utils/utility.py @@ -7,19 +7,17 @@ from __future__ import division from __future__ import print_function -import numpy as np -from numpy import percentile import numbers +import numpy as np import sklearn +from numpy import percentile from sklearn.metrics import precision_score from sklearn.preprocessing import StandardScaler - -from sklearn.utils import column_or_1d from sklearn.utils import check_array from sklearn.utils import check_consistent_length - from sklearn.utils import check_random_state +from sklearn.utils import column_or_1d from sklearn.utils.random import sample_without_replacement MAX_INT = np.iinfo(np.int32).max From 2d3a93950bd02cc389dc550fb1e8d1762837e9cb Mon Sep 17 00:00:00 2001 From: yzhao062 Date: Fri, 29 Jul 2022 20:59:30 -0400 Subject: [PATCH 09/16] optimize import --- CHANGES.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.txt b/CHANGES.txt index 8eed4618e..a9352050d 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -167,4 +167,4 @@ v<1.0.3>, <07/04/2022> -- Add AnoGAN (#412). v<1.0.4>, <07/29/2022> -- General improvement of code quality and test coverage. v<1.0.4>, <07/29/2022> -- Add LUNAR (#413). v<1.0.4>, <07/29/2022> -- Add LUNAR (#415). - +v<1.0.5>, <07/29/2022> -- Import optimization. 
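Because the import-optimization commits only re-order and de-duplicate imports (see the pyod/utils/__init__.py hunk above), the public surface of pyod.utils is intended to be unchanged. A small, hypothetical smoke test of that assumption, not part of the patch and assuming pyod is installed:

    import numpy as np

    from pyod.utils import standardizer  # re-exported name, only its position in __init__.py moved

    X = np.random.randn(100, 5)
    X_norm = standardizer(X)  # zero mean / unit variance per column
    print(X_norm.mean(axis=0).round(3), X_norm.std(axis=0).round(3))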
From c3380ca21e517c9fab1f5ef457082faaec2be844 Mon Sep 17 00:00:00 2001 From: Alex Loftus Date: Fri, 5 Aug 2022 13:01:14 -0400 Subject: [PATCH 10/16] Update example.rst --- docs/example.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/example.rst b/docs/example.rst index 59f657206..0855d171a 100644 --- a/docs/example.rst +++ b/docs/example.rst @@ -46,7 +46,7 @@ Full example: `knn_example.py Date: Wed, 17 Aug 2022 23:16:23 -0400 Subject: [PATCH 11/16] code and doc optimization --- CHANGES.txt | 1 + README.rst | 67 ++++------------ docs/index.rst | 6 +- pyod/models/anogan.py | 134 ++++++++++++++++++++------------ pyod/models/cd.py | 38 ++++----- pyod/models/inne.py | 29 ++++--- pyod/models/kde.py | 14 ++-- pyod/models/rgraph.py | 165 ++++++++++++++++++++++++++-------------- pyod/models/sampling.py | 3 +- temp_text.txt | 50 ++++++++++++ 10 files changed, 309 insertions(+), 198 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index a9352050d..e88b7335f 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -168,3 +168,4 @@ v<1.0.4>, <07/29/2022> -- General improvement of code quality and test coverage. v<1.0.4>, <07/29/2022> -- Add LUNAR (#413). v<1.0.4>, <07/29/2022> -- Add LUNAR (#415). v<1.0.5>, <07/29/2022> -- Import optimization. +v<1.0.5>, <08/27/2022> -- Code optimization. diff --git a/README.rst b/README.rst index 529b0d2e4..2cd281f9e 100644 --- a/README.rst +++ b/README.rst @@ -68,17 +68,17 @@ or `Anomaly Detection `_. PyOD includes more than 40 detection algorithms, from classical LOF (SIGMOD 2000) to the latest ECOD (TKDE 2022). Since 2017, PyOD has been successfully used in numerous academic researches and -commercial products [#Zhao2019LSCP]_ [#Zhao2021SUOD]_ with more than 7 million downloads. +commercial products with more than `8 million downloads `_. It is also well acknowledged by the machine learning community with various dedicated posts/tutorials, including `Analytics Vidhya `_, `KDnuggets `_, and `Towards Data Science `_. -PyOD is featured for: +**PyOD is featured for**: * **Unified APIs, detailed documentation, and interactive examples** across various algorithms. -* **Advanced models**\ , including **classical ones by distance and density estimation**, **latest deep learning methods**, and **emerging algorithms like ECOD**. +* **Advanced models**\, including **classical distance and density estimation**, **latest deep learning methods**, and **emerging algorithms like ECOD**. * **Optimized performance with JIT and parallelization** using `numba `_ and `joblib `_. * **Fast training & prediction with SUOD** [#Zhao2021SUOD]_. @@ -126,7 +126,6 @@ or:: * `View the latest codes on Github `_ -* `Execute Interactive Jupyter Notebooks `_ * `Anomaly Detection Resources `_ @@ -139,7 +138,6 @@ or:: * `Model Save & Load <#model-save--load>`_ * `Fast Train with SUOD <#fast-train-with-suod>`_ * `Implemented Algorithms <#implemented-algorithms>`_ -* `Old Algorithm Benchmark <#old-algorithm-benchmark>`_ * `Quick Start for Outlier Detection <#quick-start-for-outlier-detection>`_ * `How to Contribute <#how-to-contribute>`_ * `Inclusion Criteria <#inclusion-criteria>`_ @@ -243,6 +241,19 @@ The organization of **ADBench** is provided below: :alt: benchmark-fig +**The comparison of selected models** is made available below +(\ `Figure `_\ , +`compare_all_models.py `_\ , +`Interactive Jupyter Notebooks `_\ ). +For Jupyter Notebooks, please navigate to **"/notebooks/Compare All Models.ipynb"**. + + +.. 
image:: https://raw.githubusercontent.com/yzhao062/pyod/master/examples/ALL.png + :target: https://raw.githubusercontent.com/yzhao062/pyod/master/examples/ALL.png + :alt: Comparision_of_All + + + ---- Model Save & Load @@ -393,52 +404,6 @@ Utility precision_n_scores calculate precision @ rank n ---- - -Old Algorithm Benchmark -^^^^^^^^^^^^^^^^^^^^^^^ - -In June 2022, we released a 36-page, the most comprehensive `anomaly detection benchmark paper `_. -The fully `open-sourced ADBench `_ compares 30 anomaly detection algorithms on 55 benchmark datasets. - -The organization of **ADBench** is provided below: - -.. image:: https://github.com/Minqi824/ADBench/blob/main/figs/ADBench.png?raw=true - :target: https://github.com/Minqi824/ADBench/blob/main/figs/ADBench.png?raw=true - :alt: benchmark-old - -**The content below is obsolete**. - -**The comparison among of implemented models** is made available below -(\ `Figure `_\ , -`compare_all_models.py `_\ , -`Interactive Jupyter Notebooks `_\ ). -For Jupyter Notebooks, please navigate to **"/notebooks/Compare All Models.ipynb"**. - - -.. image:: https://raw.githubusercontent.com/yzhao062/pyod/master/examples/ALL.png - :target: https://raw.githubusercontent.com/yzhao062/pyod/master/examples/ALL.png - :alt: Comparision_of_All - -A benchmark is supplied for select algorithms to provide an overview of the implemented models. -In total, 17 benchmark datasets are used for comparison, which -can be downloaded at `ODDS `_. - -For each dataset, it is first split into 60% for training and 40% for testing. -All experiments are repeated 10 times independently with random splits. -The mean of 10 trials is regarded as the final result. Three evaluation metrics -are provided: - -- The area under receiver operating characteristic (ROC) curve -- Precision @ rank n (P@N) -- Execution time - -Check the latest `benchmark `_. You could replicate this process by running -`benchmark.py `_. - - ----- - - Quick Start for Outlier Detection ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/index.rst b/docs/index.rst index a59414f41..069247d02 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -74,17 +74,17 @@ or `Anomaly Detection `_. PyOD includes more than 40 detection algorithms, from classical LOF (SIGMOD 2000) to the latest ECOD (TKDE 2020). Since 2017, PyOD :cite:`a-zhao2019pyod` has been successfully used in numerous -academic researches and commercial products :cite:`a-zhao2019lscp,a-zhao2021suod` with more than 7 million downloads. +academic researches and commercial products with more than `8 million downloads `_. It is also well acknowledged by the machine learning community with various dedicated posts/tutorials, including `Analytics Vidhya `_, `KDnuggets `_, and `Towards Data Science `_. -PyOD is featured for: +**PyOD is featured for**: * **Unified APIs, detailed documentation, and interactive examples** across various algorithms. -* **Advanced models**\ , including **classical ones by distance and density estimation**, **latest deep learning methods**, and **emerging algorithms like ECOD**. +* **Advanced models**\, including **classical distance and density estimation**, **latest deep learning methods**, and **emerging algorithms like ECOD**. * **Optimized performance with JIT and parallelization** using `numba `_ and `joblib `_. * **Fast training & prediction with SUOD** :cite:`a-zhao2021suod`. 
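The bullet that closes the docs/index.rst hunk above advertises fast training and prediction with SUOD. A minimal sketch of what that looks like from user code, assuming pyod and the suod backend are installed; the detector pool and parameter values below are illustrative choices, not taken from the documentation:

    import numpy as np

    from pyod.models.copod import COPOD
    from pyod.models.iforest import IForest
    from pyod.models.lof import LOF
    from pyod.models.suod import SUOD

    X_train = np.random.randn(1000, 10)
    X_test = np.random.randn(200, 10)

    # heterogeneous pool of base detectors, trained and scored in parallel by SUOD
    base_estimators = [LOF(n_neighbors=15), LOF(n_neighbors=25),
                       IForest(n_estimators=100), COPOD()]
    clf = SUOD(base_estimators=base_estimators, n_jobs=2,
               combination='average', verbose=False)
    clf.fit(X_train)

    train_scores = clf.decision_scores_          # outlier scores on the training data
    test_scores = clf.decision_function(X_test)  # outlier scores on new data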
diff --git a/pyod/models/anogan.py b/pyod/models/anogan.py index ca8105a82..cbe984213 100644 --- a/pyod/models/anogan.py +++ b/pyod/models/anogan.py @@ -30,8 +30,9 @@ class AnoGAN(BaseDetector): - """Anomaly Detection with Generative Adversarial Networks (AnoGAN). See the original paper - "Unsupervised anomaly detection with generative adversarial networks to guide marker discovery" + """Anomaly Detection with Generative Adversarial Networks (AnoGAN). + See the original paper "Unsupervised anomaly detection with generative + adversarial networks to guide marker discovery". See :cite:`schlegl2017unsupervised` for details. @@ -57,28 +58,31 @@ class AnoGAN(BaseDetector): The dropout to be used across all layers. G_layers : list, optional (default=[20,10,3,10,20]) - List that indicates the number of nodes per hidden layer for the generator. - Thus, [10,10] indicates 2 hidden layers having each 10 nodes. + List that indicates the number of nodes per hidden layer for the + generator. Thus, [10,10] indicates 2 hidden layers having each 10 nodes. D_layers : list, optional (default=[20,10,5]) - List that indicates the number of nodes per hidden layer for the discriminator. - Thus, [10,10] indicates 2 hidden layers having each 10 nodes. + List that indicates the number of nodes per hidden layer for the + discriminator. Thus, [10,10] indicates 2 hidden layers having each 10 + nodes. learning_rate: float in (0., 1), optional (default=0.001) - learning rate of training the the network + learning rate of training the network index_D_layer_for_recon_error: int, optional (default = 1) - This is the index of the hidden layer in the discriminator for which the reconstruction error - will be determined between query sample and the sample created from the latent space. + This is the index of the hidden layer in the discriminator for which + the reconstruction error will be determined between query sample and + the sample created from the latent space. learning_rate_query: float in (0., 1), optional (default=0.001) - learning rate for the backpropagation steps needed to find a point in the latent space - of the generator that approximate the query sample + learning rate for the backpropagation steps needed to find a point in + the latent space of the generator that approximate the query sample epochs_query: int, optional (default=20) - Number of epochs to approximate the query sample in the latent space of the generator + Number of epochs to approximate the query sample in the latent space + of the generator preprocessing : bool, optional (default=True) If True, apply standardization on the data. @@ -114,14 +118,21 @@ class AnoGAN(BaseDetector): ``threshold_`` on ``decision_scores_``. 
""" - def __init__(self, activation_hidden='tanh', dropout_rate=0.2, + def __init__(self, activation_hidden='tanh', + dropout_rate=0.2, latent_dim_G=2, - G_layers=[20, 10, 3, 10, 20], verbose=0, - D_layers=[20, 10, 5], index_D_layer_for_recon_error=1, + G_layers=[20, 10, 3, 10, 20], + verbose=0, + D_layers=[20, 10, 5], + index_D_layer_for_recon_error=1, epochs=500, - preprocessing=False, learning_rate=0.001, learning_rate_query=0.01, + preprocessing=False, + learning_rate=0.001, + learning_rate_query=0.01, epochs_query=20, - batch_size=32, output_activation=None, contamination=0.1): + batch_size=32, + output_activation=None, + contamination=0.1): super(AnoGAN, self).__init__(contamination=contamination) self.activation_hidden = activation_hidden @@ -140,7 +151,8 @@ def __init__(self, activation_hidden='tanh', dropout_rate=0.2, self.batch_size = batch_size self.verbose = verbose - check_parameter(dropout_rate, 0, 1, param_name='dropout_rate', include_left=True) + check_parameter(dropout_rate, 0, 1, param_name='dropout_rate', + include_left=True) def _build_model(self): #### Generator ##### @@ -155,7 +167,8 @@ def _build_model(self): Dense(l_dim, activation=self.activation_hidden)(last_layer)) last_layer = G_hl_dict[layer_name] - G_out = Dense(self.n_features_, activation=self.output_activation)(last_layer) + G_out = Dense(self.n_features_, activation=self.output_activation)( + last_layer) self.generator = Model(inputs=(G_in), outputs=[G_out]) self.hist_loss_generator = [] @@ -174,8 +187,10 @@ def _build_model(self): classifier_node = Dense(1, activation='sigmoid')(last_layer) - self.discriminator = Model(inputs=(D_in), outputs=[classifier_node, D_hl_dict[ - 'hl_{}'.format(self.index_D_layer_for_recon_error)]]) + self.discriminator = Model(inputs=(D_in), + outputs=[classifier_node, + D_hl_dict['hl_{}'.format( + self.index_D_layer_for_recon_error)]]) self.hist_loss_discriminator = [] # Set optimizer @@ -183,11 +198,14 @@ def _build_model(self): self.generator.compile(optimizer=opt) self.discriminator.compile(optimizer=opt) - def plot_learning_curves(self, start_ind=0, window_smoothening=10): # pragma: no cover + def plot_learning_curves(self, start_ind=0, + window_smoothening=10): # pragma: no cover fig = plt.figure(figsize=(12, 5)) - l_gen = pd.Series(self.hist_loss_generator[start_ind:]).rolling(window_smoothening).mean() - l_disc = pd.Series(self.hist_loss_discriminator[start_ind:]).rolling(window_smoothening).mean() + l_gen = pd.Series(self.hist_loss_generator[start_ind:]).rolling( + window_smoothening).mean() + l_disc = pd.Series(self.hist_loss_discriminator[start_ind:]).rolling( + window_smoothening).mean() ax = fig.add_subplot(1, 2, 1) ax.plot(range(len(l_gen)), l_gen, ) @@ -210,31 +228,40 @@ def train_step(self, data): with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape: X_gen = self.generator({'I1': latent_noise}, training=True) - real_output, _ = self.discriminator({'I1': X_original}, training=True) + real_output, _ = self.discriminator({'I1': X_original}, + training=True) fake_output, _ = self.discriminator({'I1': X_gen}, training=True) # Correctly predicted - loss_discriminator = cross_entropy(tf.ones_like(fake_output), fake_output) + loss_discriminator = cross_entropy(tf.ones_like(fake_output), + fake_output) total_loss_generator = loss_discriminator ## Losses discriminator - real_loss = cross_entropy(tf.ones_like(real_output, dtype='float32') * 0.9, - real_output) # one-sided label smoothening + real_loss = cross_entropy( + tf.ones_like(real_output, 
dtype='float32') * 0.9, + real_output) # one-sided label smoothening fake_loss = cross_entropy(tf.zeros_like(fake_output), fake_output) total_loss_discriminator = real_loss + fake_loss # Compute gradients - gradients_gen = gen_tape.gradient(total_loss_generator, self.generator.trainable_variables) + gradients_gen = gen_tape.gradient(total_loss_generator, + self.generator.trainable_variables) # Update weights - self.generator.optimizer.apply_gradients(zip(gradients_gen, self.generator.trainable_variables)) + self.generator.optimizer.apply_gradients( + zip(gradients_gen, self.generator.trainable_variables)) # Compute gradients - gradients_disc = disc_tape.gradient(total_loss_discriminator, self.discriminator.trainable_variables) + gradients_disc = disc_tape.gradient(total_loss_discriminator, + self.discriminator.trainable_variables) # Update weights - self.discriminator.optimizer.apply_gradients(zip(gradients_disc, self.discriminator.trainable_variables)) + self.discriminator.optimizer.apply_gradients( + zip(gradients_disc, self.discriminator.trainable_variables)) - self.hist_loss_generator.append(np.float64(total_loss_generator.numpy())) - self.hist_loss_discriminator.append(np.float64(total_loss_discriminator.numpy())) + self.hist_loss_generator.append( + np.float64(total_loss_generator.numpy())) + self.hist_loss_discriminator.append( + np.float64(total_loss_discriminator.numpy())) def fit_query(self, query_sample): @@ -244,15 +271,19 @@ def fit_query(self, query_sample): # Make pseudo input (just zeros) zeros = np.zeros((1, self.latent_dim_G)) - ### build model for back-propagating a approximate latent space where reconstruction with - # query sample is optimal ### + # build model for back-propagating a approximate latent space where + # reconstruction with query sample is optimal pseudo_in = Input(shape=(self.latent_dim_G,), name='I1') - z_gamma = Dense(self.latent_dim_G, activation=None, use_bias=True)(pseudo_in) + z_gamma = Dense(self.latent_dim_G, activation=None, use_bias=True)( + pseudo_in) sample_gen = self.generator({'I1': z_gamma}, training=False) - _, sample_disc_latent = self.discriminator({'I1': sample_gen}, training=False) + _, sample_disc_latent = self.discriminator({'I1': sample_gen}, + training=False) - self.query_model = Model(inputs=(pseudo_in), outputs=[z_gamma, sample_gen, sample_disc_latent]) + self.query_model = Model(inputs=(pseudo_in), + outputs=[z_gamma, sample_gen, + sample_disc_latent]) opt = Adam(learning_rate=self.learning_rate_query) self.query_model.compile(optimizer=opt) @@ -264,23 +295,31 @@ def fit_query(self, query_sample): with tf.GradientTape() as tape: - z, sample_gen, sample_disc_latent = self.query_model({'I1': zeros}, training=True) + z, sample_gen, sample_disc_latent = self.query_model( + {'I1': zeros}, training=True) - _, sample_disc_latent_original = self.discriminator({'I1': query_sample}, training=False) + _, sample_disc_latent_original = self.discriminator( + {'I1': query_sample}, training=False) # Reconstruction loss generator abs_err = tf.keras.backend.abs(query_sample - sample_gen) - loss_recon_gen = tf.keras.backend.mean(tf.keras.backend.mean(abs_err, axis=-1)) + loss_recon_gen = tf.keras.backend.mean( + tf.keras.backend.mean(abs_err, axis=-1)) # Reconstruction loss latent space of discrimator - abs_err = tf.keras.backend.abs(sample_disc_latent_original - sample_disc_latent) - loss_recon_disc = tf.keras.backend.mean(tf.keras.backend.mean(abs_err, axis=-1)) + abs_err = tf.keras.backend.abs( + sample_disc_latent_original - 
sample_disc_latent) + loss_recon_disc = tf.keras.backend.mean( + tf.keras.backend.mean(abs_err, axis=-1)) total_loss = loss_recon_gen + loss_recon_disc # equal weighting both terms # Compute gradients - gradients = tape.gradient(total_loss, self.query_model.trainable_variables[0:2]) + gradients = tape.gradient(total_loss, + self.query_model.trainable_variables[ + 0:2]) # Update weights - self.query_model.optimizer.apply_gradients(zip(gradients, self.query_model.trainable_variables[0:2])) + self.query_model.optimizer.apply_gradients( + zip(gradients, self.query_model.trainable_variables[0:2])) return total_loss.numpy() @@ -323,7 +362,8 @@ def fit(self, X, y=None): np.random.shuffle(X_norm) X_train_sel = X_norm[0: min(self.batch_size, self.n_samples_), :] - latent_noise = np.random.normal(0, 1, (X_train_sel.shape[0], self.latent_dim_G)) + latent_noise = np.random.normal(0, 1, ( + X_train_sel.shape[0], self.latent_dim_G)) self.train_step((np.float32(X_train_sel), np.float32(latent_noise))) diff --git a/pyod/models/cd.py b/pyod/models/cd.py index 6a630ab69..45c98731c 100644 --- a/pyod/models/cd.py +++ b/pyod/models/cd.py @@ -18,14 +18,12 @@ def whiten_data(X, pca): - X = pca.transform(X) return X def Cooks_dist(X, y, model): - # Leverage is computed as the diagonal of the projection matrix of X leverage = (X * np.linalg.pinv(X).T).sum(1) @@ -44,7 +42,6 @@ def Cooks_dist(X, y, model): return distance_ - class CD(BaseDetector): """Cook's distance can be used to identify points that negatively @@ -61,7 +58,7 @@ class CD(BaseDetector): define the threshold on the decision function. whiten : bool, optional (default=True) - transform X to have a covariance matrix that is the identity matrix  + transform X to have a covariance matrix that is the identity matrix of 1 in the diagonal and 0 for the other cells using PCA rule_of_thumb : bool, optional (default=False) @@ -91,13 +88,11 @@ class CD(BaseDetector): ``threshold_`` on ``decision_scores_``. """ - def __init__(self, whitening=True, contamination=0.1, rule_of_thumb=False): - super(CD, self).__init__(contamination=contamination) - self.whitening = whitening - self.rule_of_thumb = rule_of_thumb - + super(CD, self).__init__(contamination=contamination) + self.whitening = whitening + self.rule_of_thumb = rule_of_thumb def fit(self, X, y): """Fit detector. y is necessary for supervised method. @@ -117,9 +112,9 @@ def fit(self, X, y): # Validate inputs X and y try: X = check_array(X) - except ValueError: - X = X.reshape(-1,1) - + except ValueError: + X = X.reshape(-1, 1) + y = np.squeeze(check_array(y, ensure_2d=False)) self._set_n_classes(y) @@ -138,7 +133,8 @@ def fit(self, X, y): # Compute the influence threshold if self.rule_of_thumb: influence_threshold_ = 4 / X.shape[0] - self.contamination = sum(distance_ > influence_threshold_) / X.shape[0] + self.contamination = sum(distance_ > influence_threshold_) / \ + X.shape[0] self.decision_scores_ = distance_ @@ -146,7 +142,6 @@ def fit(self, X, y): return self - def decision_function(self, X): """Predict raw anomaly score of X using the fitted detector. 
@@ -172,18 +167,17 @@ def decision_function(self, X): try: X = check_array(X) - except ValueError: - X = X.reshape(-1,1) - - y = X[:,-1] - X = X[:,:-1] - + except ValueError: + X = X.reshape(-1, 1) + + y = X[:, -1] + X = X[:, :-1] # Apply whitening if self.whitening: - X = whiten_data(X, self.pca) + X = whiten_data(X, self.pca) - # Get Cook's Distance + # Get Cook's Distance distance_ = Cooks_dist(X, y, self.model) return distance_ diff --git a/pyod/models/inne.py b/pyod/models/inne.py index 8400b9b07..ee509fea6 100644 --- a/pyod/models/inne.py +++ b/pyod/models/inne.py @@ -25,13 +25,14 @@ class INNE(BaseDetector): """ Isolation-based anomaly detection using nearest-neighbor ensembles. - The INNE algorithm uses the nearest neighbour ensemble to isolate anomalies. - It partitions the data space into regions using a subsample and determines an - isolation score for each region. As each region adapts to local distribution, - the calculated isolation score is a local measure that is relative to the local - neighbourhood, enabling it to detect both global and local anomalies. INNE has - linear time complexity to efficiently handle large and high-dimensional datasets - with complex distributions. + The INNE algorithm uses the nearest neighbour ensemble to isolate + anomalies. It partitions the data space into regions using a subsample and + determines an isolation score for each region. As each region adapts to + local distribution, the calculated isolation score is a local measure that + is relative to the local neighbourhood, enabling it to detect both global + and local anomalies. INNE has linear time complexity to efficiently handle + large and high-dimensional datasets with complex distributions. + See :cite:`bandaragoda2018isolation` for details. Parameters @@ -181,14 +182,15 @@ def _fit(self, X): center_dist = euclidean_distances( self._centroids[i], self._centroids[i], squared=True) np.fill_diagonal(center_dist, np.inf) - # radius of each hypersphere is the Nearest Neighbors distance of centroid. + # radius of each hypersphere is the Nearest Neighbors + # distance of centroid. self._centroids_radius[i] = np.amin(center_dist, axis=1) # Nearest Neighbors of centroids cnn_index = np.argmin(center_dist, axis=1) cnn_radius = self._centroids_radius[i][cnn_index] self._ratio[i] = 1 - (cnn_radius + MIN_FLOAT) / \ - (self._centroids_radius[i] + MIN_FLOAT) + (self._centroids_radius[i] + MIN_FLOAT) return self def decision_function(self, X): @@ -234,14 +236,17 @@ def _score_samples(self, X): X = check_array(X, accept_sparse=False) isolation_scores = np.ones([self.n_estimators, X.shape[0]]) - # each test instance is evaluated against n_estimators sets of hyperspheres + # each test instance is evaluated against n_estimators sets of + # hyperspheres for i in range(self.n_estimators): x_dists = euclidean_distances(X, self._centroids[i], squared=True) # find instances that are covered by at least one hypersphere. 
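            # a point is covered by hypersphere j when its squared distance to
            # centroid j is at most that sphere's squared radius; cover_radius keeps
            # the radius for covering spheres and NaN elsewhere, so an all-NaN row
            # marks a point that no hypersphere of this ensemble member covers.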
cover_radius = np.where( - x_dists <= self._centroids_radius[i], self._centroids_radius[i], np.nan) + x_dists <= self._centroids_radius[i], + self._centroids_radius[i], np.nan) x_covered = np.where(~np.isnan(cover_radius).all(axis=1)) - # the centroid of the hypersphere covering x and having the smallest radius + # the centroid of the hypersphere covering x and having the + # smallest radius cnn_x = np.nanargmin(cover_radius[x_covered], axis=1) isolation_scores[i][x_covered] = self._ratio[i][cnn_x] # the isolation scores are averaged to produce the anomaly score diff --git a/pyod/models/kde.py b/pyod/models/kde.py index 6fc3e54c3..66fc2f74c 100644 --- a/pyod/models/kde.py +++ b/pyod/models/kde.py @@ -98,13 +98,13 @@ class KDE(BaseDetector): """ def __init__( - self, - contamination=0.1, - bandwidth=1.0, - algorithm="auto", - leaf_size=30, - metric="minkowski", - metric_params=None, + self, + contamination=0.1, + bandwidth=1.0, + algorithm="auto", + leaf_size=30, + metric="minkowski", + metric_params=None, ): super().__init__(contamination=contamination) self.bandwidth = bandwidth diff --git a/pyod/models/rgraph.py b/pyod/models/rgraph.py index 004071343..cea380115 100644 --- a/pyod/models/rgraph.py +++ b/pyod/models/rgraph.py @@ -22,8 +22,9 @@ class RGraph(BaseDetector): - """ Outlier Detection via R-graph. Paper: https://openaccess.thecvf.com/content_cvpr_2017/papers/You_Provable_Self-Representation_Based_CVPR_2017_paper.pdf - See :cite:`you2017provable` for details. + """ Outlier Detection via R-graph. + Paper: https://openaccess.thecvf.com/content_cvpr_2017/papers/You_Provable_Self-Representation_Based_CVPR_2017_paper.pdf + See :cite:`you2017provable` for details. Parameters ---------- @@ -34,41 +35,51 @@ class RGraph(BaseDetector): gamma : float gamma_nz : boolean, default True - gamma and gamma_nz together determines the parameter alpha. When ``gamma_nz = False``, - alpha = gamma. When ``gamma_nz = True``, then alpha = gamma * alpha0, where alpha0 is - the largest number such that the solution to the optimization problem with alpha = alpha0 - is the zero vector (see Proposition 1 in [1]). Therefore, when ``gamma_nz = True``, gamma - should be a value greater than 1.0. A good choice is typically in the range [5, 500]. + gamma and gamma_nz together determines the parameter alpha. + When ``gamma_nz = False``, alpha = gamma. + When ``gamma_nz = True``, then alpha = gamma * alpha0, where alpha0 is + the largest number such that the solution to the optimization problem + with alpha = alpha0 is the zero vector (see Proposition 1 in [1]). + Therefore, when ``gamma_nz = True``, gamma should be a value greater + than 1.0. A good choice is typically in the range [5, 500]. tau : float, default 1.0 Parameter for elastic net penalty term. - When tau = 1.0, the method reduces to sparse subspace clustering with basis pursuit (SSC-BP) [2]. - When tau = 0.0, the method reduces to least squares regression (LSR) [3]. + When tau = 1.0, the method reduces to sparse subspace clustering with + basis pursuit (SSC-BP) [2]. + When tau = 0.0, the method reduces to least squares regression (LSR). algorithm : string, default ``lasso_lars`` - Algorithm for computing the representation. Either lasso_lars or lasso_cd. - Note: ``lasso_lars`` and ``lasso_cd`` only support tau = 1. For cases tau << 1 linear regression is used. + Algorithm for computing the representation. Either lasso_lars or + lasso_cd. + Note: ``lasso_lars`` and ``lasso_cd`` only support tau = 1. + For cases tau << 1 linear regression is used. 
fit_intercept_LR: bool, optional (default=False) - For ``gamma`` > 10000 linear regression is used instead of ``lasso_lars`` or ``lasso_cd``. This parameter determines whether the + For ``gamma`` > 10000 linear regression is used instead of + ``lasso_lars`` or ``lasso_cd``. This parameter determines whether the intercept for the model is calculated. maxiter_lasso : int, default 1000 The maximum number of iterations for ``lasso_lars`` and ``lasso_cd``. n_nonzero : int, default 50 - This is an upper bound on the number of nonzero entries of each representation vector. - If there are more than n_nonzero nonzero entries, only the top n_nonzero number of + This is an upper bound on the number of nonzero entries of each + representation vector. + If there are more than n_nonzero nonzero entries, + only the top n_nonzero number of entries with largest absolute value are kept. active_support: boolean, default True - Set to True to use the active support algorithm in [1] for solving the optimization problem. - This should significantly reduce the running time when n_samples is large. + Set to True to use the active support algorithm in [1] for solving the + optimization problem. This should significantly reduce the running time + when n_samples is large. active_support_params: dictionary of string to any, optional - Parameters (keyword arguments) and values for the active support algorithm. It may be - used to set the parameters ``support_init``, ``support_size`` and ``maxiter``, see + Parameters (keyword arguments) and values for the active support + algorithm. It may be used to set the parameters ``support_init``, + ``support_size`` and ``maxiter``, see ``active_support_elastic_net`` for details. Example: active_support_params={'support_size':50, 'maxiter':100} Ignored when ``active_support=False`` @@ -128,9 +139,12 @@ class RGraph(BaseDetector): ``threshold_`` on ``decision_scores_``. 
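    Examples
    --------
    A minimal, illustrative sketch of the usual PyOD workflow with the default
    parameters listed above; ``X_train`` and ``X_test`` stand for user-supplied
    numpy arrays of shape (n_samples, n_features)::

        from pyod.models.rgraph import RGraph

        clf = RGraph(transition_steps=10, gamma=50.0, contamination=0.1)
        clf.fit(X_train)                         # unsupervised fit
        scores = clf.decision_function(X_test)   # higher score = more abnormal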
""" - def __init__(self, transition_steps=10, n_nonzero=10, gamma=50.0, gamma_nz=True, algorithm='lasso_lars', tau=1.0, - maxiter_lasso=1000, preprocessing=True, contamination=0.1, blocksize_test_data=10, - support_init='L2', maxiter=40, support_size=100, active_support=True, fit_intercept_LR=False, + def __init__(self, transition_steps=10, n_nonzero=10, gamma=50.0, + gamma_nz=True, algorithm='lasso_lars', tau=1.0, + maxiter_lasso=1000, preprocessing=True, contamination=0.1, + blocksize_test_data=10, + support_init='L2', maxiter=40, support_size=100, + active_support=True, fit_intercept_LR=False, verbose=True): super(RGraph, self).__init__(contamination=contamination) @@ -152,8 +166,10 @@ def __init__(self, transition_steps=10, n_nonzero=10, gamma=50.0, gamma_nz=True, self.blocksize_test_data = blocksize_test_data self.fit_intercept_LR = fit_intercept_LR - def active_support_elastic_net(self, X, y, alpha, tau=1.0, algorithm='lasso_lars', support_init='L2', - support_size=100, maxiter=40, maxiter_lasso=1000): + def active_support_elastic_net(self, X, y, alpha, tau=1.0, + algorithm='lasso_lars', support_init='L2', + support_size=100, maxiter=40, + maxiter_lasso=1000): """ Source: https://github.com/ChongYou/subspace-clustering/blob/master/cluster/selfrepresentation.py An active support based algorithm for solving the elastic net optimization problem @@ -162,19 +178,28 @@ def active_support_elastic_net(self, X, y, alpha, tau=1.0, algorithm='lasso_lars Parameters ----------- X : array-like, shape (n_samples, n_features) + y : array-like, shape (1, n_features) + alpha : float + tau : float, default 1.0 + algorithm : string, default ``spams`` - Algorithm for computing solving the subproblems. Either lasso_lars or lasso_cd or spams + Algorithm for computing solving the subproblems. Either lasso_lars + or lasso_cd or spams (installation of spams package is required). Note: ``lasso_lars`` and ``lasso_cd`` only support tau = 1. + support_init: string, default ``knn`` This determines how the active support is initialized. It can be either ``knn`` or ``L2``. + support_size: int, default 100 This determines the size of the working set. - A small support_size decreases the runtime per iteration while increase the number of iterations. + A small support_size decreases the runtime per iteration while + increase the number of iterations. + maxiter: int default 40 Termination condition for active support update. 
@@ -186,30 +211,37 @@ def active_support_elastic_net(self, X, y, alpha, tau=1.0, algorithm='lasso_lars n_samples = X.shape[0] if n_samples <= support_size: # skip active support search for small scale data - supp = np.arange(n_samples, dtype=int) # this results in the following iteration to converge in 1 iteration + supp = np.arange(n_samples, + dtype=int) # this results in the following iteration to converge in 1 iteration else: if support_init == 'L2': - L2sol = np.linalg.solve(np.identity(y.shape[1]) * alpha + np.dot(X.T, X), y.T) + L2sol = np.linalg.solve( + np.identity(y.shape[1]) * alpha + np.dot(X.T, X), y.T) c0 = np.dot(X, L2sol)[:, 0] - supp = np.argpartition(-np.abs(c0), support_size)[0:support_size] + supp = np.argpartition(-np.abs(c0), support_size)[ + 0:support_size] elif support_init == 'knn': - supp = np.argpartition(-np.abs(np.dot(y, X.T)[0]), support_size)[0:support_size] + supp = np.argpartition(-np.abs(np.dot(y, X.T)[0]), + support_size)[0:support_size] curr_obj = float("inf") for _ in range(maxiter): Xs = X[supp, :] - ## Removed the original option to use 'spams' since this would require the spams dependency + ## Removed the original option to use 'spams' since this would + # require the spams dependency # if algorithm == 'spams': # cs = spams.lasso(np.asfortranarray(y.T), D=np.asfortranarray(Xs.T), # lambda1=tau*alpha, lambda2=(1.0-tau)*alpha) # cs = np.asarray(cs.todense()).T # else: - cs = sparse_encode(y, Xs, algorithm=algorithm, alpha=alpha, max_iter=maxiter_lasso) + cs = sparse_encode(y, Xs, algorithm=algorithm, alpha=alpha, + max_iter=maxiter_lasso) delta = (y - np.dot(cs, Xs)) / alpha - obj = tau * np.sum(np.abs(cs[0])) + (1.0 - tau) / 2.0 * np.sum(np.power(cs[0], 2.0)) + alpha / 2.0 * np.sum( + obj = tau * np.sum(np.abs(cs[0])) + (1.0 - tau) / 2.0 * np.sum( + np.power(cs[0], 2.0)) + alpha / 2.0 * np.sum( np.power(delta, 2.0)) if curr_obj - obj < 1.0e-10 * curr_obj: break @@ -226,10 +258,13 @@ def active_support_elastic_net(self, X, y, alpha, tau=1.0, algorithm='lasso_lars activesupp = supp[np.abs(cs[0]) > 1.0e-10] if activesupp.size > 0.8 * support_size: # this suggests that support_size is too small and needs to be increased - support_size = min([round(max([activesupp.size, support_size]) * 1.1), n_samples]) + support_size = min( + [round(max([activesupp.size, support_size]) * 1.1), + n_samples]) if addedsupp.size + activesupp.size > support_size: - ord = np.argpartition(-coherence[addedsupp], support_size - activesupp.size)[ + ord = np.argpartition(-coherence[addedsupp], + support_size - activesupp.size)[ 0:support_size - activesupp.size] addedsupp = addedsupp[ord] @@ -239,9 +274,12 @@ def active_support_elastic_net(self, X, y, alpha, tau=1.0, algorithm='lasso_lars c[supp] = cs return c - def elastic_net_subspace_clustering(self, X, gamma=50.0, gamma_nz=True, tau=1.0, algorithm='lasso_lars', + def elastic_net_subspace_clustering(self, X, gamma=50.0, gamma_nz=True, + tau=1.0, algorithm='lasso_lars', fit_intercept_LR=False, - active_support=True, active_support_params=None, n_nonzero=50, + active_support=True, + active_support_params=None, + n_nonzero=50, maxiter_lasso=1000): """ Source: https://github.com/ChongYou/subspace-clustering/blob/master/cluster/selfrepresentation.py @@ -308,8 +346,11 @@ def elastic_net_subspace_clustering(self, X, gamma=50.0, gamma_nz=True, tau=1.0, [3] C. Lu, et al. 
Robust and efficient subspace segmentation via least squares regression, ECCV 2012 """ - if ((algorithm in ('lasso_lars', 'lasso_cd')) and (tau < 1.0 - 1.0e-10)): - warnings.warn('algorithm {} cannot handle tau smaller than 1. Using tau = 1'.format(algorithm)) + if ((algorithm in ('lasso_lars', 'lasso_cd')) and ( + tau < 1.0 - 1.0e-10)): + warnings.warn( + 'algorithm {} cannot handle tau smaller than 1. Using tau = 1'.format( + algorithm)) tau = 1.0 if active_support == True and active_support_params == None: @@ -332,14 +373,16 @@ def elastic_net_subspace_clustering(self, X, gamma=50.0, gamma_nz=True, tau=1.0, if algorithm in ('lasso_lars', 'lasso_cd'): if gamma_nz == True: coh = np.delete(np.absolute(np.dot(X, y.T)), i) - alpha0 = np.amax(coh) / tau # value for which the solution is zero + alpha0 = np.amax( + coh) / tau # value for which the solution is zero alpha = alpha0 / gamma else: alpha = 1.0 / gamma if (gamma >= 10 ** 4): if (gamma_is_zero_notification == False): - warnings.warn('Set alpha = 0 i.e. LinearRegression() is used') + warnings.warn( + 'Set alpha = 0 i.e. LinearRegression() is used') gamma_is_zero_notification = True alpha = 0 @@ -351,7 +394,9 @@ def elastic_net_subspace_clustering(self, X, gamma=50.0, gamma_nz=True, tau=1.0, elif active_support == True: - c = self.active_support_elastic_net(X, y, alpha, tau, algorithm, **active_support_params) + c = self.active_support_elastic_net(X, y, alpha, tau, + algorithm, + **active_support_params) else: ## Removed the original option to use 'spams' since this would require the spams dependency @@ -360,7 +405,8 @@ def elastic_net_subspace_clustering(self, X, gamma=50.0, gamma_nz=True, tau=1.0, # lambda1=tau * alpha, lambda2=(1.0-tau) * alpha) # c = np.asarray(c.todense()).T[0] # else: - c = sparse_encode(y, X, algorithm=algorithm, alpha=alpha, max_iter=maxiter_lasso)[0] + c = sparse_encode(y, X, algorithm=algorithm, alpha=alpha, + max_iter=maxiter_lasso)[0] else: warnings.warn("algorithm {} not found".format(algorithm)) @@ -376,7 +422,8 @@ def elastic_net_subspace_clustering(self, X, gamma=50.0, gamma_nz=True, tau=1.0, X[i, :] = y - return sparse.csr_matrix((vals, (rows, cols)), shape=(n_samples, n_samples)) + return sparse.csr_matrix((vals, (rows, cols)), + shape=(n_samples, n_samples)) def fit(self, X, y=None): """Fit detector. y is ignored in unsupervised methods. 
@@ -442,7 +489,8 @@ def decision_function(self, X): if (self.verbose == 1): print("Test block {}/{}".format(i, N)) - X_block_i = np.copy(X[i * self.blocksize_test_data: (i + 1) * self.blocksize_test_data]) + X_block_i = np.copy(X[i * self.blocksize_test_data: ( + i + 1) * self.blocksize_test_data]) if (X_block_i.shape[0] >= 1): original_size_i = X_block_i.shape[0] @@ -476,19 +524,25 @@ def decision_function(self, X): def _decision_function(self, X_norm): - A = self.elastic_net_subspace_clustering(X_norm, gamma=self.gamma, gamma_nz=self.gamma_nz, - tau=self.tau, algorithm=self.algorithm, - fit_intercept_LR=self.fit_intercept_LR, - active_support=self.active_support, n_nonzero=self.n_nonzero, - maxiter_lasso=self.maxiter_lasso, - active_support_params={'support_init': self.support_init, - 'support_size': self.support_size, - 'maxiter': self.maxiter} - ) + A = self.elastic_net_subspace_clustering( + X_norm, gamma=self.gamma, + gamma_nz=self.gamma_nz, + tau=self.tau, + algorithm=self.algorithm, + fit_intercept_LR=self.fit_intercept_LR, + active_support=self.active_support, + n_nonzero=self.n_nonzero, + maxiter_lasso=self.maxiter_lasso, + active_support_params={ + 'support_init': self.support_init, + 'support_size': self.support_size, + 'maxiter': self.maxiter} + ) self.transition_matrix_ = normalize(np.abs(A.toarray()), norm='l1') - pi = np.ones((1, len(self.transition_matrix_)), dtype='float64') / len(self.transition_matrix_) + pi = np.ones((1, len(self.transition_matrix_)), dtype='float64') / len( + self.transition_matrix_) pi_bar = np.zeros((1, len(self.transition_matrix_)), dtype='float64') # Do transition steps @@ -499,7 +553,8 @@ def _decision_function(self, X_norm): pi_bar /= self.transition_steps scores = pi_bar[0] - # smaller scores correspond with outliers, thus we use -1 * score such that + # smaller scores correspond with outliers, + # thus we use -1 * score such that # higher scores are associated with outliers scores = -1 * scores diff --git a/pyod/models/sampling.py b/pyod/models/sampling.py index 2358cb0d1..669764e49 100644 --- a/pyod/models/sampling.py +++ b/pyod/models/sampling.py @@ -149,7 +149,8 @@ def fit(self, X, y=None): if self.metric_params is None: self.dist = DistanceMetric.get_metric(self.metric) else: - self.dist = DistanceMetric.get_metric(self.metric, **self.metric_params) + self.dist = DistanceMetric.get_metric(self.metric, + **self.metric_params) pair_dist = self.dist.pairwise(X, self.subset) anomaly_scores = np.min(pair_dist, axis=1) diff --git a/temp_text.txt b/temp_text.txt index 8151665e8..22b918115 100644 --- a/temp_text.txt +++ b/temp_text.txt @@ -128,3 +128,53 @@ please navigate to **"/notebooks/Model Combination.ipynb"** * `Quick Start for Combining Outlier Scores from Various Base Detectors <#quick-start-for-combining-outlier-scores-from-various-base-detectors>`_ +* `Execute Interactive Jupyter Notebooks `_ + +* `Old Algorithm Benchmark <#old-algorithm-benchmark>`_ + +---- + + +Old Algorithm Benchmark +^^^^^^^^^^^^^^^^^^^^^^^ + +In June 2022, we released a 36-page, the most comprehensive `anomaly detection benchmark paper `_. +The fully `open-sourced ADBench `_ compares 30 anomaly detection algorithms on 55 benchmark datasets. + +The organization of **ADBench** is provided below: + +.. image:: https://github.com/Minqi824/ADBench/blob/main/figs/ADBench.png?raw=true + :target: https://github.com/Minqi824/ADBench/blob/main/figs/ADBench.png?raw=true + :alt: benchmark-old + +**The content below is obsolete**. 
+ +**The comparison among of implemented models** is made available below +(\ `Figure `_\ , +`compare_all_models.py `_\ , +`Interactive Jupyter Notebooks `_\ ). +For Jupyter Notebooks, please navigate to **"/notebooks/Compare All Models.ipynb"**. + + +.. image:: https://raw.githubusercontent.com/yzhao062/pyod/master/examples/ALL.png + :target: https://raw.githubusercontent.com/yzhao062/pyod/master/examples/ALL.png + :alt: Comparision_of_All + +A benchmark is supplied for select algorithms to provide an overview of the implemented models. +In total, 17 benchmark datasets are used for comparison, which +can be downloaded at `ODDS `_. + +For each dataset, it is first split into 60% for training and 40% for testing. +All experiments are repeated 10 times independently with random splits. +The mean of 10 trials is regarded as the final result. Three evaluation metrics +are provided: + +- The area under receiver operating characteristic (ROC) curve +- Precision @ rank n (P@N) +- Execution time + +Check the latest `benchmark `_. You could replicate this process by running +`benchmark.py `_. + + +---- \ No newline at end of file From 625abb8332cf7b6671ce94373b87e9bfd149fcda Mon Sep 17 00:00:00 2001 From: yzhao062 Date: Sun, 21 Aug 2022 19:15:59 -0400 Subject: [PATCH 12/16] Add tips for algorithm selection --- README.rst | 7 +++++++ docs/index.rst | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/README.rst b/README.rst index 2cd281f9e..5421250f5 100644 --- a/README.rst +++ b/README.rst @@ -99,6 +99,13 @@ It is also well acknowledged by the machine learning community with various dedi y_test_scores = clf.decision_function(X_test) # predict raw outlier scores on test +**Personal suggestion on selecting an OD algorithm**. If you do not know which algorithm to try, go with: + +- `ECOD `_: Example of using ECOD for outlier detection +- `Isolation Forest `_: Example of using Isolation Forest for outlier detection + +They are both fast and interpretable. Or, you could try more data-driven approach `MetaOD `_. + **Citing PyOD**\ : `PyOD paper `_ is published in diff --git a/docs/index.rst b/docs/index.rst index 069247d02..a724320e2 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -105,6 +105,13 @@ It is also well acknowledged by the machine learning community with various dedi y_test_scores = clf.decision_function(X_test) # predict raw outlier scores on test +**Personal suggestion on selecting an OD algorithm**. If you do not know which algorithm to try, go with: + +- `ECOD `_: Example of using ECOD for outlier detection +- `Isolation Forest `_: Example of using Isolation Forest for outlier detection + +They are both fast and interpretable. Or, you could try more data-driven approach `MetaOD `_. + **Citing PyOD**\ : From caf4629ad807143133884d6da9da7fab1517cc75 Mon Sep 17 00:00:00 2001 From: Lucas Date: Thu, 1 Sep 2022 22:26:13 +0200 Subject: [PATCH 13/16] Clean up docstring and initialization of Autoencoder_torch --- pyod/models/auto_encoder_torch.py | 57 ++++++++++++++++--------------- 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/pyod/models/auto_encoder_torch.py b/pyod/models/auto_encoder_torch.py index a79aad8ed..58d5dc4ab 100644 --- a/pyod/models/auto_encoder_torch.py +++ b/pyod/models/auto_encoder_torch.py @@ -120,19 +120,21 @@ class AutoEncoder(BaseDetector): hidden_activation : str, optional (default='relu') Activation function to use for hidden layers. All hidden layers are forced to use the same type of activation. 
- See https://keras.io/activations/ + See https://pytorch.org/docs/stable/nn.html for details. + Currently only + 'relu': nn.ReLU() + 'sigmoid': nn.Sigmoid() + 'tanh': nn.Tanh() + are supported. See pyod/utils/torch_utility.py for details. batch_norm : boolean, optional (default=True) Whether to apply Batch Normalization, See https://pytorch.org/docs/stable/generated/torch.nn.BatchNorm1d.html - loss : str or obj, optional (default=torch.nn.MSELoss) - String (name of objective function) or objective function. - NOT SUPPORT FOR CHANGE YET. - - optimizer : str, optional (default='adam') - String (name of optimizer) or optimizer instance. - NOT SUPPORT FOR CHANGE YET. + learning_rate : float, optional (default=1e-3) + Learning rate for the optimizer. This learning_rate is given to + an Adam optimizer (torch.optim.Adam). + See https://pytorch.org/docs/stable/generated/torch.optim.Adam.html epochs : int, optional (default=100) Number of epochs to train the model. @@ -143,17 +145,18 @@ class AutoEncoder(BaseDetector): dropout_rate : float in (0., 1), optional (default=0.2) The dropout to be used across all layers. - l2_regularizer : float in (0., 1), optional (default=0.1) - The regularization strength of activity_regularizer - applied on each layer. By default, l2 regularizer is used. See - https://keras.io/regularizers/ - - validation_size : float in (0., 1), optional (default=0.1) - The percentage of data to be used for validation. + weight_decay : float, optional (default=1e-5) + The weight decay for Adam optimizer. + See https://pytorch.org/docs/stable/generated/torch.optim.Adam.html preprocessing : bool, optional (default=True) If True, apply standardization on the data. + loss_fn : obj, optional (default=torch.nn.MSELoss) + Optimizer instance which implements torch.nn._Loss. + One of https://pytorch.org/docs/stable/nn.html#loss-functions + or a custom loss. Custom losses are currently unstable. + verbose : int, optional (default=1) Verbosity mode. @@ -162,6 +165,7 @@ class AutoEncoder(BaseDetector): - 2 = one line per epoch. For verbose >= 1, model summary may be printed. + !CURRENTLY NOT SUPPORTED.! random_state : random_state: int, RandomState instance or None, optional (default=None) @@ -169,6 +173,7 @@ class AutoEncoder(BaseDetector): number generator; If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used by `np.random`. + !CURRENTLY NOT SUPPORTED.! contamination : float in (0., 0.5), optional (default=0.1) The amount of contamination of the data set, i.e. 
@@ -212,13 +217,10 @@ def __init__(self, hidden_neurons=None, hidden_activation='relu', batch_norm=True, - # loss='mse', - # optimizer='adam', learning_rate=1e-3, epochs=100, batch_size=32, dropout_rate=0.2, - # l2_regularizer=0.1, weight_decay=1e-5, # validation_size=0.1, preprocessing=True, @@ -228,33 +230,34 @@ def __init__(self, contamination=0.1, device=None): super(AutoEncoder, self).__init__(contamination=contamination) + + # save the initialization values self.hidden_neurons = hidden_neurons self.hidden_activation = hidden_activation self.batch_norm = batch_norm self.learning_rate = learning_rate - self.epochs = epochs self.batch_size = batch_size - self.dropout_rate = dropout_rate self.weight_decay = weight_decay self.preprocessing = preprocessing + self.loss_fn = loss_fn + # self.verbose = verbose + self.device = device - if loss_fn is None: + # create default loss functions + if self.loss_fn is None: self.loss_fn = torch.nn.MSELoss() - if device is None: + # create default calculation device (support GPU if available) + if self.device is None: self.device = torch.device( "cuda:0" if torch.cuda.is_available() else "cpu") - else: - self.device = device - # default values + # default values for the amount of hidden neurons if self.hidden_neurons is None: self.hidden_neurons = [64, 32] - # self.verbose = verbose - # noinspection PyUnresolvedReferences def fit(self, X, y=None): """Fit detector. y is ignored in unsupervised methods. From 3a524d85e65f3f5dda33c316a0859e1a60b5a2b0 Mon Sep 17 00:00:00 2001 From: yzhao062 Date: Wed, 14 Sep 2022 23:14:55 -0400 Subject: [PATCH 14/16] partial update of alad --- CHANGES.txt | 1 + docs/pyod.models.rst | 9 + docs/zreferences.bib | 9 + examples/alad_example.py | 35 ++-- pyod/models/alad.py | 423 ++++++++++++++++++++++----------------- pyod/test/test_alad.py | 44 ++-- pyod/version.py | 2 +- 7 files changed, 296 insertions(+), 227 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index e88b7335f..391808116 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -169,3 +169,4 @@ v<1.0.4>, <07/29/2022> -- Add LUNAR (#413). v<1.0.4>, <07/29/2022> -- Add LUNAR (#415). v<1.0.5>, <07/29/2022> -- Import optimization. v<1.0.5>, <08/27/2022> -- Code optimization. +v<1.0.5>, <09/14/2022> -- Add ALAD. diff --git a/docs/pyod.models.rst b/docs/pyod.models.rst index 51f692604..2ce72bd3c 100644 --- a/docs/pyod.models.rst +++ b/docs/pyod.models.rst @@ -11,6 +11,15 @@ pyod.models.abod module :show-inheritance: :inherited-members: +pyod.models.alad module +----------------------- + +.. 
automodule:: pyod.models.alad + :members: + :undoc-members: + :show-inheritance: + :inherited-members: + pyod.models.anogan module ------------------------- diff --git a/docs/zreferences.bib b/docs/zreferences.bib index f4152402b..60375b761 100644 --- a/docs/zreferences.bib +++ b/docs/zreferences.bib @@ -458,4 +458,13 @@ @inproceedings{you2017provable booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, pages={3395--3404}, year={2017} +} + +@inproceedings{zenati2018adversarially, + title={Adversarially learned anomaly detection}, + author={Zenati, Houssam and Romain, Manon and Foo, Chuan-Sheng and Lecouat, Bruno and Chandrasekhar, Vijay}, + booktitle={2018 IEEE International conference on data mining (ICDM)}, + pages={727--736}, + year={2018}, + organization={IEEE} } \ No newline at end of file diff --git a/examples/alad_example.py b/examples/alad_example.py index 660634fd0..8fe2de0c4 100644 --- a/examples/alad_example.py +++ b/examples/alad_example.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- -"""Example of using Adversarially Learned Anomaly Detection(ALAD) for outlier detection +"""Example of using Adversarially Learned Anomaly Detection (ALAD) for outlier +detection """ from __future__ import division from __future__ import print_function @@ -33,22 +34,22 @@ # train ALAD detector clf_name = 'ALAD' - clf = ALAD( epochs = 100, latent_dim = 2, - learning_rate_disc = 0.0001, - learning_rate_gen = 0.0001, - dropout_rate = 0.2, - add_recon_loss = False, - lambda_recon_loss= 0.05, - add_disc_zz_loss = True, - dec_layers=[ 75, 100 ], - enc_layers=[ 100, 75 ], - disc_xx_layers= [ 100, 75 ], - disc_zz_layers= [ 25, 25 ], - disc_xz_layers= [ 100, 75 ], - spectral_normalization = False, - activation_hidden_disc = 'tanh', activation_hidden_gen = 'tanh' , - preprocessing=True, batch_size = 200, contamination = contamination) - + clf = ALAD(epochs=100, latent_dim=2, + learning_rate_disc=0.0001, + learning_rate_gen=0.0001, + dropout_rate=0.2, + add_recon_loss=False, + lambda_recon_loss=0.05, + add_disc_zz_loss=True, + dec_layers=[75, 100], + enc_layers=[100, 75], + disc_xx_layers=[100, 75], + disc_zz_layers=[25, 25], + disc_xz_layers=[100, 75], + spectral_normalization=False, + activation_hidden_disc='tanh', activation_hidden_gen='tanh', + preprocessing=True, batch_size=200, contamination=contamination) + clf.fit(X_train) # get the prediction labels and outlier scores of the training data diff --git a/pyod/models/alad.py b/pyod/models/alad.py index da6c58242..cf8edc5f3 100644 --- a/pyod/models/alad.py +++ b/pyod/models/alad.py @@ -7,17 +7,15 @@ from __future__ import print_function import numpy as np -from matplotlib import pyplot as plt import pandas as pd +from matplotlib import pyplot as plt from sklearn.preprocessing import StandardScaler from sklearn.utils import check_array from sklearn.utils.validation import check_is_fitted -from ..utils.utility import check_parameter -from ..utils.stat_models import pairwise_distances_no_broadcast - from .base import BaseDetector from .base_dl import _get_tensorflow_version +from ..utils.utility import check_parameter # if tensorflow 2, import from tf directly if _get_tensorflow_version() == 1: @@ -27,68 +25,91 @@ from tensorflow.keras.models import Model from tensorflow.keras.layers import Input, Dense, Dropout from tensorflow.keras.optimizers import Adam - - - - class ALAD(BaseDetector): """Adversarially Learned Anomaly Detection (ALAD). 
Paper: https://arxiv.org/pdf/1812.02288.pdf + + See :cite:`zenati2018adversarially` for details. Parameters ---------- output_activation : str, optional (default=None) Activation function to use for output layers for encoder and dector. See https://keras.io/activations/ + activation_hidden_disc : str, optional (default='tanh') Activation function to use for hidden layers in discrimators. See https://keras.io/activations/ + activation_hidden_gen : str, optional (default='tanh') - Activation function to use for hidden layers in encoder and decoder (i.e. generator). + Activation function to use for hidden layers in encoder and decoder + (i.e. generator). See https://keras.io/activations/ + epochs : int, optional (default=500) Number of epochs to train the model. + batch_size : int, optional (default=32) Number of samples per gradient update. + dropout_rate : float in (0., 1), optional (default=0.2) The dropout to be used across all layers. + dec_layers : list, optional (default=[5,10,25]) - List that indicates the number of nodes per hidden layer for the decoder network. + List that indicates the number of nodes per hidden layer for the d + ecoder network. Thus, [10,10] indicates 2 hidden layers having each 10 nodes. + enc_layers : list, optional (default=[25,10,5]) - List that indicates the number of nodes per hidden layer for the encoder network. + List that indicates the number of nodes per hidden layer for the + encoder network. Thus, [10,10] indicates 2 hidden layers having each 10 nodes. + disc_xx_layers : list, optional (default=[25,10,5]) - List that indicates the number of nodes per hidden layer for discrimator_xx. + List that indicates the number of nodes per hidden layer for + discriminator_xx. Thus, [10,10] indicates 2 hidden layers having each 10 nodes. + disc_zz_layers : list, optional (default=[25,10,5]) - List that indicates the number of nodes per hidden layer for discrimator_zz. + List that indicates the number of nodes per hidden layer for + discriminator_zz. Thus, [10,10] indicates 2 hidden layers having each 10 nodes. + disc_xz_layers : list, optional (default=[25,10,5]) - List that indicates the number of nodes per hidden layer for discrimator_xz. + List that indicates the number of nodes per hidden layer for + discriminator_xz. Thus, [10,10] indicates 2 hidden layers having each 10 nodes. + learning_rate_gen: float in (0., 1), optional (default=0.001) - learning rate of training the the encoder and decoder + learning rate of training the encoder and decoder + learning_rate_disc: float in (0., 1), optional (default=0.001) - learning rate of training the discrimators + learning rate of training the discriminators + add_recon_loss: bool optional (default=False) - add an extra loss for encoder and decoder based on the reconstruction error + add an extra loss for encoder and decoder based on the reconstruction + error + lambda_recon_loss: float in (0., 1), optional (default=0.1) - if ``add_recon_loss= True``, the reconstruction loss gets multiplied by ``lambda_recon_loss`` - and added to the total loss for the generator (i.e. encoder and decoder). + if ``add_recon_loss= True``, the reconstruction loss gets multiplied + by ``lambda_recon_loss`` and added to the total loss for the generator + (i.e. encoder and decoder). preprocessing : bool, optional (default=True) If True, apply standardization on the data. + verbose : int, optional (default=1) Verbosity mode. 
- 0 = silent - 1 = progress bar + contamination : float in (0., 0.5), optional (default=0.1) The amount of contamination of the data set, i.e. the proportion of outliers in the data set. When fitting this is used to define the threshold on the decision function. + Attributes ---------- decision_scores_ : numpy array of shape (n_samples,) @@ -96,34 +117,36 @@ class ALAD(BaseDetector): The higher, the more abnormal. Outliers tend to have higher scores. This value is available once the detector is fitted. + threshold_ : float The threshold is based on ``contamination``. It is the ``n_samples * contamination`` most abnormal samples in ``decision_scores_``. The threshold is calculated for generating binary outlier labels. + labels_ : int, either 0 or 1 The binary labels of the training data. 0 stands for inliers and 1 for outliers/anomalies. It is generated by applying ``threshold_`` on ``decision_scores_``. """ - def __init__(self, activation_hidden_gen='tanh', - activation_hidden_disc='tanh', - output_activation = None, + def __init__(self, activation_hidden_gen='tanh', + activation_hidden_disc='tanh', + output_activation=None, dropout_rate=0.2, latent_dim=2, - dec_layers=[5, 10, 25], - enc_layers=[25, 10, 5], - disc_xx_layers=[25, 10, 5], - disc_zz_layers=[25, 10, 5], - disc_xz_layers=[25, 10, 5], - learning_rate_gen = 0.0001, learning_rate_disc = 0.0001, - add_recon_loss = False, lambda_recon_loss = 0.1, - epochs = 200, - verbose = 0, - preprocessing = False, - add_disc_zz_loss = True, spectral_normalization = False, - batch_size = 32, contamination=0.1): + dec_layers=[5, 10, 25], + enc_layers=[25, 10, 5], + disc_xx_layers=[25, 10, 5], + disc_zz_layers=[25, 10, 5], + disc_xz_layers=[25, 10, 5], + learning_rate_gen=0.0001, learning_rate_disc=0.0001, + add_recon_loss=False, lambda_recon_loss=0.1, + epochs=200, + verbose=0, + preprocessing=False, + add_disc_zz_loss=True, spectral_normalization=False, + batch_size=32, contamination=0.1): super(ALAD, self).__init__(contamination=contamination) self.activation_hidden_disc = activation_hidden_disc @@ -151,20 +174,21 @@ def __init__(self, activation_hidden_gen='tanh', self.verbose = verbose self.spectral_normalization = spectral_normalization - if( self.spectral_normalization == True): + if (self.spectral_normalization == True): try: global tfa import tensorflow_addons as tfa except ModuleNotFoundError: # Error handling - print('tensorflow_addons not found, cannot use spectral normalization. Install tensorflow_addons first.') + print( + 'tensorflow_addons not found, cannot use spectral normalization. 
Install tensorflow_addons first.') self.spectral_normalization = False - - check_parameter(dropout_rate, 0, 1, param_name='dropout_rate', include_left=True) + check_parameter(dropout_rate, 0, 1, param_name='dropout_rate', + include_left=True) def _build_model(self): - + #### Decoder ##### dec_in = Input(shape=(self.latent_dim,), name='I1') dec_1 = Dropout(self.dropout_rate)(dec_in) @@ -175,15 +199,16 @@ def _build_model(self): for i, l_dim in enumerate(self.dec_layers): layer_name = 'hl_{}'.format(i) dec_hl_dict[layer_name] = Dropout(self.dropout_rate)( - Dense(l_dim, activation=self.activation_hidden_gen)(last_layer)) + Dense(l_dim, activation=self.activation_hidden_gen)( + last_layer)) last_layer = dec_hl_dict[layer_name] - dec_out = Dense(self.n_features_, activation=self.output_activation)(last_layer) + dec_out = Dense(self.n_features_, activation=self.output_activation)( + last_layer) self.dec = Model(inputs=(dec_in), outputs=[dec_out]) self.hist_loss_dec = [] - - + #### Encoder ##### enc_in = Input(shape=(self.n_features_,), name='I1') enc_1 = Dropout(self.dropout_rate)(enc_in) @@ -194,23 +219,22 @@ def _build_model(self): for i, l_dim in enumerate(self.enc_layers): layer_name = 'hl_{}'.format(i) enc_hl_dict[layer_name] = Dropout(self.dropout_rate)( - Dense(l_dim, activation=self.activation_hidden_gen)(last_layer)) + Dense(l_dim, activation=self.activation_hidden_gen)( + last_layer)) last_layer = enc_hl_dict[layer_name] - enc_out = Dense(self.latent_dim, activation=self.output_activation)(last_layer) + enc_out = Dense(self.latent_dim, activation=self.output_activation)( + last_layer) self.enc = Model(inputs=(enc_in), outputs=[enc_out]) self.hist_loss_enc = [] - - - #### Discriminator_xz ##### disc_xz_in_x = Input(shape=(self.n_features_,), name='I1') disc_xz_in_z = Input(shape=(self.latent_dim,), name='I2') - disc_xz_in = tf.concat([disc_xz_in_x, disc_xz_in_z], axis = 1 ) - - disc_xz_1 = Dropout(self.dropout_rate )(disc_xz_in) + disc_xz_in = tf.concat([disc_xz_in_x, disc_xz_in_z], axis=1) + + disc_xz_1 = Dropout(self.dropout_rate)(disc_xz_in) last_layer = disc_xz_1 # Store all hidden layers in dict @@ -218,24 +242,30 @@ def _build_model(self): for i, l_dim in enumerate(self.disc_xz_layers): layer_name = 'hl_{}'.format(i) - if( self.spectral_normalization == True): - disc_xz_hl_dict[layer_name] = Dropout(self.dropout_rate)(tfa.layers.SpectralNormalization( Dense(l_dim, activation=self.activation_hidden_disc) )(last_layer) ) + if (self.spectral_normalization == True): + disc_xz_hl_dict[layer_name] = Dropout(self.dropout_rate)( + tfa.layers.SpectralNormalization( + Dense(l_dim, activation=self.activation_hidden_disc))( + last_layer)) else: - disc_xz_hl_dict[layer_name] = Dropout(self.dropout_rate)( Dense(l_dim, activation=self.activation_hidden_disc)(last_layer) ) + disc_xz_hl_dict[layer_name] = Dropout(self.dropout_rate)( + Dense(l_dim, activation=self.activation_hidden_disc)( + last_layer)) last_layer = disc_xz_hl_dict[layer_name] - disc_xz_out = Dense(1, activation= 'sigmoid' )(last_layer) - self.disc_xz = Model(inputs=(disc_xz_in_x, disc_xz_in_z), outputs=[disc_xz_out] ) + disc_xz_out = Dense(1, activation='sigmoid')(last_layer) + self.disc_xz = Model(inputs=(disc_xz_in_x, disc_xz_in_z), + outputs=[disc_xz_out]) # self.hist_loss_disc_xz = [] - - + #### Discriminator_xx ##### disc_xx_in_x = Input(shape=(self.n_features_,), name='I1') disc_xx_in_x_hat = Input(shape=(self.n_features_,), name='I2') - disc_xx_in = tf.concat([disc_xx_in_x, disc_xx_in_x_hat], axis = 1 ) - - disc_xx_1 = 
Dropout(self.dropout_rate, input_shape=(self.n_features_,))(disc_xx_in) + disc_xx_in = tf.concat([disc_xx_in_x, disc_xx_in_x_hat], axis=1) + + disc_xx_1 = Dropout(self.dropout_rate, + input_shape=(self.n_features_,))(disc_xx_in) last_layer = disc_xx_1 # Store all hidden layers in dict @@ -243,25 +273,30 @@ def _build_model(self): for i, l_dim in enumerate(self.disc_xx_layers): layer_name = 'hl_{}'.format(i) - if( self.spectral_normalization == True): - disc_xx_hl_dict[layer_name] = Dropout(self.dropout_rate)(tfa.layers.SpectralNormalization( Dense(l_dim, activation=self.activation_hidden_disc) )(last_layer) ) + if (self.spectral_normalization == True): + disc_xx_hl_dict[layer_name] = Dropout(self.dropout_rate)( + tfa.layers.SpectralNormalization( + Dense(l_dim, activation=self.activation_hidden_disc))( + last_layer)) else: - disc_xx_hl_dict[layer_name] = Dropout(self.dropout_rate)( Dense(l_dim, activation=self.activation_hidden_disc)(last_layer) ) + disc_xx_hl_dict[layer_name] = Dropout(self.dropout_rate)( + Dense(l_dim, activation=self.activation_hidden_disc)( + last_layer)) last_layer = disc_xx_hl_dict[layer_name] - disc_xx_out = Dense(1, activation= 'sigmoid' )(last_layer) - self.disc_xx = Model(inputs=(disc_xx_in_x, disc_xx_in_x_hat), outputs=[disc_xx_out, last_layer]) + disc_xx_out = Dense(1, activation='sigmoid')(last_layer) + self.disc_xx = Model(inputs=(disc_xx_in_x, disc_xx_in_x_hat), + outputs=[disc_xx_out, last_layer]) # self.hist_loss_disc_xx = [] - - - + #### Discriminator_zz ##### disc_zz_in_z = Input(shape=(self.latent_dim,), name='I1') disc_zz_in_z_prime = Input(shape=(self.latent_dim,), name='I2') - disc_zz_in = tf.concat([disc_zz_in_z, disc_zz_in_z_prime], axis = 1 ) - - disc_zz_1 = Dropout(self.dropout_rate, input_shape=(self.n_features_,))(disc_zz_in) + disc_zz_in = tf.concat([disc_zz_in_z, disc_zz_in_z_prime], axis=1) + + disc_zz_1 = Dropout(self.dropout_rate, + input_shape=(self.n_features_,))(disc_zz_in) last_layer = disc_zz_1 # Store all hidden layers in dict @@ -269,126 +304,146 @@ def _build_model(self): for i, l_dim in enumerate(self.disc_zz_layers): layer_name = 'hl_{}'.format(i) - if( self.spectral_normalization == True): - disc_zz_hl_dict[layer_name] = Dropout(self.dropout_rate)(tfa.layers.SpectralNormalization( Dense(l_dim, activation=self.activation_hidden_disc) )(last_layer) ) + if (self.spectral_normalization == True): + disc_zz_hl_dict[layer_name] = Dropout(self.dropout_rate)( + tfa.layers.SpectralNormalization( + Dense(l_dim, activation=self.activation_hidden_disc))( + last_layer)) else: - disc_zz_hl_dict[layer_name] = Dropout(self.dropout_rate)( Dense(l_dim, activation=self.activation_hidden_disc)(last_layer) ) + disc_zz_hl_dict[layer_name] = Dropout(self.dropout_rate)( + Dense(l_dim, activation=self.activation_hidden_disc)( + last_layer)) last_layer = disc_zz_hl_dict[layer_name] - disc_zz_out = Dense(1, activation= 'sigmoid' )(last_layer) - self.disc_zz = Model(inputs=(disc_zz_in_z, disc_zz_in_z_prime), outputs=[disc_zz_out]) + disc_zz_out = Dense(1, activation='sigmoid')(last_layer) + self.disc_zz = Model(inputs=(disc_zz_in_z, disc_zz_in_z_prime), + outputs=[disc_zz_out]) # self.hist_loss_disc_zz = [] - - + # Set optimizer - opt_gen = Adam(learning_rate=self.learning_rate_gen) - opt_disc = Adam(learning_rate=self.learning_rate_disc) - - self.dec.compile(optimizer = opt_gen ) - self.enc.compile(optimizer = opt_gen ) - self.disc_xz.compile(optimizer = opt_disc ) - self.disc_xx.compile(optimizer = opt_disc ) - self.disc_zz.compile(optimizer = opt_disc ) 
- - self.hist_loss_disc = [] - self.hist_loss_gen = [] + opt_gen = Adam(learning_rate=self.learning_rate_gen) + opt_disc = Adam(learning_rate=self.learning_rate_disc) + self.dec.compile(optimizer=opt_gen) + self.enc.compile(optimizer=opt_gen) + self.disc_xz.compile(optimizer=opt_disc) + self.disc_xx.compile(optimizer=opt_disc) + self.disc_zz.compile(optimizer=opt_disc) + self.hist_loss_disc = [] + self.hist_loss_gen = [] - def train_step(self, data ): + def train_step(self, data): cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=False) - + x_real, z_real = data - + def get_losses(): - y_true = tf.ones_like(x_real[:,[0]]) - y_fake = tf.zeros_like(x_real[:,[0]]) - - + y_true = tf.ones_like(x_real[:, [0]]) + y_fake = tf.zeros_like(x_real[:, [0]]) + # Generator x_gen = self.dec({'I1': z_real}, training=True) - + # Encoder z_gen = self.enc({'I1': x_real}, training=True) - - - #Discriminatorxz - out_truexz = self.disc_xz({'I1': x_real, 'I2': z_gen}, training=True) - out_fakexz = self.disc_xz({'I1': x_gen, 'I2': z_real}, training=True) - - #Discriminatorzz - if( self.add_disc_zz_loss == True): - out_truezz = self.disc_zz({'I1': z_real, 'I2': z_real}, training=True) - out_fakezz = self.disc_zz({'I1': z_real, 'I2': self.enc({'I1': self.dec({'I1': z_real }, training=True) }) }, training=True) - - #Discriminatorxx - out_truexx, _ = self.disc_xx({'I1': x_real, 'I2': x_real}, training=True) #self.Dxx(x_real, x_real) - out_fakexx, _ = self.disc_xx({'I1': x_real, 'I2': self.dec({'I1': self.enc({'I1': x_real }, training=True) }) }, training=True) - - - #Losses for discriminators - loss_dxz = cross_entropy(y_true, out_truexz) + cross_entropy( y_fake,out_fakexz) - loss_dxx = cross_entropy(y_true, out_truexx) + cross_entropy( y_fake,out_fakexx) - if( self.add_disc_zz_loss == True): - loss_dzz = cross_entropy(y_true, out_truezz) + cross_entropy( y_fake,out_fakezz) + + # Discriminatorxz + out_truexz = self.disc_xz({'I1': x_real, 'I2': z_gen}, + training=True) + out_fakexz = self.disc_xz({'I1': x_gen, 'I2': z_real}, + training=True) + + # Discriminatorzz + if (self.add_disc_zz_loss == True): + out_truezz = self.disc_zz({'I1': z_real, 'I2': z_real}, + training=True) + out_fakezz = self.disc_zz({'I1': z_real, 'I2': self.enc( + {'I1': self.dec({'I1': z_real}, training=True)})}, + training=True) + + # Discriminatorxx + out_truexx, _ = self.disc_xx({'I1': x_real, 'I2': x_real}, + training=True) # self.Dxx(x_real, x_real) + out_fakexx, _ = self.disc_xx({'I1': x_real, 'I2': self.dec( + {'I1': self.enc({'I1': x_real}, training=True)})}, + training=True) + + # Losses for discriminators + loss_dxz = cross_entropy(y_true, out_truexz) + cross_entropy( + y_fake, out_fakexz) + loss_dxx = cross_entropy(y_true, out_truexx) + cross_entropy( + y_fake, out_fakexx) + if (self.add_disc_zz_loss == True): + loss_dzz = cross_entropy(y_true, out_truezz) + cross_entropy( + y_fake, out_fakezz) loss_disc = loss_dxz + loss_dzz + loss_dxx else: loss_disc = loss_dxz + loss_dxx - #Losses for generator - loss_gexz = cross_entropy( y_true,out_fakexz) + cross_entropy( y_fake,out_truexz) - loss_gexx = cross_entropy( y_true,out_fakexx) + cross_entropy( y_fake,out_truexx) - if( self.add_disc_zz_loss == True): - loss_gezz = cross_entropy( y_true,out_fakezz) + cross_entropy( y_fake,out_truezz) + # Losses for generator + loss_gexz = cross_entropy(y_true, out_fakexz) + cross_entropy( + y_fake, out_truexz) + loss_gexx = cross_entropy(y_true, out_fakexx) + cross_entropy( + y_fake, out_truexx) + if (self.add_disc_zz_loss == True): + 
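+                # with disc_zz enabled, the generator is also scored on fooling the
+                # zz discriminator; that term joins the xx term below to form the
+                # cycle-consistency part of the generator loss, which is added to
+                # the xz adversarial loss.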
loss_gezz = cross_entropy(y_true, out_fakezz) + cross_entropy( + y_fake, out_truezz) cycle_consistency = loss_gezz + loss_gexx loss_gen = loss_gexz + cycle_consistency else: cycle_consistency = loss_gexx loss_gen = loss_gexz + cycle_consistency - - if( self.add_recon_loss == True): + if (self.add_recon_loss == True): # Extra recon loss - x_recon = self.dec({'I1': self.enc({'I1': x_real }, training=True ) }) - loss_recon = tf.reduce_mean( ( x_real - x_recon )**2 ) + x_recon = self.dec( + {'I1': self.enc({'I1': x_real}, training=True)}) + loss_recon = tf.reduce_mean((x_real - x_recon) ** 2) loss_gen += loss_recon * self.lambda_recon_loss - - return loss_disc,loss_gen - - - with tf.GradientTape() as enc_tape, tf.GradientTape() as dec_tape, tf.GradientTape() as disc_xx_tape, tf.GradientTape() as disc_xz_tape, tf.GradientTape() as disc_zz_tape: - loss_disc, loss_gen = get_losses() + return loss_disc, loss_gen - self.hist_loss_disc.append( np.float64(loss_disc.numpy()) ) - self.hist_loss_gen.append( np.float64(loss_gen.numpy()) ) - - gradients_dec = dec_tape.gradient(loss_gen, self.dec.trainable_variables) - self.dec.optimizer.apply_gradients(zip(gradients_dec, self.dec.trainable_variables)) - + with tf.GradientTape() as enc_tape, tf.GradientTape() as dec_tape, tf.GradientTape() as disc_xx_tape, tf.GradientTape() as disc_xz_tape, tf.GradientTape() as disc_zz_tape: + loss_disc, loss_gen = get_losses() - gradients_enc = enc_tape.gradient(loss_gen, self.enc.trainable_variables) - self.enc.optimizer.apply_gradients(zip(gradients_enc, self.enc.trainable_variables)) + self.hist_loss_disc.append(np.float64(loss_disc.numpy())) + self.hist_loss_gen.append(np.float64(loss_gen.numpy())) + gradients_dec = dec_tape.gradient(loss_gen, + self.dec.trainable_variables) + self.dec.optimizer.apply_gradients( + zip(gradients_dec, self.dec.trainable_variables)) - gradients_disc_xx = disc_xx_tape.gradient(loss_disc, self.disc_xx.trainable_variables) - self.disc_xx.optimizer.apply_gradients(zip(gradients_disc_xx, self.disc_xx.trainable_variables)) - - if( self.add_disc_zz_loss == True): - gradients_disc_zz = disc_zz_tape.gradient(loss_disc, self.disc_zz.trainable_variables) - self.disc_zz.optimizer.apply_gradients(zip(gradients_disc_zz, self.disc_zz.trainable_variables)) + gradients_enc = enc_tape.gradient(loss_gen, + self.enc.trainable_variables) + self.enc.optimizer.apply_gradients( + zip(gradients_enc, self.enc.trainable_variables)) + gradients_disc_xx = disc_xx_tape.gradient(loss_disc, + self.disc_xx.trainable_variables) + self.disc_xx.optimizer.apply_gradients( + zip(gradients_disc_xx, self.disc_xx.trainable_variables)) - gradients_disc_xz = disc_xz_tape.gradient(loss_disc, self.disc_xz.trainable_variables) - self.disc_xz.optimizer.apply_gradients(zip(gradients_disc_xz, self.disc_xz.trainable_variables)) + if (self.add_disc_zz_loss == True): + gradients_disc_zz = disc_zz_tape.gradient(loss_disc, + self.disc_zz.trainable_variables) + self.disc_zz.optimizer.apply_gradients( + zip(gradients_disc_zz, self.disc_zz.trainable_variables)) + gradients_disc_xz = disc_xz_tape.gradient(loss_disc, + self.disc_xz.trainable_variables) + self.disc_xz.optimizer.apply_gradients( + zip(gradients_disc_xz, self.disc_xz.trainable_variables)) def plot_learning_curves(self, start_ind=0, window_smoothening=10): fig = plt.figure(figsize=(12, 5)) - l_gen = pd.Series(self.hist_loss_gen[start_ind:]).rolling(window_smoothening).mean() - l_disc = pd.Series(self.hist_loss_disc[start_ind:]).rolling(window_smoothening).mean() + l_gen = 
pd.Series(self.hist_loss_gen[start_ind:]).rolling( + window_smoothening).mean() + l_disc = pd.Series(self.hist_loss_disc[start_ind:]).rolling( + window_smoothening).mean() ax = fig.add_subplot(1, 2, 1) ax.plot(range(len(l_gen)), l_gen, ) @@ -403,10 +458,8 @@ def plot_learning_curves(self, start_ind=0, window_smoothening=10): ax.set_xlabel('Iter') plt.show() - - - - def fit(self, X, y=None, noise_std= 0.1): + + def fit(self, X, y=None, noise_std=0.1): """Fit detector. y is ignored in unsupervised methods. Parameters ---------- @@ -433,8 +486,7 @@ def fit(self, X, y=None, noise_std= 0.1): X_norm = self.scaler_.fit_transform(X) else: X_norm = np.copy(X) - - + for n in range(self.epochs): if ((n % 50 == 0) and (n != 0) and (self.verbose == 1)): print('Train iter:{}'.format(n)) @@ -443,28 +495,26 @@ def fit(self, X, y=None, noise_std= 0.1): np.random.shuffle(X_norm) X_train_sel = X_norm[0: min(self.batch_size, self.n_samples_), :] - latent_noise = np.random.normal(0, 1, (X_train_sel.shape[0], self.latent_dim)) - X_train_sel += np.random.normal(0,noise_std, size = X_train_sel.shape) - self.train_step( ( np.float32(X_train_sel), np.float32(latent_noise) ) ) - - - - # Predict on X itself and calculate the the outlier scores. + latent_noise = np.random.normal(0, 1, ( + X_train_sel.shape[0], self.latent_dim)) + X_train_sel += np.random.normal(0, noise_std, + size=X_train_sel.shape) + self.train_step( + (np.float32(X_train_sel), np.float32(latent_noise))) + + # Predict on X itself and calculate the the outlier scores. # Note, X_norm was shuffled and needs to be recreated if self.preprocessing: X_norm = self.scaler_.transform(X) else: X_norm = np.copy(X) - pred_scores = self.get_outlier_scores(X_norm) self.decision_scores_ = pred_scores self._process_decision_scores() return self - - - def train_more(self, X, epochs=100, noise_std = 0.1 ): + def train_more(self, X, epochs=100, noise_std=0.1): """This function allows the researcher to perform extra training instead of the fixed number determined by the fit() function. """ @@ -486,13 +536,14 @@ def train_more(self, X, epochs=100, noise_std = 0.1 ): np.random.shuffle(X_norm) X_train_sel = X_norm[0: min(self.batch_size, self.n_samples_), :] - latent_noise = np.random.normal(0, 1, (X_train_sel.shape[0], self.latent_dim)) - X_train_sel += np.random.normal(0,noise_std, size = X_train_sel.shape) - self.train_step( ( np.float32(X_train_sel), np.float32(latent_noise) ) ) - - - - # Predict on X itself and calculate the the outlier scores. + latent_noise = np.random.normal(0, 1, ( + X_train_sel.shape[0], self.latent_dim)) + X_train_sel += np.random.normal(0, noise_std, + size=X_train_sel.shape) + self.train_step( + (np.float32(X_train_sel), np.float32(latent_noise))) + + # Predict on X itself and calculate the the outlier scores. 
         # Note, X_norm was shuffled and needs to be recreated
         if self.preprocessing:
             X_norm = self.scaler_.transform(X)
@@ -504,22 +555,22 @@ def train_more(self, X, epochs=100, noise_std = 0.1 ):
         self._process_decision_scores()
         return self
-
-
     def get_outlier_scores(self, X_norm):
-
-        X_enc = self.enc({'I1': X_norm }).numpy()
-        X_enc_gen = self.dec({'I1':X_enc }).numpy()
-        _, act_layer_xx = self.disc_xx({'I1': X_norm, 'I2': X_norm}, training=False)
+        X_enc = self.enc({'I1': X_norm}).numpy()
+        X_enc_gen = self.dec({'I1': X_enc}).numpy()
+
+        _, act_layer_xx = self.disc_xx({'I1': X_norm, 'I2': X_norm},
+                                       training=False)
         act_layer_xx = act_layer_xx.numpy()
-        _, act_layer_xx_enc_gen = self.disc_xx({'I1': X_norm, 'I2': X_enc_gen}, training=False)
+        _, act_layer_xx_enc_gen = self.disc_xx({'I1': X_norm, 'I2': X_enc_gen},
+                                               training=False)
         act_layer_xx_enc_gen = act_layer_xx_enc_gen.numpy()
-        outlier_scores = np.mean( np.abs( (act_layer_xx - act_layer_xx_enc_gen)**2 ) ,axis=1)
+        outlier_scores = np.mean(
+            np.abs((act_layer_xx - act_layer_xx_enc_gen) ** 2), axis=1)
         return outlier_scores
-
     def decision_function(self, X):
         """Predict raw anomaly score of X using the fitted detector.
         The anomaly score of an input sample is computed based on different
@@ -546,5 +597,3 @@ def decision_function(self, X):
         # Predict on X
         pred_scores = self.get_outlier_scores(X_norm)
         return pred_scores
-
-
diff --git a/pyod/test/test_alad.py b/pyod/test/test_alad.py
index f7a62fe1a..552dd9caf 100644
--- a/pyod/test/test_alad.py
+++ b/pyod/test/test_alad.py
@@ -4,14 +4,13 @@
 import os
 import sys
-
 import unittest
+
 # noinspection PyProtectedMember
 from numpy.testing import assert_equal
 from numpy.testing import assert_raises
-
-from sklearn.metrics import roc_auc_score
 from sklearn.base import clone
+from sklearn.metrics import roc_auc_score

 # temporary solution for relative imports in case pyod is not installed
 # if pyod is installed, no need to use the following line
@@ -21,8 +20,6 @@
 from pyod.utils.data import generate_data
-
-
 class TestALAD(unittest.TestCase):
     def setUp(self):
         self.n_train = 500
@@ -35,22 +32,25 @@ def setUp(self):
             n_features=self.n_features, contamination=self.contamination,
             random_state=42)
-        self.clf = ALAD( epochs = 100, latent_dim = 2,
-                         learning_rate_disc = 0.0001,
-                         learning_rate_gen = 0.0001,
-                         dropout_rate = 0.2,
-                         add_recon_loss = False,
-                         lambda_recon_loss= 0.05,  # only important when add_recon_loss = True
-                         add_disc_zz_loss = True,
-                         dec_layers=[ 75, 100 ],
-                         enc_layers=[ 100, 75 ],
-                         disc_xx_layers= [ 100, 75 ],
-                         disc_zz_layers= [ 25, 25 ],
-                         disc_xz_layers= [ 100, 75 ],
-                         spectral_normalization = False,
-                         activation_hidden_disc = 'tanh', activation_hidden_gen = 'tanh' ,
-                         preprocessing=True, batch_size = 200, contamination = self.contamination)
-
+        self.clf = ALAD(epochs=100, latent_dim=2,
+                        learning_rate_disc=0.0001,
+                        learning_rate_gen=0.0001,
+                        dropout_rate=0.2,
+                        add_recon_loss=False,
+                        lambda_recon_loss=0.05,
+                        # only important when add_recon_loss = True
+                        add_disc_zz_loss=True,
+                        dec_layers=[75, 100],
+                        enc_layers=[100, 75],
+                        disc_xx_layers=[100, 75],
+                        disc_zz_layers=[25, 25],
+                        disc_xz_layers=[100, 75],
+                        spectral_normalization=False,
+                        activation_hidden_disc='tanh',
+                        activation_hidden_gen='tanh',
+                        preprocessing=True, batch_size=200,
+                        contamination=self.contamination)
+
         self.clf.fit(self.X_train)

     def test_parameters(self):
@@ -65,7 +65,6 @@ def test_parameters(self):
         assert (hasattr(self.clf, '_sigma') and
                 self.clf._sigma is not None)
-
     def test_train_scores(self):
         assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])

@@ -133,6 +132,7 @@ def test_fit_predict_score(self):
         with assert_raises(NotImplementedError):
             self.clf.fit_predict_score(self.X_test, self.y_test,
                                        scoring='something')
+
     def test_model_clone(self):
         # for deep models this may not apply
         clone_clf = clone(self.clf)
diff --git a/pyod/version.py b/pyod/version.py
index c27a45844..6c7894379 100644
--- a/pyod/version.py
+++ b/pyod/version.py
@@ -20,4 +20,4 @@
 # Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer.
 # 'X.Y.dev0' is the canonical version of 'X.Y.dev'
 #
-__version__ = '1.0.4'  # pragma: no cover
+__version__ = '1.0.5'  # pragma: no cover

From fb3ef681aaa9976aa27a0996d731662fb7fac9c5 Mon Sep 17 00:00:00 2001
From: yzhao062
Date: Wed, 14 Sep 2022 23:53:12 -0400
Subject: [PATCH 15/16] partial update of alad

---
 README.rst     | 3 +++
 docs/index.rst | 1 +
 2 files changed, 4 insertions(+)

diff --git a/README.rst b/README.rst
index 5421250f5..1223ab8e1 100644
--- a/README.rst
+++ b/README.rst
@@ -371,6 +371,7 @@ Neural Networks   SO_GAAL           Single-Objective Generative Adversarial
 Neural Networks     MO_GAAL            Multiple-Objective Generative Adversarial Active Learning                                             2019  [#Liu2019Generative]_
 Neural Networks     DeepSVDD           Deep One-Class Classification                                                                          2018  [#Ruff2018Deep]_
 Neural Networks     AnoGAN             Anomaly Detection with Generative Adversarial Networks                                                 2017  [#Schlegl2017Unsupervised]_
+Neural Networks     ALAD               Adversarially Learned Anomaly Detection                                                                2018  [#Zenati2018Adversarially]_
 Graph-based         R-Graph            Outlier detection by R-graph                                                                           2017  [#You2017Provable]_
 Graph-based         LUNAR              LUNAR: Unifying Local Outlier Detection Methods via Graph Neural Networks                             2022  [#Goodge2022Lunar]_
 =================== ================== ====================================================================================================== ===== ========================================
@@ -613,6 +614,8 @@ Reference

 .. [#You2017Provable] You, C., Robinson, D.P. and Vidal, R., 2017. Provable self-representation based outlier detection in a union of subspaces. In Proceedings of the IEEE conference on computer vision and pattern recognition.

+.. [#Zenati2018Adversarially] Zenati, H., Romain, M., Foo, C.S., Lecouat, B. and Chandrasekhar, V., 2018, November. Adversarially learned anomaly detection. In 2018 IEEE International Conference on Data Mining (ICDM) (pp. 727-736). IEEE.
+
 .. [#Zhao2018XGBOD] Zhao, Y. and Hryniewicki, M.K. XGBOD: Improving Supervised Outlier Detection with Unsupervised Representation Learning. *IEEE International Joint Conference on Neural Networks*\ , 2018.

 .. [#Zhao2019LSCP] Zhao, Y., Nasrullah, Z., Hryniewicki, M.K. and Li, Z., 2019, May. LSCP: Locally selective combination in parallel outlier ensembles. In *Proceedings of the 2019 SIAM International Conference on Data Mining (SDM)*, pp. 585-593. Society for Industrial and Applied Mathematics.
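As a quick orientation for reviewers (this note and sketch are not part of the patch itself): the score computed by ``get_outlier_scores`` earlier in this series follows the adversarially-learned-features idea of the Zenati et al. (2018) reference added above. The ``disc_xx`` discriminator is evaluated once on the pair (x, x) and once on (x, dec(enc(x))), and the mean squared difference between its hidden activations is used as the outlier score. A self-contained numpy sketch of that rule, with made-up activation values (the arrays below are illustrative, not produced by the model):

import numpy as np

# hypothetical last-hidden-layer activations of disc_xx for four samples,
# once for the pair (x, x) and once for (x, dec(enc(x)))
act_layer_xx = np.array([[0.1, 0.4], [0.2, 0.3], [0.9, 0.1], [0.5, 0.5]])
act_layer_xx_enc_gen = np.array([[0.1, 0.5], [0.2, 0.3], [0.1, 0.9], [0.4, 0.5]])

# same rule as get_outlier_scores: mean squared activation difference per
# sample; the poorly reconstructed third sample receives the highest score
outlier_scores = np.mean(
    np.abs((act_layer_xx - act_layer_xx_enc_gen) ** 2), axis=1)
print(outlier_scores)  # approximately [0.005, 0.0, 0.64, 0.005]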
diff --git a/docs/index.rst b/docs/index.rst
index a724320e2..1e0d41a17 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -207,6 +207,7 @@ Neural Networks   SO_GAAL          Single-Objective Generative Adversarial A
 Neural Networks     MO_GAAL          Multiple-Objective Generative Adversarial Active Learning                                             2019  :class:`pyod.models.mo_gaal.MO_GAAL`                :cite:`a-liu2019generative`
 Neural Networks     DeepSVDD         Deep One-Class Classification                                                                          2018  :class:`pyod.models.deep_svdd.DeepSVDD`             :cite:`a-ruff2018deepsvdd`
 Neural Networks     AnoGAN           Anomaly Detection with Generative Adversarial Networks                                                 2017  :class:`pyod.models.anogan.AnoGAN`                  :cite:`a-schlegl2017unsupervised`
+Neural Networks     ALAD             Adversarially Learned Anomaly Detection                                                                2018  :class:`pyod.models.alad.ALAD`                      :cite:`a-zenati2018adversarially`
 Graph-based         R-Graph          Outlier detection by R-graph                                                                           2017  :class:`pyod.models.rgraph.RGraph`                  :cite:`you2017provable`
 Graph-based         LUNAR            LUNAR: Unifying Local Outlier Detection Methods via Graph Neural Networks                             2022  :class:`pyod.models.lunar.LUNAR`                    :cite:`a-goodge2022lunar`
 =================== ================ ====================================================================================================== ===== =================================================== ======================================================

From ab60b97ffea0f186d6ffeda2fad1adcd5d1de262 Mon Sep 17 00:00:00 2001
From: yzhao062
Date: Thu, 15 Sep 2022 00:46:44 -0400
Subject: [PATCH 16/16] partial update of alad

---
 pyod/test/test_anogan.py | 130 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 130 insertions(+)
 create mode 100644 pyod/test/test_anogan.py

diff --git a/pyod/test/test_anogan.py b/pyod/test/test_anogan.py
new file mode 100644
index 000000000..f9cca123f
--- /dev/null
+++ b/pyod/test/test_anogan.py
@@ -0,0 +1,130 @@
+# -*- coding: utf-8 -*-
+from __future__ import division
+from __future__ import print_function
+
+import os
+import sys
+import unittest
+
+# noinspection PyProtectedMember
+from numpy.testing import assert_equal
+from numpy.testing import assert_raises
+from sklearn.base import clone
+from sklearn.metrics import roc_auc_score
+
+# temporary solution for relative imports in case pyod is not installed
+# if pyod is installed, no need to use the following line
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+from pyod.models.anogan import AnoGAN
+from pyod.utils.data import generate_data
+
+
+# todo: too slow to run
+class TestAnoGAN(unittest.TestCase):
+    def setUp(self):
+        self.n_train = 500
+        self.n_test = 200
+        self.n_features = 2
+        self.contamination = 0.1
+        self.roc_floor = 0.8
+        self.X_train, self.X_test, self.y_train, self.y_test = generate_data(
+            n_train=self.n_train, n_test=self.n_test,
+            n_features=self.n_features, contamination=self.contamination,
+            random_state=42)
+
+        self.clf = AnoGAN(epochs=3,
+                          contamination=self.contamination)
+    #
+    #     self.clf.fit(self.X_train)
+    #
+    # def test_parameters(self):
+    #     assert (hasattr(self.clf, 'decision_scores_') and
+    #             self.clf.decision_scores_ is not None)
+    #     assert (hasattr(self.clf, 'labels_') and
+    #             self.clf.labels_ is not None)
+    #     assert (hasattr(self.clf, 'threshold_') and
+    #             self.clf.threshold_ is not None)
+    #     assert (hasattr(self.clf, '_mu') and
+    #             self.clf._mu is not None)
+    #     assert (hasattr(self.clf, '_sigma') and
+    #             self.clf._sigma is not None)
+    #
+    # def test_train_scores(self):
+    #     assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])
+    #
+    # def test_prediction_scores(self):
+    #     pred_scores = self.clf.decision_function(self.X_test)
+    #
+    #     # check score shapes
+    #     assert_equal(pred_scores.shape[0], self.X_test.shape[0])
+    #
+    #     # check performance
+    #     assert (roc_auc_score(self.y_test, pred_scores) >= self.roc_floor)
+    #
+    # def test_prediction_labels(self):
+    #     pred_labels = self.clf.predict(self.X_test)
+    #     assert_equal(pred_labels.shape, self.y_test.shape)
+    #
+    # def test_prediction_proba(self):
+    #     pred_proba = self.clf.predict_proba(self.X_test)
+    #     assert (pred_proba.min() >= 0)
+    #     assert (pred_proba.max() <= 1)
+    #
+    # def test_prediction_proba_linear(self):
+    #     pred_proba = self.clf.predict_proba(self.X_test, method='linear')
+    #     assert (pred_proba.min() >= 0)
+    #     assert (pred_proba.max() <= 1)
+    #
+    # def test_prediction_proba_unify(self):
+    #     pred_proba = self.clf.predict_proba(self.X_test, method='unify')
+    #     assert (pred_proba.min() >= 0)
+    #     assert (pred_proba.max() <= 1)
+    #
+    # def test_prediction_proba_parameter(self):
+    #     with assert_raises(ValueError):
+    #         self.clf.predict_proba(self.X_test, method='something')
+    #
+    # def test_prediction_labels_confidence(self):
+    #     pred_labels, confidence = self.clf.predict(self.X_test,
+    #                                                return_confidence=True)
+    #     assert_equal(pred_labels.shape, self.y_test.shape)
+    #     assert_equal(confidence.shape, self.y_test.shape)
+    #     assert (confidence.min() >= 0)
+    #     assert (confidence.max() <= 1)
+    #
+    # def test_prediction_proba_linear_confidence(self):
+    #     pred_proba, confidence = self.clf.predict_proba(self.X_test,
+    #                                                     method='linear',
+    #                                                     return_confidence=True)
+    #     assert (pred_proba.min() >= 0)
+    #     assert (pred_proba.max() <= 1)
+    #
+    #     assert_equal(confidence.shape, self.y_test.shape)
+    #     assert (confidence.min() >= 0)
+    #     assert (confidence.max() <= 1)
+    #
+    # def test_fit_predict(self):
+    #     pred_labels = self.clf.fit_predict(self.X_train)
+    #     assert_equal(pred_labels.shape, self.y_train.shape)
+    #
+    # def test_fit_predict_score(self):
+    #     self.clf.fit_predict_score(self.X_test, self.y_test)
+    #     self.clf.fit_predict_score(self.X_test, self.y_test,
+    #                                scoring='roc_auc_score')
+    #     self.clf.fit_predict_score(self.X_test, self.y_test,
+    #                                scoring='prc_n_score')
+    #     with assert_raises(NotImplementedError):
+    #         self.clf.fit_predict_score(self.X_test, self.y_test,
+    #                                    scoring='something')
+    #
+    # def test_model_clone(self):
+    #     # for deep models this may not apply
+    #     clone_clf = clone(self.clf)
+    #
+    # def tearDown(self):
+    #     pass
+
+
+if __name__ == '__main__':
+    unittest.main()
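A closing note for reviewers (not part of the patch series): besides the standard ``fit`` / ``decision_function`` / ``predict`` API, the ALAD class added in these patches also exposes ``train_more`` for continuing adversarial training and ``plot_learning_curves`` for inspecting the smoothed generator and discriminator losses. The following is a minimal usage sketch only; it assumes the patches above are applied in a TensorFlow 2 environment, and the data sizes and hyperparameters are illustrative, not recommended settings.

from pyod.models.alad import ALAD
from pyod.utils.data import generate_data

# synthetic data; check the generate_data return order for your pyod version
X_train, X_test, y_train, y_test = generate_data(
    n_train=500, n_test=200, n_features=2,
    contamination=0.1, random_state=42)

clf = ALAD(epochs=100, latent_dim=2, contamination=0.1)
clf.fit(X_train)

# continue adversarial training beyond the epochs used by fit()
clf.train_more(X_train, epochs=50, noise_std=0.1)

# smoothed generator/discriminator loss curves collected during training
clf.plot_learning_curves(start_ind=0, window_smoothening=10)

y_test_scores = clf.decision_function(X_test)  # raw outlier scores
y_test_pred = clf.predict(X_test)  # binary labels from the fitted threshold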