
Merge pull request #40 from deel-ai/develop
Develop
thib-s authored Sep 10, 2021
2 parents 2929ceb + 30419c8 commit 5e14d25
Showing 52 changed files with 6,146 additions and 1,628 deletions.
17 changes: 10 additions & 7 deletions README.md
@@ -3,6 +3,8 @@
[![Python](https://img.shields.io/pypi/pyversions/deel-lip.svg)](https://pypi.org/project/deel-lip)
[![PyPI](https://img.shields.io/pypi/v/deel-lip.svg)](https://pypi.org/project/deel-lip)
[![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://deel-lip.readthedocs.io)
[![deel-lip tests](https://github.com/deel-ai/deel-lip/actions/workflows/python-tests.yml/badge.svg?branch=master)](https://github.com/deel-ai/deel-lip/actions/workflows/python-tests.yml)
[![deel-lip linters](https://github.com/deel-ai/deel-lip/actions/workflows/python-linters.yml/badge.svg?branch=master)](https://github.com/deel-ai/deel-lip/actions/workflows/python-linters.yml)
[![GitHub license](https://img.shields.io/github/license/deel-ai/deel-lip.svg)](https://github.com/deel-ai/deel-lip/blob/master/LICENSE)

Controlling the Lipschitz constant of a layer or a whole neural network has many applications ranging
@@ -37,7 +39,7 @@ from deel.lip.layers import (
)
from deel.lip.model import Sequential
from deel.lip.activations import GroupSort
from deel.lip.losses import HKR_multiclass_loss
from deel.lip.losses import MulticlassHKR, MulticlassKR
from tensorflow.keras.layers import Input, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.datasets import mnist
@@ -46,7 +48,8 @@ import numpy as np

# Sequential (resp. Model) from deel.lip.model has the same properties as any Lipschitz model.
# It acts only as a container, with features specific to Lipschitz
# functions (condensation, vanilla_exportation...)
# functions (condensation, vanilla_exportation...), but the layers are fully compatible
# with tf.keras.models.Sequential/Model
model = Sequential(
[
Input(shape=(28, 28, 1)),
@@ -92,9 +95,9 @@ model = Sequential(
model.compile(
# decreasing alpha and increasing min_margin improve robustness (at the cost of accuracy)
# note also that in the case of Lipschitz networks, more robustness requires more parameters.
loss=HKR_multiclass_loss(alpha=25, min_margin=0.25),
optimizer=Adam(lr=0.005),
metrics=["accuracy"],
loss=MulticlassHKR(alpha=50, min_margin=0.05),
optimizer=Adam(1e-3),
metrics=["accuracy", MulticlassKR()],
)

model.summary()
@@ -116,8 +119,8 @@ y_test = to_categorical(y_test)
model.fit(
x_train,
y_train,
batch_size=256,
epochs=15,
batch_size=2048,
epochs=30,
validation_data=(x_test, y_test),
shuffle=True,
)
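The renamed loss and metric can be sanity-checked outside of a full training run. A minimal sketch, assuming only the `MulticlassHKR` and `MulticlassKR` classes imported in the snippet above (batch shapes and values are illustrative):

```python
import tensorflow as tf
from deel.lip.losses import MulticlassHKR, MulticlassKR

# One-hot labels and raw scores for a 10-class toy batch.
y_true = tf.one_hot([3, 7], depth=10)
y_pred = tf.random.normal((2, 10))

# Same hyper-parameters as in model.compile above; per the comment
# there, decreasing alpha and increasing min_margin trades accuracy
# for robustness.
loss = MulticlassHKR(alpha=50, min_margin=0.05)
metric = MulticlassKR()

print("HKR loss:", loss(y_true, y_pred).numpy())
print("KR term :", metric(y_true, y_pred).numpy())
```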
12 changes: 6 additions & 6 deletions deel/lip/activations.py
@@ -12,10 +12,10 @@
from tensorflow.keras.constraints import MinMaxNorm
from tensorflow.keras.layers import Layer, PReLU
from .layers import LipschitzLayer
from .utils import _deel_export
from tensorflow.keras.utils import register_keras_serializable


@_deel_export
@register_keras_serializable("deel-lip", "MaxMin")
class MaxMin(Layer, LipschitzLayer):
def __init__(self, data_format="channels_last", k_coef_lip=1.0, *args, **kwargs):
"""
@@ -80,7 +80,7 @@ def compute_output_shape(self, input_shape):
return new_shape


@_deel_export
@register_keras_serializable("deel-lip", "GroupSort")
class GroupSort(Layer, LipschitzLayer):
def __init__(
self, n=None, data_format="channels_last", k_coef_lip=1.0, *args, **kwargs
@@ -155,7 +155,7 @@ def compute_output_shape(self, input_shape):
return input_shape


@_deel_export
@register_keras_serializable("deel-lip", "GroupSort2")
class GroupSort2(GroupSort):
def __init__(self, **kwargs):
"""
@@ -174,7 +174,7 @@ def __init__(self, **kwargs):
super().__init__(**kwargs)


@_deel_export
@register_keras_serializable("deel-lip", "FullSort")
class FullSort(GroupSort):
def __init__(self, **kwargs):
"""
@@ -193,7 +193,7 @@ def __init__(self, **kwargs):
super().__init__(**kwargs)


@_deel_export
@register_keras_serializable("deel-lip", "PReLUlip")
def PReLUlip(k_coef_lip=1.0):
"""
PReLU activation, with Lipschitz constraint.
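Since every activation above is now registered under the "deel-lip" package with `register_keras_serializable`, models using them can be saved and reloaded without hand-building a `custom_objects` dictionary. A small sketch of the activation and of the serialization round-trip; the toy values and file path are illustrative:

```python
import tensorflow as tf
from deel.lip.activations import GroupSort2

# GroupSort2 sorts pre-activations within consecutive pairs of
# channels; like MaxMin it is a 1-Lipschitz, gradient-norm-preserving
# activation.
act = GroupSort2()
x = tf.constant([[1.0, -2.0, 0.5, 3.0]])
print(act(x).numpy())  # each pair (1, -2) and (0.5, 3) comes out sorted

# Thanks to the registration, a saved model should reload without an
# explicit custom_objects mapping.
model = tf.keras.Sequential([tf.keras.layers.InputLayer(input_shape=(4,)), act])
model.save("groupsort_demo.h5")
reloaded = tf.keras.models.load_model("groupsort_demo.h5")
```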
29 changes: 23 additions & 6 deletions deel/lip/callbacks.py
@@ -10,6 +10,7 @@

import tensorflow as tf
from tensorflow.keras.callbacks import Callback

from .layers import Condensable


@@ -58,6 +59,7 @@ def __init__(
self,
monitored_layers: Iterable[str],
logdir: str,
target: str = "kernel",
what: str = "max",
on_epoch: bool = True,
on_batch: bool = False,
@@ -72,6 +74,10 @@
Args:
monitored_layers: list of layer names to monitor.
logdir: path to the logging directory.
target: describes what to monitor; can be either "kernel" or "wbar". Setting
to "kernel" checks values of the unconstrained weights, while setting to
"wbar" checks values of the constrained weights (allowing to verify that
the parameters actually satisfy the Lipschitz constraint)
what: either "max", which displays the largest singular value over the
training process, or "all", which plots the distribution of all singular
values.
@@ -80,6 +86,8 @@
"""
self.on_epoch = on_epoch
self.on_batch = on_batch
assert target in {"kernel", "wbar"}
self.target = target
assert what in {"max", "all"}
self.what = what
self.logdir = logdir
@@ -96,10 +104,14 @@ def _monitor(self, step):
step = self.params["steps"] * self.epochs + step
for layer_name in self.monitored_layers:
layer = self.model.get_layer(layer_name)
if (self.what == "max") and hasattr(layer, "sig"):
if (
(self.target == "kernel")
and (self.what == "max")
and hasattr(layer, "sig")
):
sig = layer.sig[0, 0]
elif hasattr(layer, "kernel"):
kernel = layer.kernel
elif hasattr(layer, self.target):
kernel = getattr(layer, self.target)
w_shape = kernel.shape.as_list()
sigmas = tf.linalg.svd(
tf.keras.backend.reshape(kernel, [-1, w_shape[-1]]),
@@ -108,15 +120,20 @@
).numpy()
sig = sigmas[0]
else:
RuntimeWarning("[MonitorCallback] unsupported layer")
RuntimeWarning(
f"[MonitorCallback] layer {layer_name} has no "
f"attribute {self.target}"
)
return
if self.what == "max":
with self.file_writer.as_default():
result = tf.summary.scalar("%s_sigma" % layer_name, sig, step=step)
result = tf.summary.scalar(
f"{layer_name}_{self.target}_sigmas", sig, step=step
)
else:
with self.file_writer.as_default():
result = tf.summary.histogram(
"%s_sigmas" % layer_name,
f"{layer_name}_{self.target}_sigmas",
sigmas,
step=step,
buckets=None,
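A usage sketch for the extended callback, based on the constructor shown above; the layer name and log directory are placeholders:

```python
from deel.lip.callbacks import MonitorCallback

# Monitor the constrained weights ("wbar") of one named layer: for a
# Lipschitz layer, their largest singular value should stay at 1.
# With target="kernel" the raw, unconstrained weights are monitored
# instead.
monitor = MonitorCallback(
    monitored_layers=["spectral_dense_1"],  # placeholder layer name
    logdir="./logs",
    target="wbar",
    what="max",        # log only the largest singular value
    on_epoch=True,     # record once per epoch
    on_batch=False,
)

# model.fit(x_train, y_train, epochs=5, callbacks=[monitor])
```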
104 changes: 43 additions & 61 deletions deel/lip/constraints.py
@@ -9,12 +9,12 @@
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.constraints import Constraint
from .normalizers import bjorck_normalization, spectral_normalization
from .utils import _deel_export
from .normalizers import reshaped_kernel_orthogonalization
from tensorflow.keras.utils import register_keras_serializable


@_deel_export
class WeightClip(Constraint):
@register_keras_serializable("deel-lip", "WeightClipConstraint")
class WeightClipConstraint(Constraint):
def __init__(self, c=2):
"""
Clips the weights incident to each hidden unit to be inside the range `[-c,+c]`.
@@ -28,11 +28,11 @@ def __call__(self, p):
return K.clip(p, -self.c, self.c)

def get_config(self):
return {"name": self.__class__.__name__, "c": self.c}
return {"c": self.c}


@_deel_export
class AutoWeightClip(Constraint):
@register_keras_serializable("deel-lip", "AutoWeightClipConstraint")
class AutoWeightClipConstraint(Constraint):
def __init__(self, scale=1):
"""
Clips the weights incident to each hidden unit to be inside the range `[-c,+c]`.
@@ -42,93 +42,75 @@ def __init__(self, scale=1):
scale: scaling factor to increase/decrease clipping value.
"""
self.scale = scale
self.c = None

def __call__(self, w):
self.c = 1 / (tf.sqrt(tf.cast(tf.size(w), dtype=tf.float64)) * self.scale)
return tf.clip_by_value(w, -self.c, self.c)
c = 1 / (tf.sqrt(tf.cast(tf.size(w), dtype=w.dtype)) * self.scale)
return tf.clip_by_value(w, -c, c)

def get_config(self):
return {"name": self.__class__.__name__, "scale": self.scale, "c": self.c}
return {"scale": self.scale}


@_deel_export
class FrobeniusNormalizer(Constraint):
@register_keras_serializable("deel-lip", "FrobeniusConstraint")
class FrobeniusConstraint(Constraint):
# todo: duplicate of keras/constraints/UnitNorm ?

def __init__(self, **kwargs):
def __init__(self, eps=1e-7):
"""
Clips the weights incident to each hidden unit to be inside the range `[-c,+c]`.
With c = 1/norm(kernel).
Constrain the weights by dividing the weight matrix by its L2 norm.
"""
super(FrobeniusNormalizer, self).__init__(**kwargs)
self.eps = eps

def __call__(self, w):
return w * tf.sqrt(tf.reduce_sum(tf.square(w), keepdims=False))
return w / (tf.sqrt(tf.reduce_sum(tf.square(w), keepdims=False)) + self.eps)

def get_config(self):
return {"eps": self.eps}


@_deel_export
class SpectralNormalizer(Constraint):
def __init__(self, niter_spectral=3, u=None) -> None:
@register_keras_serializable("deel-lip", "SpectralConstraint")
class SpectralConstraint(Constraint):
def __init__(
self, k_coef_lip=1.0, niter_spectral=3, niter_bjorck=15, u=None
) -> None:
"""
Ensure that the weights matrix have sigma_max == 1 (maximum singular value of
the weights matrix).
Ensure that *all* singular values of the weight matrix equal k_coef_lip. Computation
based on the Bjorck algorithm. The computation is done in two steps:
1. reduce the largest singular value to k_coef_lip, using the iterated power method.
2. increase other singular values to k_coef_lip, using the Bjorck algorithm.
Args:
k_coef_lip: Lipschitz coefficient of the weight matrix.
niter_spectral: number of iterations to find the maximum singular value.
niter_bjorck: number of iterations of the Bjorck algorithm.
u: vector used for the iterated power method; can be set to None (used for
serialization/deserialization purposes).
"""
self.niter_spectral = niter_spectral
self.niter_bjorck = niter_bjorck
self.k_coef_lip = k_coef_lip
if not (isinstance(u, tf.Tensor) or (u is None)):
u = tf.convert_to_tensor(u)
self.u = u
super(SpectralNormalizer, self).__init__()
super(SpectralConstraint, self).__init__()

def __call__(self, w):
w_bar, self.u, sigma = spectral_normalization(
super(SpectralNormalizer, self).__call__(w),
wbar, u, sigma = reshaped_kernel_orthogonalization(
w,
self.u,
niter=self.niter_spectral,
self.k_coef_lip,
self.niter_spectral,
self.niter_bjorck,
)
return K.reshape(w_bar, w.shape)
return wbar

def get_config(self):
config = {
"k_coef_lip": self.k_coef_lip,
"niter_spectral": self.niter_spectral,
"niter_bjorck": self.niter_bjorck,
"u": None if self.u is None else self.u.numpy(),
}
base_config = super(SpectralNormalizer, self).get_config()
return dict(list(base_config.items()) + list(config.items()))


@_deel_export
class BjorckNormalizer(SpectralNormalizer):
def __init__(self, niter_spectral=3, niter_bjorck=15, u=None) -> None:
"""
Ensure that *all* singular values of the weight matrix equal 1. Computation
based on the Bjorck algorithm. The computation is done in two steps:
1. reduce the largest singular value to 1, using the iterated power method.
2. increase other singular values to 1, using the Bjorck algorithm.
Args:
niter_spectral: number of iterations to find the maximum singular value.
niter_bjorck: number of iterations of the Bjorck algorithm.
u: vector used for iterated power method, can be set to None (used for
serialization/deserialization purposes).
"""
self.niter_bjorck = niter_bjorck
super(BjorckNormalizer, self).__init__(niter_spectral, u)

def __call__(self, w):
w_bar, self.u, sigma = spectral_normalization(
w, self.u, niter=self.niter_spectral
)
w_bar = bjorck_normalization(w_bar, niter=self.niter_bjorck)
return K.reshape(w_bar, shape=w.shape)

def get_config(self):
config = {"niter_bjorck": self.niter_bjorck}
base_config = super(BjorckNormalizer, self).get_config()
base_config = super(SpectralConstraint, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
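To close, a sketch of how the renamed `SpectralConstraint` can be attached to a stock Keras layer, using the constructor shown above. Note that deel-lip's dedicated layers (e.g. `SpectralDense`) are the usual way to get this behaviour; the constraint form is shown only for illustration, and assumes `u=None` is handled internally by the power iteration:

```python
import tensorflow as tf
from deel.lip.constraints import SpectralConstraint

# Dense layer whose kernel is re-projected after each update so that
# all of its singular values end up (approximately) at k_coef_lip.
layer = tf.keras.layers.Dense(
    64,
    kernel_constraint=SpectralConstraint(
        k_coef_lip=1.0,     # target Lipschitz constant of the kernel
        niter_spectral=3,   # power-method iterations (largest sigma)
        niter_bjorck=15,    # Bjorck iterations (remaining sigmas)
    ),
)

# Quick check: after one projection, the singular values of the
# constrained kernel should all be close to 1.
x = tf.random.normal((8, 32))
_ = layer(x)  # build the layer so layer.kernel exists
w_bar = layer.kernel_constraint(layer.kernel)
print(tf.linalg.svd(w_bar, compute_uv=False).numpy())
```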