
Merge pull request #40 from deel-ai/develop
Develop
thib-s authored Sep 10, 2021
2 parents 2929ceb + 30419c8 commit 5e14d25
Showing 52 changed files with 6,146 additions and 1,628 deletions.
17 changes: 10 additions & 7 deletions README.md
@@ -3,6 +3,8 @@
[![Python](https://img.shields.io/pypi/pyversions/deel-lip.svg)](https://pypi.org/project/deel-lip)
[![PyPI](https://img.shields.io/pypi/v/deel-lip.svg)](https://pypi.org/project/deel-lip)
[![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://deel-lip.readthedocs.io)
[![deel-lip tests](https://github.com/deel-ai/deel-lip/actions/workflows/python-tests.yml/badge.svg?branch=master)](https://github.com/deel-ai/deel-lip/actions/workflows/python-tests.yml)
[![deel-lip linters](https://github.com/deel-ai/deel-lip/actions/workflows/python-linters.yml/badge.svg?branch=master)](https://github.com/deel-ai/deel-lip/actions/workflows/python-linters.yml)
[![GitHub license](https://img.shields.io/github/license/deel-ai/deel-lip.svg)](https://github.com/deel-ai/deel-lip/blob/master/LICENSE)

Controlling the Lipschitz constant of a layer or a whole neural network has many applications ranging
@@ -37,7 +39,7 @@ from deel.lip.layers import (
)
from deel.lip.model import Sequential
from deel.lip.activations import GroupSort
from deel.lip.losses import HKR_multiclass_loss
from deel.lip.losses import MulticlassHKR, MulticlassKR
from tensorflow.keras.layers import Input, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.datasets import mnist
@@ -46,7 +48,8 @@ import numpy as np

# Sequential (resp. Model) from deel.lip.model has the same properties as any Lipschitz model.
# It acts only as a container, with features specific to Lipschitz
# functions (condensation, vanilla_exportation...)
# functions (condensation, vanilla_exportation...), but the layers are fully compatible
# with tf.keras.models.Sequential/Model
model = Sequential(
[
Input(shape=(28, 28, 1)),
@@ -92,9 +95,9 @@ model = Sequential(
model.compile(
# decreasing alpha and increasing min_margin improve robustness (at the cost of accuracy)
# note also that in the case of Lipschitz networks, more robustness requires more parameters.
loss=HKR_multiclass_loss(alpha=25, min_margin=0.25),
optimizer=Adam(lr=0.005),
metrics=["accuracy"],
loss=MulticlassHKR(alpha=50, min_margin=0.05),
optimizer=Adam(1e-3),
metrics=["accuracy", MulticlassKR()],
)

model.summary()
@@ -116,8 +119,8 @@ y_test = to_categorical(y_test)
model.fit(
x_train,
y_train,
batch_size=256,
epochs=15,
batch_size=2048,
epochs=30,
validation_data=(x_test, y_test),
shuffle=True,
)
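The renamed loss and metric can be sanity-checked outside of a full training run. A minimal sketch, assuming only the `MulticlassHKR` and `MulticlassKR` classes imported in the snippet above (batch shapes and values are illustrative):

```python
import tensorflow as tf
from deel.lip.losses import MulticlassHKR, MulticlassKR

# One-hot labels and raw scores for a 10-class toy batch.
y_true = tf.one_hot([3, 7], depth=10)
y_pred = tf.random.normal((2, 10))

# Same hyper-parameters as in model.compile above; per the comment
# there, decreasing alpha and increasing min_margin trades accuracy
# for robustness.
loss = MulticlassHKR(alpha=50, min_margin=0.05)
metric = MulticlassKR()

print("HKR loss:", loss(y_true, y_pred).numpy())
print("KR term :", metric(y_true, y_pred).numpy())
```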
12 changes: 6 additions & 6 deletions deel/lip/activations.py
@@ -12,10 +12,10 @@
from tensorflow.keras.constraints import MinMaxNorm
from tensorflow.keras.layers import Layer, PReLU
from .layers import LipschitzLayer
from .utils import _deel_export
from tensorflow.keras.utils import register_keras_serializable


@_deel_export
@register_keras_serializable("deel-lip", "MaxMin")
class MaxMin(Layer, LipschitzLayer):
def __init__(self, data_format="channels_last", k_coef_lip=1.0, *args, **kwargs):
"""
@@ -80,7 +80,7 @@ def compute_output_shape(self, input_shape):
return new_shape


@_deel_export
@register_keras_serializable("deel-lip", "GroupSort")
class GroupSort(Layer, LipschitzLayer):
def __init__(
self, n=None, data_format="channels_last", k_coef_lip=1.0, *args, **kwargs
@@ -155,7 +155,7 @@ def compute_output_shape(self, input_shape):
return input_shape


@_deel_export
@register_keras_serializable("deel-lip", "GroupSort2")
class GroupSort2(GroupSort):
def __init__(self, **kwargs):
"""
@@ -174,7 +174,7 @@ def __init__(self, **kwargs):
super().__init__(**kwargs)


@_deel_export
@register_keras_serializable("deel-lip", "FullSort")
class FullSort(GroupSort):
def __init__(self, **kwargs):
"""
@@ -193,7 +193,7 @@ def __init__(self, **kwargs):
super().__init__(**kwargs)


@_deel_export
@register_keras_serializable("deel-lip", "PReLUlip")
def PReLUlip(k_coef_lip=1.0):
"""
PReLU activation, with Lipschitz constraint.
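Since every activation above is now registered under the "deel-lip" package with `register_keras_serializable`, models using them can be saved and reloaded without hand-building a `custom_objects` dictionary. A small sketch of the activation and of the serialization round-trip; the toy values and file path are illustrative:

```python
import tensorflow as tf
from deel.lip.activations import GroupSort2

# GroupSort2 sorts pre-activations within consecutive pairs of
# channels; like MaxMin it is a 1-Lipschitz, gradient-norm-preserving
# activation.
act = GroupSort2()
x = tf.constant([[1.0, -2.0, 0.5, 3.0]])
print(act(x).numpy())  # each pair (1, -2) and (0.5, 3) comes out sorted

# Thanks to the registration, a saved model should reload without an
# explicit custom_objects mapping.
model = tf.keras.Sequential([tf.keras.layers.InputLayer(input_shape=(4,)), act])
model.save("groupsort_demo.h5")
reloaded = tf.keras.models.load_model("groupsort_demo.h5")
```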
29 changes: 23 additions & 6 deletions deel/lip/callbacks.py
@@ -10,6 +10,7 @@

import tensorflow as tf
from tensorflow.keras.callbacks import Callback

from .layers import Condensable


@@ -58,6 +59,7 @@ def __init__(
self,
monitored_layers: Iterable[str],
logdir: str,
target: str = "kernel",
what: str = "max",
on_epoch: bool = True,
on_batch: bool = False,
@@ -72,6 +74,10 @@
Args:
monitored_layers: list of layer names to monitor.
logdir: path to the logging directory.
target: describes what to monitor; can be either "kernel" or "wbar". Setting
to "kernel" checks values of the unconstrained weights, while setting to
"wbar" checks values of the constrained weights (allowing to verify that
the parameters actually satisfy the Lipschitz constraint)
what: either "max", which displays the largest singular value over the
training process, or "all", which plots the distribution of all singular
values.
@@ -80,6 +86,8 @@
"""
self.on_epoch = on_epoch
self.on_batch = on_batch
assert target in {"kernel", "wbar"}
self.target = target
assert what in {"max", "all"}
self.what = what
self.logdir = logdir
@@ -96,10 +104,14 @@ def _monitor(self, step):
step = self.params["steps"] * self.epochs + step
for layer_name in self.monitored_layers:
layer = self.model.get_layer(layer_name)
if (self.what == "max") and hasattr(layer, "sig"):
if (
(self.target == "kernel")
and (self.what == "max")
and hasattr(layer, "sig")
):
sig = layer.sig[0, 0]
elif hasattr(layer, "kernel"):
kernel = layer.kernel
elif hasattr(layer, self.target):
kernel = getattr(layer, self.target)
w_shape = kernel.shape.as_list()
sigmas = tf.linalg.svd(
tf.keras.backend.reshape(kernel, [-1, w_shape[-1]]),
@@ -108,15 +120,20 @@
).numpy()
sig = sigmas[0]
else:
RuntimeWarning("[MonitorCallback] unsupported layer")
RuntimeWarning(
f"[MonitorCallback] layer {layer_name} has no "
f"attribute {self.target}"
)
return
if self.what == "max":
with self.file_writer.as_default():
result = tf.summary.scalar("%s_sigma" % layer_name, sig, step=step)
result = tf.summary.scalar(
f"{layer_name}_{self.target}_sigmas", sig, step=step
)
else:
with self.file_writer.as_default():
result = tf.summary.histogram(
"%s_sigmas" % layer_name,
f"{layer_name}_{self.target}_sigmas",
sigmas,
step=step,
buckets=None,
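A usage sketch for the extended callback, based on the constructor shown above; the layer name and log directory are placeholders:

```python
from deel.lip.callbacks import MonitorCallback

# Monitor the constrained weights ("wbar") of one named layer: for a
# Lipschitz layer, their largest singular value should stay at 1.
# With target="kernel" the raw, unconstrained weights are monitored
# instead.
monitor = MonitorCallback(
    monitored_layers=["spectral_dense_1"],  # placeholder layer name
    logdir="./logs",
    target="wbar",
    what="max",        # log only the largest singular value
    on_epoch=True,     # record once per epoch
    on_batch=False,
)

# model.fit(x_train, y_train, epochs=5, callbacks=[monitor])
```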
104 changes: 43 additions & 61 deletions deel/lip/constraints.py
@@ -9,12 +9,12 @@
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.constraints import Constraint
from .normalizers import bjorck_normalization, spectral_normalization
from .utils import _deel_export
from .normalizers import reshaped_kernel_orthogonalization
from tensorflow.keras.utils import register_keras_serializable


@_deel_export
class WeightClip(Constraint):
@register_keras_serializable("deel-lip", "WeightClipConstraint")
class WeightClipConstraint(Constraint):
def __init__(self, c=2):
"""
Clips the weights incident to each hidden unit to be inside the range `[-c,+c]`.
@@ -28,11 +28,11 @@ def __call__(self, p):
return K.clip(p, -self.c, self.c)

def get_config(self):
return {"name": self.__class__.__name__, "c": self.c}
return {"c": self.c}


@_deel_export
class AutoWeightClip(Constraint):
@register_keras_serializable("deel-lip", "AutoWeightClipConstraint")
class AutoWeightClipConstraint(Constraint):
def __init__(self, scale=1):
"""
Clips the weights incident to each hidden unit to be inside the range `[-c,+c]`.
@@ -42,93 +42,75 @@ def __init__(self, scale=1):
scale: scaling factor to increase/decrease clipping value.
"""
self.scale = scale
self.c = None

def __call__(self, w):
self.c = 1 / (tf.sqrt(tf.cast(tf.size(w), dtype=tf.float64)) * self.scale)
return tf.clip_by_value(w, -self.c, self.c)
c = 1 / (tf.sqrt(tf.cast(tf.size(w), dtype=w.dtype)) * self.scale)
return tf.clip_by_value(w, -c, c)

def get_config(self):
return {"name": self.__class__.__name__, "scale": self.scale, "c": self.c}
return {"scale": self.scale}


@_deel_export
class FrobeniusNormalizer(Constraint):
@register_keras_serializable("deel-lip", "FrobeniusConstraint")
class FrobeniusConstraint(Constraint):
# todo: duplicate of keras/constraints/UnitNorm ?

def __init__(self, **kwargs):
def __init__(self, eps=1e-7):
"""
Clips the weights incident to each hidden unit to be inside the range `[-c,+c]`.
With c = 1/norm(kernel).
Constrain the weights by dividing the weight matrix by its L2 norm.
"""
super(FrobeniusNormalizer, self).__init__(**kwargs)
self.eps = eps

def __call__(self, w):
return w * tf.sqrt(tf.reduce_sum(tf.square(w), keepdims=False))
return w / (tf.sqrt(tf.reduce_sum(tf.square(w), keepdims=False)) + self.eps)

def get_config(self):
return {"eps": self.eps}


@_deel_export
class SpectralNormalizer(Constraint):
def __init__(self, niter_spectral=3, u=None) -> None:
@register_keras_serializable("deel-lip", "SpectralConstraint")
class SpectralConstraint(Constraint):
def __init__(
self, k_coef_lip=1.0, niter_spectral=3, niter_bjorck=15, u=None
) -> None:
"""
Ensure that the weights matrix have sigma_max == 1 (maximum singular value of
the weights matrix).
Ensure that *all* singular values of the weight matrix equal k_coef_lip. Computation
based on the Bjorck algorithm. The computation is done in two steps:
1. reduce the largest singular value to k_coef_lip, using the iterated power method.
2. increase other singular values to k_coef_lip, using the Bjorck algorithm.
Args:
k_coef_lip: Lipschitz coefficient of the weight matrix.
niter_spectral: number of iterations to find the maximum singular value.
niter_bjorck: number of iterations of the Bjorck algorithm.
u: vector used for the iterated power method; can be set to None (used for
serialization/deserialization purposes).
"""
self.niter_spectral = niter_spectral
self.niter_bjorck = niter_bjorck
self.k_coef_lip = k_coef_lip
if not (isinstance(u, tf.Tensor) or (u is None)):
u = tf.convert_to_tensor(u)
self.u = u
super(SpectralNormalizer, self).__init__()
super(SpectralConstraint, self).__init__()

def __call__(self, w):
w_bar, self.u, sigma = spectral_normalization(
super(SpectralNormalizer, self).__call__(w),
wbar, u, sigma = reshaped_kernel_orthogonalization(
w,
self.u,
niter=self.niter_spectral,
self.k_coef_lip,
self.niter_spectral,
self.niter_bjorck,
)
return K.reshape(w_bar, w.shape)
return wbar

def get_config(self):
config = {
"k_coef_lip": self.k_coef_lip,
"niter_spectral": self.niter_spectral,
"niter_bjorck": self.niter_bjorck,
"u": None if self.u is None else self.u.numpy(),
}
base_config = super(SpectralNormalizer, self).get_config()
return dict(list(base_config.items()) + list(config.items()))


@_deel_export
class BjorckNormalizer(SpectralNormalizer):
def __init__(self, niter_spectral=3, niter_bjorck=15, u=None) -> None:
"""
Ensure that *all* singular values of the weight matrix equal 1. Computation
based on the Bjorck algorithm. The computation is done in two steps:
1. reduce the largest singular value to 1, using the iterated power method.
2. increase other singular values to 1, using the Bjorck algorithm.
Args:
niter_spectral: number of iterations to find the maximum singular value.
niter_bjorck: number of iterations of the Bjorck algorithm.
u: vector used for iterated power method, can be set to None (used for
serialization/deserialization purposes).
"""
self.niter_bjorck = niter_bjorck
super(BjorckNormalizer, self).__init__(niter_spectral, u)

def __call__(self, w):
w_bar, self.u, sigma = spectral_normalization(
w, self.u, niter=self.niter_spectral
)
w_bar = bjorck_normalization(w_bar, niter=self.niter_bjorck)
return K.reshape(w_bar, shape=w.shape)

def get_config(self):
config = {"niter_bjorck": self.niter_bjorck}
base_config = super(BjorckNormalizer, self).get_config()
base_config = super(SpectralConstraint, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
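To close, a sketch of how the renamed `SpectralConstraint` can be attached to a stock Keras layer, using the constructor shown above. Note that deel-lip's dedicated layers (e.g. `SpectralDense`) are the usual way to get this behaviour; the constraint form is shown only for illustration, and assumes `u=None` is handled internally by the power iteration:

```python
import tensorflow as tf
from deel.lip.constraints import SpectralConstraint

# Dense layer whose kernel is re-projected after each update so that
# all of its singular values end up (approximately) at k_coef_lip.
layer = tf.keras.layers.Dense(
    64,
    kernel_constraint=SpectralConstraint(
        k_coef_lip=1.0,     # target Lipschitz constant of the kernel
        niter_spectral=3,   # power-method iterations (largest sigma)
        niter_bjorck=15,    # Bjorck iterations (remaining sigmas)
    ),
)

# Quick check: after one projection, the singular values of the
# constrained kernel should all be close to 1.
x = tf.random.normal((8, 32))
_ = layer(x)  # build the layer so layer.kernel exists
w_bar = layer.kernel_constraint(layer.kernel)
print(tf.linalg.svd(w_bar, compute_uv=False).numpy())
```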