diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d3f4c24..f102bd4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,17 +4,17 @@ fail_fast: true repos: - repo: https://github.com/psf/black - rev: 22.8.0 + rev: 23.9.1 hooks: - id: black - repo: https://github.com/timothycrosley/isort - rev: 5.10.1 + rev: 5.12.0 hooks: - id: isort - repo: https://github.com/pycqa/flake8 - rev: 5.0.4 + rev: 6.1.0 hooks: - id: flake8 additional_dependencies: [flake8-isort] diff --git a/CI/unit_tests/data/test_decision_boundary.py b/CI/unit_tests/data/test_decision_boundary.py index 07c36bb..1a22ad5 100644 --- a/CI/unit_tests/data/test_decision_boundary.py +++ b/CI/unit_tests/data/test_decision_boundary.py @@ -25,11 +25,16 @@ ------- Unit test for the decision boundary. """ -from znnl.data.decision_boundary import linear_boundary, circle, DecisionBoundaryGenerator -import numpy as onp import jax.numpy as np +import numpy as onp from pytest import approx +from znnl.data.decision_boundary import ( + DecisionBoundaryGenerator, + circle, + linear_boundary, +) + class TestDecisionBoundary: """ @@ -44,7 +49,7 @@ def test_linear_boundary(self): for _ in range(10): input_data = onp.random.uniform(0, 1, size=(10000, 2)) target_ratio += linear_boundary(input_data, 1.0, 0.0).mean() - + assert target_ratio / 10 == approx(0.5, rel=0.01) def test_circle(self): @@ -55,7 +60,7 @@ def test_circle(self): for _ in range(10): input_data = onp.random.uniform(0, 1, size=(10000, 2)) target_ratio += circle(input_data, 0.25).mean() - + # P(x in class 1) = 1 - (pi / 16) assert target_ratio / 10 == approx(1 - (np.pi / 16), abs=0.01) @@ -64,9 +69,7 @@ def test_one_hot_decision_boundary_generator(self): Test the actual generator. """ generator = DecisionBoundaryGenerator( - n_samples=10000, - discriminator="circle", - one_hot=True + n_samples=10000, discriminator="circle", one_hot=True ) # Check the dataset shapes @@ -80,9 +83,7 @@ def test_serial_decision_boundary_generator(self): Test the actual generator. """ generator = DecisionBoundaryGenerator( - n_samples=10000, - discriminator="circle", - one_hot=False + n_samples=10000, discriminator="circle", one_hot=False ) # Check the dataset shapes diff --git a/requirements.txt b/requirements.txt index 7ec3074..1de810f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ numpy matplotlib sphinx -flake8==5.0.4 -black==22.8.0 +flake8 +black ipython numpydoc optax @@ -20,7 +20,7 @@ tqdm pandas neural-tangents tensorflow-datasets -isort==5.10.1 +isort tensorflow pyyaml jupyter diff --git a/znnl/data/decision_boundary.py b/znnl/data/decision_boundary.py index 4be1631..664f3a6 100644 --- a/znnl/data/decision_boundary.py +++ b/znnl/data/decision_boundary.py @@ -25,20 +25,19 @@ ------- Data generator for decision boundary problems. """ -from znnl.data.data_generator import DataGenerator import jax import jax.numpy as np +import matplotlib.pyplot as plt import numpy as onp + +from znnl.data.data_generator import DataGenerator from znnl.utils.prng import PRNGKey -import matplotlib.pyplot as plt -def linear_boundary( - data: onp.ndarray, gradient: float, intercept: float -) -> np.ndarray: +def linear_boundary(data: onp.ndarray, gradient: float, intercept: float) -> np.ndarray: """ Create a linear boundary between classes. - + Parameters ---------- data : np.ndarray (n_samples, 2) @@ -50,23 +49,22 @@ def linear_boundary( """ # y = m * x + c reference_values = gradient * data[:, 0] + intercept - + differences = data[:, 1] - reference_values differences[differences > 0] = 1 - differences[differences < 0.] = 0 - + differences[differences < 0.0] = 0 + return differences -def circle( - data: onp.ndarray, radius: float = 0.25 -): + +def circle(data: onp.ndarray, radius: float = 0.25): """ Create a circular classification problem. - + For simplicity, assume the points inside the circle are class 0 and outside are class 1. - + Parameters ---------- data : np.ndarray @@ -75,10 +73,10 @@ class 0 and outside are class 1. Radius of the circle. """ radii = onp.linalg.norm(data - 0.5, axis=1) - - radii[radii < radius] = 0. - radii[radii > radius] = 1. - + + radii[radii < radius] = 0.0 + radii[radii > radius] = 1.0 + return radii @@ -88,24 +86,24 @@ class DecisionBoundaryGenerator(DataGenerator): """ def __init__( - self, - n_samples: int, - discriminator: str = "line", - one_hot: bool = True, - gradient: float = 1.0, - y_intercept: float = 0.0, - radius: float = 0.25, - seed: int = None - ): + self, + n_samples: int, + discriminator: str = "line", + one_hot: bool = True, + gradient: float = 1.0, + y_intercept: float = 0.0, + radius: float = 0.25, + seed: int = None, + ): """ Instantiate the class. - + Parameters ---------- n_samples : int Number of samples to generate per class. discriminator : str - String to define the discriminator to use. + String to define the discriminator to use. Options are "line" and "circle". one_hot : bool Whether to use one-hot encoding for the classes. @@ -129,10 +127,10 @@ def __init__( self.args = (radius,) else: raise ValueError("Discriminator not recognised.") - + self.train_ds = self._build_dataset(n_samples=n_samples) self.test_ds = self._build_dataset(n_samples=n_samples) - + def _build_dataset(self, n_samples: int): """ Helper method to create datasets quickly. @@ -143,19 +141,15 @@ def _build_dataset(self, n_samples: int): Number of samples to generate per class. """ # Create the data-sets - data = onp.array(jax.random.uniform( - self.rng(), minval=0., maxval=1., shape=(n_samples, 2) - )) - data = onp.clip(data, 0., 1.) + data = onp.array( + jax.random.uniform(self.rng(), minval=0.0, maxval=1.0, shape=(n_samples, 2)) + ) + data = onp.clip(data, 0.0, 1.0) targets = self.discriminator(data, *self.args) # build classes (0, 1) - - class_one_indices = np.where( - targets == 0 - )[0] - class_two_indices = np.where( - targets == 1 - )[0] + class_one_indices = np.where(targets == 0)[0] + + class_two_indices = np.where(targets == 1)[0] indices = np.hstack((class_one_indices, class_two_indices)) indices = jax.random.shuffle(self.rng(), indices) @@ -164,12 +158,12 @@ def _build_dataset(self, n_samples: int): targets = np.array(jax.nn.one_hot(targets, num_classes=2)) else: targets = targets.reshape(-1, 1) - + return { "inputs": np.take(data, indices, axis=0), - "targets": np.take(targets, indices, axis=0) + "targets": np.take(targets, indices, axis=0), } - + def plot(self): """ Plot the training and test datasets. @@ -180,7 +174,7 @@ def plot(self): ax[0].scatter( self.train_ds["inputs"][:, 0], self.train_ds["inputs"][:, 1], - c=self.train_ds["targets"][:, 0] + c=self.train_ds["targets"][:, 0], ) ax[0].set_title("Training Data") ax[0].set_xlabel("x") @@ -190,13 +184,10 @@ def plot(self): ax[1].scatter( self.test_ds["inputs"][:, 0], self.test_ds["inputs"][:, 1], - c=self.test_ds["targets"][:, 0] + c=self.test_ds["targets"][:, 0], ) ax[1].set_title("Test Data") ax[1].set_xlabel("x") ax[1].set_ylabel("y") plt.show() - - - diff --git a/znnl/training_strategies/partitioned_training.py b/znnl/training_strategies/partitioned_training.py index fdab899..a08632b 100644 --- a/znnl/training_strategies/partitioned_training.py +++ b/znnl/training_strategies/partitioned_training.py @@ -244,7 +244,7 @@ def train_model( Number of epochs to train over. Each epoch defines a training phase. train_ds_selection : list - (default = [slice(-1, None, None), slice(None, None, None)]) + (default = [slice(-1, None, None), slice(None, None, None)]) The train is selected by a np.array of indices or slices. Each slice or array defines a training phase. batch_size : list (default = [1, 1])