diff --git a/README.MD b/README.MD
index 7630fb4..15f7e14 100644
--- a/README.MD
+++ b/README.MD
@@ -46,7 +46,7 @@ Edux supports a variety of image augmentations, which can be used to increase th
 
 #### Single Image
 
-````java
+```
 AugmentationSequence augmentationSequence=
         new AugmentationBuilder()
           .addAugmentation(new ResizeAugmentation(250,250))
@@ -54,11 +54,11 @@ Edux supports a variety of image augmentations, which can be used to increase th
         .build();
 
 BufferedImage augmentedImage=augmentationSequence.applyTo(image);
-````
+```
 
 #### Run for all images in a directory
 
-```java
+```
 AugmentationSequence augmentationSequence=
         new AugmentationBuilder()
           .addAugmentation(new ResizeAugmentation(250,250))
@@ -127,78 +127,74 @@ A multi-layer perceptron (MLP) is a feedforward artificial neural network that g
 input features. An MLP is characterized by several layers of input nodes connected as a directed
 graph between the input and output layers.
 
-![Neural Network](https://hc-linux.eu/github/iris-nn.png)
+### Step 0: Get Familiar with the Dataset
 
-### Step 1: Data Processing
+In this example we use the famous MNIST dataset. The MNIST database contains 60,000 training images and 10,000 testing images.
 
-Firstly, we will load and prepare the IRIS dataset:
+![](https://hc-linux.eu/edux/mnist-examples.png)
 
-| sepal.length | sepal.width | petal.length | petal.width | variety |
-|--------------|-------------|--------------|-------------|---------|
-| 5.1          | 3.5         | 1.4          | 0.2         | Setosa  |
-
-```java
-var featureColumnIndices=new int[]{0,1,2,3}; // Specify your feature columns
-        var targetColumnIndex=4; // Specify your target column
+### Step 1: Data Processing
 
-        var dataProcessor=new DataProcessor(new CSVIDataReader());
-        var dataset=dataProcessor.loadDataSetFromCSV(
-        "path/to/your/data.csv", // Replace with your CSV file path
-        ',', // CSV delimiter
-        true, // Whether to skip the header
-        featureColumnIndices,
-        targetColumnIndex
-        );
-        dataset.shuffle();
-        dataset.normalize();
-        dataProcessor.split(0.8); // Replace with your train-test split ratio
 ```
+    String trainImages = "train-images.idx3-ubyte";
+    String trainLabels = "train-labels.idx1-ubyte";
+    String testImages = "t10k-images.idx3-ubyte";
+    String testLabels = "t10k-labels.idx1-ubyte";
+    Loader trainLoader = new ImageLoader(trainImages, trainLabels, batchSize);
+    Loader testLoader = new ImageLoader(testImages, testLabels, batchSize);
 
-### Step 2: Preparing Training and Test Sets:
+```
 
-Extract the features and labels for both training and test sets:
+### Step 2: Configure the MultilayerPerceptron
 
-```java
-    var trainFeatures=dataProcessor.getTrainFeatures(featureColumnIndices);
-    var trainLabels=dataProcessor.getTrainLabels(targetColumnIndex);
-    var testFeatures=dataProcessor.getTestFeatures(featureColumnIndices);
-    var testLabels=dataProcessor.getTestLabels(targetColumnIndex);
 ```
+    int batchSize = 100;
+    ExecutionMode singleThread = ExecutionMode.SINGLE_THREAD;
+    int epochs = 10;
+    float initialLearningRate = 0.1f;
+    float finalLearningRate = 0.001f;
+
+    MetaData trainMetaData = trainLoader.open();
+    int inputSize = trainMetaData.getInputSize();
+    int outputSize = trainMetaData.getExpectedSize();
+    trainLoader.close();
+```
+
+### Step 3: Build the Network
 
-### Step 3: Network Configuration
+We use the NetworkBuilder class:
 
-```java
-var networkConfiguration=new NetworkConfiguration(
-        trainFeatures[0].length, // Number of input neurons
-        List.of(128,256,512), // Number of neurons in each hidden layer
-        3, // Number of output neurons
-        0.01, // Learning rate
-        300, // Number of epochs
-        ActivationFunction.LEAKY_RELU, // Activation function for hidden layers
-        ActivationFunction.SOFTMAX, // Activation function for output layer
-        LossFunction.CATEGORICAL_CROSS_ENTROPY, // Loss function
-        Initialization.XAVIER, // Weight initialization for hidden layers
-        Initialization.XAVIER // Weight initialization for output layer
-        );
 ```
+    new NetworkBuilder()
+        .addLayer(new DenseLayer(inputSize, 32)) // 32 neurons as output size
+        .addLayer(new ReLuLayer())
+        .addLayer(new DenseLayer(32, outputSize)) // 32 neurons as input size
+        .addLayer(new SoftmaxLayer())
+        .withBatchSize(batchSize)
+        .withLearningRates(initialLearningRate, finalLearningRate)
+        .withExecutionMode(singleThread)
+        .withEpochs(epochs)
+        .build()
+        .printArchitecture()
+        .fit(trainLoader, testLoader)
+        .saveModel("model.edux"); // Save the trained model
+```
+
+### Step 4: Load the model and continue training
 
-### Step 4: Training and Evaluation
+Load 'model.edux' and continue training for 10 epochs.
 
-```java
-MultilayerPerceptron multilayerPerceptron=new MultilayerPerceptron(
-        networkConfiguration,
-        testFeatures,
-        testLabels
-        );
-        multilayerPerceptron.train(trainFeatures,trainLabels);
-        multilayerPerceptron.evaluate(testFeatures,testLabels);
+```
+    NeuralNetwork nn =
+        new NetworkBuilder().withEpochs(10).loadModel("model.edux").fit(trainLoader, testLoader);
 ```
 
 ### Results
 
 ```output
+........................Epoch: 1, Loss: 1,14, Accuracy: 91,04
 ...
-MultilayerPerceptron - Best accuracy after restoring best MLP model: 98,56%
+........................Epoch: 10, Loss: 0,13, Accuracy: 96,16
 ```
 
 ### Working examples
diff --git a/example/datasets/mnist/t10k-images.idx3-ubyte b/example/datasets/mnist/t10k-images.idx3-ubyte
new file mode 100644
index 0000000..1170b2c
Binary files /dev/null and b/example/datasets/mnist/t10k-images.idx3-ubyte differ
diff --git a/example/datasets/mnist/t10k-labels.idx1-ubyte b/example/datasets/mnist/t10k-labels.idx1-ubyte
new file mode 100644
index 0000000..d1c3a97
Binary files /dev/null and b/example/datasets/mnist/t10k-labels.idx1-ubyte differ
diff --git a/example/datasets/mnist/train-images.idx3-ubyte b/example/datasets/mnist/train-images.idx3-ubyte
new file mode 100644
index 0000000..bbce276
Binary files /dev/null and b/example/datasets/mnist/train-images.idx3-ubyte differ
diff --git a/example/datasets/mnist/train-labels.idx1-ubyte b/example/datasets/mnist/train-labels.idx1-ubyte
new file mode 100644
index 0000000..d6b4c5d
Binary files /dev/null and b/example/datasets/mnist/train-labels.idx1-ubyte differ
diff --git a/example/src/main/java/de/example/benchmark/Benchmark.java b/example/src/main/java/de/example/benchmark/Benchmark.java
index 059c252..3e682b2 100644
--- a/example/src/main/java/de/example/benchmark/Benchmark.java
+++ b/example/src/main/java/de/example/benchmark/Benchmark.java
@@ -8,8 +8,6 @@ import de.edux.functions.loss.LossFunction;
 import de.edux.ml.decisiontree.DecisionTree;
 import de.edux.ml.knn.KnnClassifier;
-import de.edux.ml.nn.config.NetworkConfiguration;
-import de.edux.ml.nn.network.MultilayerPerceptron;
 import de.edux.ml.randomforest.RandomForest;
 import de.edux.ml.svm.SVMKernel;
 import de.edux.ml.svm.SupportVectorMachine;
@@ -37,8 +35,6 @@ public class Benchmark {
   private double[][] trainLabels;
   private double[][] testFeatures;
   private double[][] testLabels;
-  private MultilayerPerceptron multilayerPerceptron;
-  private NetworkConfiguration networkConfiguration;
   private DataProcessor dataProcessor;
 
@@ -46,7 +42,6 @@ public Benchmark() {
results.put("DecisionTree", new ArrayList<>()); results.put("RandomForest", new ArrayList<>()); results.put("SVM", new ArrayList<>()); - results.put("MLP", new ArrayList<>()); init(); } @@ -80,38 +75,20 @@ private void run() { Classifier randomForest = new RandomForest(500, 10, 2, 3, 3, 60); Classifier svm = new SupportVectorMachine(SVMKernel.LINEAR, 1); - networkConfiguration = - new NetworkConfiguration( - trainFeatures[0].length, - List.of(64, 256, 512), - 3, - 0.01, - 300, - ActivationFunction.LEAKY_RELU, - ActivationFunction.SOFTMAX, - LossFunction.CATEGORICAL_CROSS_ENTROPY, - Initialization.XAVIER, - Initialization.XAVIER); - multilayerPerceptron = - new MultilayerPerceptron(networkConfiguration, testFeatures, testLabels); - knn.train(trainFeatures, trainLabels); decisionTree.train(trainFeatures, trainLabels); randomForest.train(trainFeatures, trainLabels); svm.train(trainFeatures, trainLabels); - multilayerPerceptron.train(trainFeatures, trainLabels); double knnAccuracy = knn.evaluate(testFeatures, testLabels); double decisionTreeAccuracy = decisionTree.evaluate(testFeatures, testLabels); double randomForestAccuracy = randomForest.evaluate(testFeatures, testLabels); double svmAccuracy = svm.evaluate(testFeatures, testLabels); - double multilayerPerceptronAccuracy = multilayerPerceptron.evaluate(testFeatures, testLabels); results.get("KNN").add(knnAccuracy); results.get("DecisionTree").add(decisionTreeAccuracy); results.get("RandomForest").add(randomForestAccuracy); results.get("SVM").add(svmAccuracy); - results.get("MLP").add(multilayerPerceptronAccuracy); init(); } diff --git a/example/src/main/java/de/example/mlp/MlpExampleOnMNIST.java b/example/src/main/java/de/example/mlp/MlpExampleOnMNIST.java new file mode 100644 index 0000000..4d7431f --- /dev/null +++ b/example/src/main/java/de/example/mlp/MlpExampleOnMNIST.java @@ -0,0 +1,87 @@ +package de.example.mlp; + +import de.edux.ml.api.ExecutionMode; +import de.edux.ml.mlp.core.network.NetworkBuilder; +import de.edux.ml.mlp.core.network.layers.DenseLayer; +import de.edux.ml.mlp.core.network.layers.ReLuLayer; +import de.edux.ml.mlp.core.network.layers.SoftmaxLayer; +import de.edux.ml.mlp.core.network.loader.image.ImageLoader; +import de.edux.ml.mlp.core.network.loader.Loader; +import de.edux.ml.mlp.core.network.loader.MetaData; +import java.io.File; + +public class MlpExampleOnMNIST { + public static void main(String[] args) { + String trainImages = + "example" + + File.separator + + "datasets" + + File.separator + + "mnist" + + File.separator + + "train-images.idx3-ubyte"; + String trainLabels = + "example" + + File.separator + + "datasets" + + File.separator + + "mnist" + + File.separator + + "train-labels.idx1-ubyte"; + String testImages = + "example" + + File.separator + + "datasets" + + File.separator + + "mnist" + + File.separator + + "t10k-images.idx3-ubyte"; + String testLabels = + "example" + + File.separator + + "datasets" + + File.separator + + "mnist" + + File.separator + + "t10k-labels.idx1-ubyte"; + + int batchSize = 100; + ExecutionMode singleThread = ExecutionMode.SINGLE_THREAD; + int epochs = 5; + float initialLearningRate = 0.1f; + float finalLearningRate = 0.001f; + + Loader trainLoader = new ImageLoader(trainImages, trainLabels, batchSize); + Loader testLoader = new ImageLoader(testImages, testLabels, batchSize); + + MetaData trainMetaData = trainLoader.open(); + int inputSize = trainMetaData.getInputSize(); + int outputSize = trainMetaData.getExpectedSize(); + trainLoader.close(); + + // Training from scratch + 
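+    // The learning rate decays linearly from initialLearningRate towards
+    // finalLearningRate over the configured epochs (see NeuralNetwork#fit),
+    // so this example anneals from 0.1 to 0.001 across its 5 epochs.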
new NetworkBuilder() + .addLayer(new DenseLayer(inputSize, 128)) + .addLayer(new ReLuLayer()) + .addLayer(new DenseLayer(128, 128)) + .addLayer(new ReLuLayer()) + .addLayer(new DenseLayer(128, outputSize)) + .addLayer(new SoftmaxLayer()) + .withBatchSize(batchSize) + .withLearningRates(initialLearningRate, finalLearningRate) + .withExecutionMode(singleThread) + .withEpochs(epochs) + .build() + .printArchitecture() + .fit(trainLoader, testLoader) + .saveModel("mnist_trained.edux"); + + // Loading a trained model + new NetworkBuilder() + .withExecutionMode(singleThread) + .withEpochs(5) + .withLearningRates(0.01f, 0.001f) + .loadModel("mnist_trained.edux") + .fit(trainLoader, testLoader); + } +} diff --git a/example/src/main/java/de/example/nn/MultilayerNeuralNetworkExampleOnIrisDataset.java b/example/src/main/java/de/example/nn/MultilayerNeuralNetworkExampleOnIrisDataset.java deleted file mode 100644 index 82a5a34..0000000 --- a/example/src/main/java/de/example/nn/MultilayerNeuralNetworkExampleOnIrisDataset.java +++ /dev/null @@ -1,77 +0,0 @@ -package de.example.nn; - -import de.edux.data.provider.DataProcessor; -import de.edux.data.reader.CSVIDataReader; -import de.edux.functions.activation.ActivationFunction; -import de.edux.functions.initialization.Initialization; -import de.edux.functions.loss.LossFunction; -import de.edux.ml.nn.config.NetworkConfiguration; -import de.edux.ml.nn.network.MultilayerPerceptron; -import java.io.File; -import java.util.List; - -public class MultilayerNeuralNetworkExampleOnIrisDataset { - - private static final double TRAIN_TEST_SPLIT_RATIO = 0.70; - private static final File CSV_FILE = - new File( - "example" - + File.separator - + "datasets" - + File.separator - + "iris" - + File.separator - + "iris.csv"); - private static final boolean SKIP_HEAD = true; - - public static void main(String[] args) { - var featureColumnIndices = new int[] {0, 1, 2, 3}; - var targetColumnIndex = 4; - - var dataProcessor = new DataProcessor(new CSVIDataReader()); - var dataset = - dataProcessor.loadDataSetFromCSV( - CSV_FILE, ',', SKIP_HEAD, featureColumnIndices, targetColumnIndex); - dataset.shuffle(); - dataset.normalize(); - dataProcessor.split(TRAIN_TEST_SPLIT_RATIO); - - var trainFeatures = dataProcessor.getTrainFeatures(featureColumnIndices); - var trainLabels = dataProcessor.getTrainLabels(targetColumnIndex); - var testFeatures = dataProcessor.getTestFeatures(featureColumnIndices); - var testLabels = dataProcessor.getTestLabels(targetColumnIndex); - - var classMap = dataProcessor.getClassMap(); - - System.out.println("Class Map: " + classMap); - - // Configure Network with: - // - 4 Input Neurons - // - 2 Hidden Layer with 12 and 6 Neurons - // - 3 Output Neurons - // - Learning Rate of 0.1 - // - 300 Epochs - // - Leaky ReLU as Activation Function for Hidden Layers - // - Softmax as Activation Function for Output Layer - // - Categorical Cross Entropy as Loss Function - // - Xavier as Weight Initialization for Hidden Layers - // - Xavier as Weight Initialization for Output Layer - var networkConfiguration = - new NetworkConfiguration( - trainFeatures[0].length, - List.of(128, 256, 512), - 3, - 0.005, - 300, - ActivationFunction.LEAKY_RELU, - ActivationFunction.SOFTMAX, - LossFunction.CATEGORICAL_CROSS_ENTROPY, - Initialization.XAVIER, - Initialization.XAVIER); - - MultilayerPerceptron multilayerPerceptron = - new MultilayerPerceptron(networkConfiguration, testFeatures, testLabels); - multilayerPerceptron.train(trainFeatures, trainLabels); - 
multilayerPerceptron.evaluate(testFeatures, testLabels); - } -} diff --git a/example/src/main/java/de/example/nn/MultilayerNeuralNetworkExampleOnPenguinsDataset.java b/example/src/main/java/de/example/nn/MultilayerNeuralNetworkExampleOnPenguinsDataset.java deleted file mode 100644 index 98c3bf5..0000000 --- a/example/src/main/java/de/example/nn/MultilayerNeuralNetworkExampleOnPenguinsDataset.java +++ /dev/null @@ -1,94 +0,0 @@ -package de.example.nn; - -import de.edux.data.provider.DataProcessor; -import de.edux.data.reader.CSVIDataReader; -import de.edux.functions.activation.ActivationFunction; -import de.edux.functions.imputation.ImputationStrategy; -import de.edux.functions.initialization.Initialization; -import de.edux.functions.loss.LossFunction; -import de.edux.ml.nn.config.NetworkConfiguration; -import de.edux.ml.nn.network.MultilayerPerceptron; -import java.io.File; -import java.util.List; - -public class MultilayerNeuralNetworkExampleOnPenguinsDataset { - - private static final double TRAIN_TEST_SPLIT_RATIO = 0.70; - private static final File CSV_FILE = - new File( - "example" - + File.separator - + "datasets" - + File.separator - + "seaborn-penguins" - + File.separator - + "penguins.csv"); - private static final boolean SKIP_HEAD = true; - private static final ImputationStrategy averageImputation = ImputationStrategy.AVERAGE; - private static final ImputationStrategy modeImputation = ImputationStrategy.MODE; - - public static void main(String[] args) { - /* Penguins Dataset... - +--------+--------+---------------+--------------+------------------+------------------+ - | species| island| bill_length_mm| bill_depth_mm| flipper_length_mm| body_mass_g| sex| - +--------+--------+---------------+--------------+------------------+------------------+ - | Gentoo | Biscoe | 49.6 | 16 | 225 | 5700 | MALE| - +--------+--------+---------------+--------------+------------------+------------------+ - */ - - var featureColumnIndices = new int[] {1, 2, 3, 4, 5, 6}; - var targetColumnIndex = 0; - - var penguinsDataProcessor = - new DataProcessor(new CSVIDataReader()) - .loadDataSetFromCSV(CSV_FILE, ',', SKIP_HEAD, featureColumnIndices, targetColumnIndex) - .imputation(0, modeImputation) - .imputation(1, modeImputation) - .imputation(2, averageImputation) - .imputation(3, averageImputation) - .imputation(4, averageImputation) - .imputation(5, averageImputation) - .imputation(6, modeImputation) - .normalize() - .shuffle() - .split(TRAIN_TEST_SPLIT_RATIO); - - var trainFeatures = penguinsDataProcessor.getTrainFeatures(featureColumnIndices); - var trainLabels = penguinsDataProcessor.getTrainLabels(targetColumnIndex); - var testFeatures = penguinsDataProcessor.getTestFeatures(featureColumnIndices); - var testLabels = penguinsDataProcessor.getTestLabels(targetColumnIndex); - - var classMap = penguinsDataProcessor.getClassMap(); - - System.out.println("Class Map: " + classMap); - - // Configure Network with: - // - 4 Input Neurons - // - 2 Hidden Layer with 12 and 6 Neurons - // - 3 Output Neurons - // - Learning Rate of 0.1 - // - 1000 Epochs - // - Leaky ReLU as Activation Function for Hidden Layers - // - Softmax as Activation Function for Output Layer - // - Categorical Cross Entropy as Loss Function - // - Xavier as Weight Initialization for Hidden Layers - // - Xavier as Weight Initialization for Output Layer - var networkConfiguration = - new NetworkConfiguration( - trainFeatures[0].length, - List.of(128, 256, 512), - 3, - 0.01, - 300, - ActivationFunction.LEAKY_RELU, - ActivationFunction.SOFTMAX, 
- LossFunction.CATEGORICAL_CROSS_ENTROPY, - Initialization.XAVIER, - Initialization.XAVIER); - - MultilayerPerceptron multilayerPerceptron = - new MultilayerPerceptron(networkConfiguration, testFeatures, testLabels); - multilayerPerceptron.train(trainFeatures, trainLabels); - multilayerPerceptron.evaluate(testFeatures, testLabels); - } -} diff --git a/lib/src/main/java/de/edux/ml/api/ExecutionMode.java b/lib/src/main/java/de/edux/ml/api/ExecutionMode.java new file mode 100644 index 0000000..fce0992 --- /dev/null +++ b/lib/src/main/java/de/edux/ml/api/ExecutionMode.java @@ -0,0 +1,37 @@ +package de.edux.ml.api; + +/** + * This mode determines how batches are processed during training and testing. + * + *
<p>Regardless of the chosen execution mode, all matrix operations are executed in parallel, with
+ * the ExecutionMode parallelism referring to the processing of batches. Currently supported:
+ *
+ * <ul>
+ *   <li>{@link #SINGLE_THREAD}: all batches are processed sequentially in a single thread</li>
+ * </ul>
+ */
+public enum ExecutionMode {
+  /**
+   * Single-thread execution mode. In this mode, all batches are processed sequentially in a single
+   * thread.
+   */
+  SINGLE_THREAD(1);
+
+  private final int threads;
+
+  ExecutionMode(int threads) {
+    this.threads = threads;
+  }
+
+  public int getThreads() {
+    return threads;
+  }
+
+  public static ExecutionMode fromString(String mode) {
+    if (mode.equalsIgnoreCase("single_thread")) {
+      return SINGLE_THREAD;
+    }
+    throw new IllegalArgumentException("Unknown execution mode: " + mode);
+  }
+}
diff --git a/lib/src/main/java/de/edux/ml/mlp/core/network/BatchResult.java b/lib/src/main/java/de/edux/ml/mlp/core/network/BatchResult.java
new file mode 100644
index 0000000..26b931f
--- /dev/null
+++ b/lib/src/main/java/de/edux/ml/mlp/core/network/BatchResult.java
@@ -0,0 +1,78 @@
+package de.edux.ml.mlp.core.network;
+
+import de.edux.ml.mlp.core.tensor.Matrix;
+
+import java.io.Serializable;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
+
+public class BatchResult implements Serializable {
+
+  private AtomicReference<Matrix> accumulatedWeightGradient = new AtomicReference<>();
+  private AtomicReference<Matrix> accumulatedBiasGradient = new AtomicReference<>();
+  private AtomicReference<Matrix> lastInput = new AtomicReference<>();
+
+  private AtomicReference<Float> learningRate = new AtomicReference<>();
+
+  private AtomicInteger counter = new AtomicInteger(0);
+
+  private AtomicInteger length = new AtomicInteger(0);
+
+  public BatchResult() {
+    counter.incrementAndGet();
+  }
+
+  public AtomicInteger getCounter() {
+    return counter;
+  }
+
+  public synchronized void addGradients(
+      Matrix weightsGradient, Matrix biasGradient, float learningRate, Matrix lastInput) {
+
+    this.length.incrementAndGet();
+
+    if (accumulatedWeightGradient.get() == null) {
+      accumulatedWeightGradient.set(weightsGradient);
+    } else {
+      accumulatedWeightGradient.set(accumulatedWeightGradient.get().add(weightsGradient));
+    }
+
+    if (accumulatedBiasGradient.get() == null) {
+      accumulatedBiasGradient.set(biasGradient);
+    } else {
+      accumulatedBiasGradient.set(accumulatedBiasGradient.get().add(biasGradient));
+    }
+
+    this.lastInput.set(lastInput);
+    this.learningRate.set(learningRate);
+  }
+
+  public AtomicReference<Matrix> getAccumulatedWeightGradient() {
+    return accumulatedWeightGradient;
+  }
+
+  public AtomicReference<Matrix> getAccumulatedBiasGradient() {
+    return accumulatedBiasGradient;
+  }
+
+  public AtomicReference<Matrix> getLastInput() {
+    return lastInput;
+  }
+
+  public AtomicReference<Float> getLearningRate() {
+    return learningRate;
+  }
+
+  public void clear() {
+    accumulatedWeightGradient.set(null);
+    accumulatedBiasGradient.set(null);
+    lastInput.set(null);
+    learningRate.set(null);
+    length.set(0);
+  }
+
+  public int getLength() {
+    return length.get();
+  }
+}
diff --git a/lib/src/main/java/de/edux/ml/mlp/core/network/Engine.java b/lib/src/main/java/de/edux/ml/mlp/core/network/Engine.java
new file mode 100644
index 0000000..f22d3d5
--- /dev/null
+++ b/lib/src/main/java/de/edux/ml/mlp/core/network/Engine.java
@@ -0,0 +1,125 @@
+package de.edux.ml.mlp.core.network;
+
+import de.edux.ml.mlp.core.network.loss.LossFunction;
+import de.edux.ml.mlp.core.network.loss.LossFunctions;
+import de.edux.ml.mlp.core.tensor.Matrix;
+import de.edux.ml.mlp.core.transformer.Transform;
+import de.edux.ml.mlp.exceptions.UnsupportedLossFunction;
+import java.io.Serializable;
+import java.util.LinkedList;
+
+// Network
+public class Engine implements Layer, Serializable {
+  private static final long serialVersionUID = 1L;
+  private final LinkedList<Double> lossHistory = new LinkedList<>();
+  private final LinkedList<Double> accuracyHistory = new LinkedList<>();
+  private final LinkedList<Transform> transforms = new LinkedList<>();
+  private final LinkedList<Matrix> weights = new LinkedList<>();
+  private final LinkedList<Matrix> biases = new LinkedList<>();
+
+  private final LinkedList<Layer> layers = new LinkedList<>();
+
+  private final LossFunction lossFunction = LossFunction.CROSS_ENTROPY;
+
+  private transient RunningAverages runningAverages;
+
+  public Engine(int batchSize) {
+    this.batchSize = batchSize;
+    initAverageMetrics();
+  }
+
+  private int batchSize;
+
+  @Override
+  public Matrix backwardLayerBased(Matrix error, float learningRate) {
+    for (int i = layers.size() - 1; i >= 0; i--) {
+      error = layers.get(i).backwardLayerBased(error, learningRate);
+    }
+
+    return error;
+  }
+
+  @Override
+  public Matrix forwardLayerbased(Matrix input) {
+    Matrix output = input;
+    for (Layer layer : layers) {
+      output = layer.forwardLayerbased(output);
+    }
+    return output;
+  }
+
+  public synchronized double evaluateLayerBased(Matrix predicted, Matrix expected) {
+    if (LossFunction.CROSS_ENTROPY != lossFunction) {
+      throw new UnsupportedLossFunction("Only Cross Entropy is supported.");
+    }
+
+    double loss = LossFunctions.crossEntropy(expected, predicted).averageColumn().get(0);
+    Matrix predictions = predicted.getGreatestRowNumber();
+    Matrix actual = expected.getGreatestRowNumber();
+
+    int correct = 0;
+    for (int i = 0; i < actual.getCols(); i++) {
+      if (predictions.get(i) == actual.get(i)) {
+        correct++;
+      }
+    }
+
+    double percentCorrect = (100.0 * correct) / actual.getCols();
+    this.accuracyHistory.add(percentCorrect);
+    this.lossHistory.add(loss);
+    if (this.runningAverages == null) {
+      initAverageMetrics();
+    }
+    this.runningAverages.add(loss, percentCorrect);
+    return loss;
+  }
+
+  private void initAverageMetrics() {
+    this.runningAverages =
+        new RunningAverages(
+            2,
+            this.batchSize,
+            (callNumber, averages) -> {
+              System.out.printf(
+                  "Epoch: %d, Loss: %.2f, Accuracy: %.2f\n", callNumber, averages[0], averages[1]);
+            });
+  }
+
+  public void addLayer(Layer layer) {
+    layers.add(layer);
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder();
+    for (Layer layer : layers) {
+      sb.append(layer.toString());
+      sb.append("\n");
+    }
+    return sb.toString();
+  }
+
+  public LinkedList<Double> getLossHistory() {
+    return lossHistory;
+  }
+
+  public LinkedList<Double> getAccuracyHistory() {
+    return accuracyHistory;
+  }
+
+  public void setBatchSize(int batchSize) {
+    this.batchSize = batchSize;
+  }
+
+  @Override
+  public void updateWeightsAndBias() {
+    for (Layer layer : layers) {
+      layer.updateWeightsAndBias();
+    }
+  }
+
+  public int getBatchSize() {
+    return batchSize;
+  }
+}
diff --git a/lib/src/main/java/de/edux/ml/mlp/core/network/Layer.java b/lib/src/main/java/de/edux/ml/mlp/core/network/Layer.java
new file mode 100644
index 0000000..1dad8a5
--- /dev/null
+++ b/lib/src/main/java/de/edux/ml/mlp/core/network/Layer.java
@@ -0,0 +1,13 @@
+package de.edux.ml.mlp.core.network;
+
+import de.edux.ml.mlp.core.tensor.Matrix;
+import java.io.Serializable;
+
+public interface Layer extends Serializable {
+
+  Matrix backwardLayerBased(Matrix error, float learningRate);
+
+  Matrix forwardLayerbased(Matrix input);
+
+  void updateWeightsAndBias();
+}
diff --git a/lib/src/main/java/de/edux/ml/mlp/core/network/NetworkBuilder.java b/lib/src/main/java/de/edux/ml/mlp/core/network/NetworkBuilder.java
new file mode 100644
index 0000000..6721e14
--- /dev/null
+++ b/lib/src/main/java/de/edux/ml/mlp/core/network/NetworkBuilder.java
@@ -0,0 +1,62 @@
+package de.edux.ml.mlp.core.network;
+
+import de.edux.ml.api.ExecutionMode;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class NetworkBuilder {
+  private int batchSize = 100;
+  private float initialLearningRate = 0.05f;
+  private float finalLearningRate = 0.001f;
+
+  private int threads = 1;
+  private int epochs = 10;
+
+  private List<Layer> layers = new ArrayList<>();
+
+  public NetworkBuilder addLayer(Layer layer) {
+    layers.add(layer);
+    return this;
+  }
+
+  public NetworkBuilder withBatchSize(int batchSize) {
+    this.batchSize = batchSize;
+    return this;
+  }
+
+  public NetworkBuilder withLearningRates(float initialLearningRate, float finalLearningRate) {
+    this.initialLearningRate = initialLearningRate;
+    this.finalLearningRate = finalLearningRate;
+    return this;
+  }
+
+  public NetworkBuilder withExecutionMode(ExecutionMode executionMode) {
+    this.threads = executionMode.getThreads();
+    return this;
+  }
+
+  public NetworkBuilder withEpochs(int epochs) {
+    this.epochs = epochs;
+    return this;
+  }
+
+  public NeuralNetwork build() {
+    NeuralNetwork nn = new NeuralNetwork(batchSize);
+    nn.setLearningRates(initialLearningRate, finalLearningRate);
+    nn.setEpochs(epochs);
+    nn.setThreads(threads);
+    for (Layer layer : layers) {
+      nn.addLayer(layer);
+    }
+    return nn;
+  }
+
+  public NeuralNetwork loadModel(String modelname) {
+    NeuralNetwork nn = NeuralNetwork.loadModel(modelname);
+    nn.setLearningRates(initialLearningRate, finalLearningRate);
+    nn.setEpochs(epochs);
+    nn.setThreads(threads);
+    return nn;
+  }
+}
diff --git a/lib/src/main/java/de/edux/ml/mlp/core/network/NeuralNetwork.java b/lib/src/main/java/de/edux/ml/mlp/core/network/NeuralNetwork.java
new file mode 100644
index 0000000..2197ba9
--- /dev/null
+++ b/lib/src/main/java/de/edux/ml/mlp/core/network/NeuralNetwork.java
@@ -0,0 +1,195 @@
+package de.edux.ml.mlp.core.network;
+
+import de.edux.ml.mlp.core.network.loader.BatchData;
+import de.edux.ml.mlp.core.network.loader.Loader;
+import de.edux.ml.mlp.core.network.loader.MetaData;
+import de.edux.ml.mlp.core.tensor.Matrix;
+import java.io.*;
+import java.util.LinkedList;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class NeuralNetwork implements Serializable {
+  @Serial private static final long serialVersionUID = 1L;
+  private static final Logger log = LoggerFactory.getLogger(NeuralNetwork.class);
+  private final Engine engine;
+  private int epochs;
+  private float initialLearningRate;
+  private float finalLearningRate;
+  private transient float learningRate;
+
+  private int threads = 8;
+
+  NeuralNetwork(int batchSize) {
+    engine = new Engine(batchSize);
+  }
+
+  public void setLearningRates(float initialLearningRate, float finalLearningRate) {
+    this.initialLearningRate = initialLearningRate;
+    this.finalLearningRate = finalLearningRate;
+  }
+
+  public NeuralNetwork fit(Loader trainLoader, Loader evalLoader) {
+    learningRate = initialLearningRate;
+    for (int epoch = 0; epoch < epochs; epoch++) {
+      runEpochLayerBased(trainLoader, true);
+
+      if (evalLoader != null) {
+        runEpochLayerBased(evalLoader, false);
+      }
+
+      learningRate -= (initialLearningRate - finalLearningRate) / epochs;
+    }
+    return this;
+  }
+
+  private void runEpochLayerBased(Loader loader, boolean trainingMode) {
+    loader.open();
+
+    var queue = createBatchTasks(loader, trainingMode);
+    consumeBatchTasksLayerbased(queue, trainingMode);
+
+    loader.close();
+
+    if (trainingMode) {
+      engine.updateWeightsAndBias();
+    }
+  }
+
+  private Matrix runBatch(Loader loader, boolean trainingMode) {
+    MetaData metaData = loader.getMetaData();
+    BatchData batchData = loader.readBatch();
+    int itemsRead = metaData.getItemsRead();
+    int inputSize = metaData.getInputSize();
+    int expectedSize = metaData.getExpectedSize();
+
+    Matrix input = new Matrix(inputSize, itemsRead, batchData.getInputBatch());
+    Matrix expected = new Matrix(expectedSize, itemsRead, batchData.getExpectedBatch());
+
+    Matrix batchResult = engine.forwardLayerbased(input);
+
+    if (trainingMode) {
+      engine.backwardLayerBased(expected, learningRate);
+    } else {
+      engine.evaluateLayerBased(batchResult, expected);
+    }
+
+    return batchResult;
+  }
+
+  private synchronized void consumeBatchTasksLayerbased(
+      LinkedList<Future<Matrix>> batches, boolean trainingMode) {
+    int numberBatches = batches.size();
+    int index = 0;
+
+    for (var batch : batches) {
+      try {
+        var batchResult = batch.get();
+
+      } catch (Exception e) {
+        e.printStackTrace();
+      }
+
+      int printDot = (numberBatches / 25) + 1;
+      if (trainingMode && index++ % printDot == 0) {
+        System.out.print(".");
+      }
+    }
+  }
+
+  private LinkedList<Future<Matrix>> createBatchTasks(Loader loader, boolean trainingMode) {
+    LinkedList<Future<Matrix>> batches = new LinkedList<>();
+
+    MetaData metaData = loader.getMetaData();
+    var numberBatches = metaData.getNumberBatches();
+
+    var executor = Executors.newFixedThreadPool(threads);
+
+    for (int i = 0; i < numberBatches; i++) {
+      batches.add(executor.submit(() -> runBatch(loader, trainingMode)));
+    }
+
+    executor.shutdown();
+
+    return batches;
+  }
+
+  @Override
+  public String toString() {
+    return String.format("Neural Network Configuration\n")
+        + "----------------------------------------\n"
+        + String.format("Epochs: %d\n", epochs)
+        + String.format("Batch size: %d\n", engine.getBatchSize())
+        + String.format(
+            "Initial learning rate: %f, Final learning rate: %f\n",
+            initialLearningRate, finalLearningRate)
+        + String.format("Threads: %d\n", threads)
+        + "\nNetwork Architecture:"
+        + "\n----------------------------------------\n"
+        + engine.toString();
+  }
+
+  public void setBatchSize(int batchSize) {
+    engine.setBatchSize(batchSize);
+  }
+
+  public boolean saveModel(String fileName) {
+    File file = new File(fileName);
+    try (var ds = new ObjectOutputStream(new FileOutputStream(file))) {
+      ds.writeObject(this);
+      log.info("Model saved to {}", file.getAbsolutePath());
+    } catch (IOException e) {
+      e.printStackTrace();
+      return false;
+    }
+
+    return true;
+  }
+
+  public static NeuralNetwork loadModel(String fileName) {
+    NeuralNetwork model = null;
+    File file = new File(fileName);
+    if (!file.exists()) {
+      return null;
+    }
+    try (var ds = new ObjectInputStream(new FileInputStream(file))) {
+      model = (NeuralNetwork) ds.readObject();
+    } catch (IOException | ClassNotFoundException e) {
+      e.printStackTrace();
+    }
+    log.info("Model loaded from {}", file.getAbsolutePath());
+    return model;
+  }
+
+  public double[] predict(Matrix input) {
+    return engine.forwardLayerbased(input).getData();
+  }
+
+  public LinkedList<Double> getLossHistory() {
+    return engine.getLossHistory();
+  }
+
+  public LinkedList<Double> getAccuracyHistory() {
+    return engine.getAccuracyHistory();
+  }
+
+  public void setThreads(int threads) {
+    this.threads = threads;
+  }
+
+  public void addLayer(Layer layer) {
+    engine.addLayer(layer);
+  }
+
+  public void setEpochs(int epochs) {
+    this.epochs = epochs;
+  }
+
+  public NeuralNetwork printArchitecture() {
+    System.out.println(this);
+    return this;
+  }
+}
diff --git a/lib/src/main/java/de/edux/ml/mlp/core/network/RunningAverages.java b/lib/src/main/java/de/edux/ml/mlp/core/network/RunningAverages.java
new file mode 100644
index 0000000..c758191
--- /dev/null
+++ b/lib/src/main/java/de/edux/ml/mlp/core/network/RunningAverages.java
@@ -0,0 +1,39 @@
+package de.edux.ml.mlp.core.network;
+
+import java.util.stream.DoubleStream;
+
+public class RunningAverages {
+
+  private final Callback callback;
+  private int nCalls = 0;
+  private double[][] values;
+  private int pos = 0;
+
+  public interface Callback {
+    void apply(int callNumber, double[] averages);
+  }
+
+  public RunningAverages(int numberAverages, int windowSize, Callback callback) {
+    this.callback = callback;
+    values = new double[numberAverages][windowSize];
+  }
+
+  public void add(double... args) {
+    for (int i = 0; i < values.length; i++) {
+      values[i][pos] = args[i];
+    }
+
+    if (++pos == values[0].length) {
+      // Window is full: report the running average of each tracked metric.
+      double[] averages = new double[values.length];
+      for (int i = 0; i < values.length; i++) {
+        averages[i] = DoubleStream.of(values[i]).average().getAsDouble();
+      }
+      callback.apply(++nCalls, averages);
+      pos = 0;
+    }
+  }
+}
diff --git a/lib/src/main/java/de/edux/ml/mlp/core/network/layers/DenseLayer.java b/lib/src/main/java/de/edux/ml/mlp/core/network/layers/DenseLayer.java
new file mode 100644
--- /dev/null
+++ b/lib/src/main/java/de/edux/ml/mlp/core/network/layers/DenseLayer.java
+package de.edux.ml.mlp.core.network.layers;
+
+import de.edux.ml.mlp.core.network.Layer;
+import de.edux.ml.mlp.core.tensor.Matrix;
+import java.util.Random;
+import java.util.concurrent.atomic.AtomicReference;
+
+public class DenseLayer implements Layer {
+  private AtomicReference<Matrix> weights;
+  private AtomicReference<Matrix> bias;
+  private final Random random = new Random();
+  private Matrix lastInput;
+
+  public DenseLayer(int inputSize, int outputSize) {
+    weights = new AtomicReference<>(new Matrix(outputSize, inputSize));
+    bias = new AtomicReference<>(new Matrix(outputSize, 1));
+    initialize();
+  }
+
+  private void initialize() {
+    double standardDeviation = Math.sqrt(2.0 / (weights.get().getRows() + weights.get().getCols()));
+
+    for (int i = 0; i < weights.get().getRows(); i++) {
+      for (int j = 0; j < weights.get().getCols(); j++) {
+        weights.get().set(i, j, random.nextGaussian() * standardDeviation);
+      }
+    }
+    for (int i = 0; i < bias.get().getRows(); i++) {
+      for (int j = 0; j < bias.get().getCols(); j++) {
+        bias.get().set(i, j, 0);
+      }
+    }
+  }
+
+  @Override
+  public Matrix forwardLayerbased(Matrix input) {
+    this.lastInput = input;
+    return this.weights.get().multiplyParallel(input).add(this.bias.get());
+  }
+
+  @Override
+  public synchronized void updateWeightsAndBias() {}
+
+  @Override
+  public Matrix backwardLayerBased(Matrix error, float learningRate) {
+    Matrix output = weights.get().transposeParallel().multiplyParallel(error);
+    // Calculate gradient of weights
+    Matrix weightsGradient = error.multiplyParallel(lastInput.transposeParallel());
+    // Calculate gradient of bias
+    Matrix biasGradient = error.averageColumn();
+    // Calculate learning rate per weight
+    float rate = learningRate / (lastInput.getCols());
+
+    // Update weights and bias
+    weights.set(weights.get().subtract(weightsGradient.multiplyParallel(rate)));
+    bias.set(bias.get().subtract(biasGradient.multiplyParallel(rate)));
+
+    return output;
+  }
+
+  @Override
+  public String toString() {
+    return "DenseLayer in: " + weights.get().getCols() + " x out: " + weights.get().getRows();
+  }
+}
diff --git a/lib/src/main/java/de/edux/ml/mlp/core/network/layers/ReLuLayer.java b/lib/src/main/java/de/edux/ml/mlp/core/network/layers/ReLuLayer.java
new file mode 100644
index 0000000..2639104
--- /dev/null
+++ b/lib/src/main/java/de/edux/ml/mlp/core/network/layers/ReLuLayer.java
@@ -0,0 +1,31 @@
+package de.edux.ml.mlp.core.network.layers;
+
+import de.edux.ml.mlp.core.network.Layer;
+import de.edux.ml.mlp.core.tensor.Matrix;
+
+public class ReLuLayer implements Layer {
+  private Matrix lastInput;
+
+  @Override
+  public Matrix forwardLayerbased(Matrix input) {
+    this.lastInput = input;
+    return input.relu();
+  }
+
+  @Override
+  public void updateWeightsAndBias() {
+    // no weights and bias
+  }
+
+  @Override
+  public Matrix backwardLayerBased(Matrix error, float learningRate) {
+    return error.reluDerivative(lastInput);
+  }
+
+  @Override
+  public String toString() {
+    return "ReLu";
+  }
+}
diff --git a/lib/src/main/java/de/edux/ml/mlp/core/network/layers/SoftmaxLayer.java b/lib/src/main/java/de/edux/ml/mlp/core/network/layers/SoftmaxLayer.java
new file mode 100644
index 0000000..557e912
--- /dev/null
+++ b/lib/src/main/java/de/edux/ml/mlp/core/network/layers/SoftmaxLayer.java
@@ -0,0 +1,31 @@
+package de.edux.ml.mlp.core.network.layers;
+
+import de.edux.ml.mlp.core.network.Layer;
+import de.edux.ml.mlp.core.tensor.Matrix;
+
+public class SoftmaxLayer implements Layer {
+
+  private Matrix lastSoftmax;
+
+  @Override
+  public Matrix backwardLayerBased(Matrix expected, float learningRate) {
+    return lastSoftmax.subtract(expected);
+  }
+
+  @Override
+  public Matrix forwardLayerbased(Matrix input) {
+    this.lastSoftmax = input.softmax();
+    return lastSoftmax;
+  }
+
+  @Override
+  public void updateWeightsAndBias() {
+    // no weights and bias
+  }
+
+  @Override
+  public String toString() {
+    return "Softmax";
+  }
+}
diff --git a/lib/src/main/java/de/edux/ml/mlp/core/network/loader/AbstractBatchData.java b/lib/src/main/java/de/edux/ml/mlp/core/network/loader/AbstractBatchData.java
new file mode 100644
index 0000000..6f1c1b2
--- /dev/null
+++ b/lib/src/main/java/de/edux/ml/mlp/core/network/loader/AbstractBatchData.java
@@ -0,0 +1,27 @@
+package de.edux.ml.mlp.core.network.loader;
+
+public abstract class AbstractBatchData implements BatchData {
+
+  private double[] inputBatch;
+  private double[] expectedBatch;
+
+  @Override
+  public double[] getInputBatch() {
+    return inputBatch;
+  }
+
+  @Override
+  public void setInputBatch(double[] inputBatch) {
+    this.inputBatch = inputBatch;
+  }
+
+  @Override
+  public double[] getExpectedBatch() {
+    return expectedBatch;
+  }
+
+  @Override
+  public void setExpectedBatch(double[] expectedBatch) {
+    this.expectedBatch = expectedBatch;
+  }
+}
diff --git a/lib/src/main/java/de/edux/ml/mlp/core/network/loader/AbstractMetaData.java b/lib/src/main/java/de/edux/ml/mlp/core/network/loader/AbstractMetaData.java
new file mode 100644
index 0000000..a35cf85
--- /dev/null
+++ b/lib/src/main/java/de/edux/ml/mlp/core/network/loader/AbstractMetaData.java
@@ -0,0 +1,71 @@
+package de.edux.ml.mlp.core.network.loader;
+
+public abstract class AbstractMetaData implements MetaData {
+  private int numberItems;
+  private int inputSize;
+  private int expectedSize;
+  private int numberBatches;
+  private int totalItemsRead;
+  private int itemsRead;
+
+  @Override
+  public int getNumberItems() {
+    return numberItems;
+  }
+
+  @Override
+  public void setNumberItems(int numberItems) {
+    this.numberItems = numberItems;
+  }
+
+  @Override
+  public int getInputSize() {
+    return inputSize;
+  }
+
+  @Override
+  public void setInputSize(int inputSize) {
+    this.inputSize = inputSize;
+  }
+
+  @Override
+  public int getExpectedSize() {
+    return expectedSize;
+  }
+
+  @Override
+  public void setExpectedSize(int expectedSize) {
+    this.expectedSize = expectedSize;
+  }
+
+  @Override
+  public int getNumberBatches() {
+    return numberBatches;
+  }
+
+  @Override
+  public void setNumberBatches(int numberBatches) {
+    this.numberBatches = numberBatches;
+  }
+
+  @Override
+  public int getTotalItemsRead() {
+    return totalItemsRead;
+
} + + @Override + public void setTotalItemsRead(int totalItemsRead) { + this.totalItemsRead = totalItemsRead; + } + + @Override + public int getItemsRead() { + return itemsRead; + } + + @Override + public void setItemsRead(int itemsRead) { + this.itemsRead = itemsRead; + } +} diff --git a/lib/src/main/java/de/edux/ml/mlp/core/network/loader/BatchData.java b/lib/src/main/java/de/edux/ml/mlp/core/network/loader/BatchData.java new file mode 100644 index 0000000..5fe4f79 --- /dev/null +++ b/lib/src/main/java/de/edux/ml/mlp/core/network/loader/BatchData.java @@ -0,0 +1,11 @@ +package de.edux.ml.mlp.core.network.loader; + +public interface BatchData { + + double[] getInputBatch(); + + void setInputBatch(double[] inputBatch); + + double[] getExpectedBatch(); + void setExpectedBatch(double[] expectedBatch); +} diff --git a/lib/src/main/java/de/edux/ml/mlp/core/network/loader/Loader.java b/lib/src/main/java/de/edux/ml/mlp/core/network/loader/Loader.java new file mode 100644 index 0000000..ea30abd --- /dev/null +++ b/lib/src/main/java/de/edux/ml/mlp/core/network/loader/Loader.java @@ -0,0 +1,11 @@ +package de.edux.ml.mlp.core.network.loader; + +public interface Loader { + MetaData open(); + void close(); + + MetaData getMetaData(); + BatchData readBatch(); + + +} diff --git a/lib/src/main/java/de/edux/ml/mlp/core/network/loader/MetaData.java b/lib/src/main/java/de/edux/ml/mlp/core/network/loader/MetaData.java new file mode 100644 index 0000000..8bc202d --- /dev/null +++ b/lib/src/main/java/de/edux/ml/mlp/core/network/loader/MetaData.java @@ -0,0 +1,27 @@ +package de.edux.ml.mlp.core.network.loader; + +public interface MetaData { + int getNumberItems(); + + void setNumberItems(int numberItems); + + int getInputSize(); + + void setInputSize(int inputSize); + + int getExpectedSize(); + + void setExpectedSize(int expectedSize); + + int getNumberBatches(); + + void setNumberBatches(int numberBatches); + + int getTotalItemsRead(); + + void setTotalItemsRead(int totalItemsRead); + + int getItemsRead(); + + void setItemsRead(int itemsRead); +} diff --git a/lib/src/main/java/de/edux/ml/mlp/core/network/loader/csv/CSVBatchData.java b/lib/src/main/java/de/edux/ml/mlp/core/network/loader/csv/CSVBatchData.java new file mode 100644 index 0000000..0bf70cb --- /dev/null +++ b/lib/src/main/java/de/edux/ml/mlp/core/network/loader/csv/CSVBatchData.java @@ -0,0 +1,5 @@ +package de.edux.ml.mlp.core.network.loader.csv; + +import de.edux.ml.mlp.core.network.loader.AbstractBatchData; + +public class CSVBatchData extends AbstractBatchData {} diff --git a/lib/src/main/java/de/edux/ml/mlp/core/network/loader/csv/CSVDataLoader.java b/lib/src/main/java/de/edux/ml/mlp/core/network/loader/csv/CSVDataLoader.java new file mode 100644 index 0000000..060632d --- /dev/null +++ b/lib/src/main/java/de/edux/ml/mlp/core/network/loader/csv/CSVDataLoader.java @@ -0,0 +1,32 @@ +package de.edux.ml.mlp.core.network.loader.csv; + +import de.edux.ml.mlp.core.network.loader.BatchData; +import de.edux.ml.mlp.core.network.loader.Loader; +import de.edux.ml.mlp.core.network.loader.MetaData; + +import java.io.File; + +public class CSVDataLoader implements Loader { + public CSVDataLoader(File csvFile, int batchSize) { + } + + @Override + public MetaData open() { + return null; + } + + @Override + public void close() { + + } + + @Override + public MetaData getMetaData() { + return null; + } + + @Override + public BatchData readBatch() { + return null; + } +} diff --git a/lib/src/main/java/de/edux/ml/mlp/core/network/loader/csv/CSVMetaData.java 
b/lib/src/main/java/de/edux/ml/mlp/core/network/loader/csv/CSVMetaData.java new file mode 100644 index 0000000..4888d7f --- /dev/null +++ b/lib/src/main/java/de/edux/ml/mlp/core/network/loader/csv/CSVMetaData.java @@ -0,0 +1,5 @@ +package de.edux.ml.mlp.core.network.loader.csv; + +import de.edux.ml.mlp.core.network.loader.AbstractMetaData; + +public class CSVMetaData extends AbstractMetaData {} diff --git a/lib/src/main/java/de/edux/ml/mlp/core/network/loader/image/ImageBatchData.java b/lib/src/main/java/de/edux/ml/mlp/core/network/loader/image/ImageBatchData.java new file mode 100644 index 0000000..9b50c93 --- /dev/null +++ b/lib/src/main/java/de/edux/ml/mlp/core/network/loader/image/ImageBatchData.java @@ -0,0 +1,6 @@ +package de.edux.ml.mlp.core.network.loader.image; + +import de.edux.ml.mlp.core.network.loader.AbstractBatchData; + +public class ImageBatchData extends AbstractBatchData { +} diff --git a/lib/src/main/java/de/edux/ml/mlp/core/network/loader/image/ImageLoader.java b/lib/src/main/java/de/edux/ml/mlp/core/network/loader/image/ImageLoader.java new file mode 100644 index 0000000..557c8fd --- /dev/null +++ b/lib/src/main/java/de/edux/ml/mlp/core/network/loader/image/ImageLoader.java @@ -0,0 +1,180 @@ +package de.edux.ml.mlp.core.network.loader.image; + +import de.edux.ml.mlp.core.network.loader.BatchData; +import de.edux.ml.mlp.core.network.loader.Loader; +import de.edux.ml.mlp.core.network.loader.MetaData; +import de.edux.ml.mlp.core.network.loader.image.ImageBatchData; +import de.edux.ml.mlp.core.network.loader.image.ImageMetaData; +import de.edux.ml.mlp.exceptions.LoaderException; +import java.io.DataInputStream; +import java.io.FileInputStream; +import java.io.IOException; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; + +public class ImageLoader implements Loader { + private String imageFileName; + private String labelFileName; + private int batchSize; + private DataInputStream imageInputStream; + private DataInputStream labelInputStream; + private ImageMetaData metaData; + private Lock readLock = new ReentrantLock(); + + public ImageLoader(String imageFileName, String labelFileName, int batchSize) { + this.imageFileName = imageFileName; + this.labelFileName = labelFileName; + this.batchSize = batchSize; + } + + + @Override + public MetaData open() { + imageInputStream = getImageInputStream(imageFileName); + labelInputStream = getImageInputStream(labelFileName); + return readMetaData(); + } + + private DataInputStream getImageInputStream(String filename) { + try { + return new DataInputStream(new FileInputStream(filename)); + } catch (Exception e) { + e.printStackTrace(); + throw new LoaderException(" Error opening file " + filename); + } + } + + @Override + public void close() { + metaData = null; + try { + imageInputStream.close(); + labelInputStream.close(); + + } catch (IOException e) { + throw new LoaderException("Error closing file " + imageFileName); + } + + } + + @Override + public MetaData getMetaData() { + return metaData; + } + + @Override + public BatchData readBatch() { + readLock.lock(); + ImageBatchData batchData; + try { + batchData = new ImageBatchData(); + int inputItemsRead = readInputBatch(batchData); + int expectedItemsRead = readExpectedBatch(batchData); + + if (inputItemsRead != expectedItemsRead) { + throw new LoaderException("Number of input items read does not match number of expected items read"); + } + metaData.setItemsRead(inputItemsRead); + } finally { + readLock.unlock(); + } + return batchData; + } 
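+
+  // The two helpers below fill one batch: readInputBatch scales each raw pixel
+  // byte into [0, 1), and readExpectedBatch one-hot encodes each label byte k
+  // (0-9) as a vector of expectedSize doubles with a 1.0 at index k, matching
+  // the softmax/cross-entropy output of the network.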
+  private int readExpectedBatch(ImageBatchData batchData) {
+    try {
+      var totalItemsRead = metaData.getTotalItemsRead();
+      var numberItems = metaData.getNumberItems();
+      var numberToRead = Math.min(batchSize, numberItems - totalItemsRead);
+
+      var labelData = new byte[numberToRead];
+      var expectedSize = metaData.getExpectedSize();
+      var numberRead = labelInputStream.read(labelData, 0, numberToRead);
+
+      if (numberRead != numberToRead) {
+        throw new LoaderException("Error reading expected data from file " + labelFileName);
+      }
+
+      double[] data = new double[numberToRead * expectedSize];
+      for (int i = 0; i < numberToRead; i++) {
+        byte label = labelData[i];
+        data[i * expectedSize + label] = 1.0;
+      }
+      batchData.setExpectedBatch(data);
+      return numberToRead;
+    } catch (IOException e) {
+      throw new LoaderException("Error reading expected data from file " + labelFileName);
+    }
+  }
+
+  private int readInputBatch(ImageBatchData batchData) {
+    var totalItemsRead = metaData.getTotalItemsRead();
+    var numberItems = metaData.getNumberItems();
+    var numberToRead = Math.min(batchSize, numberItems - totalItemsRead);
+
+    var inputSize = metaData.getInputSize();
+    var numberBytesToRead = numberToRead * inputSize;
+
+    byte[] imageData = new byte[numberBytesToRead];
+
+    try {
+      var numberRead = imageInputStream.read(imageData, 0, numberBytesToRead);
+      if (numberRead != numberBytesToRead) {
+        throw new LoaderException("Error reading input data from file " + imageFileName);
+      }
+      double[] data = new double[numberBytesToRead];
+
+      for (int i = 0; i < numberBytesToRead; i++) {
+        data[i] = (imageData[i] & 0xFF) / 256.0;
+      }
+      batchData.setInputBatch(data);
+      return numberToRead;
+    } catch (IOException e) {
+      throw new LoaderException("Error reading input data from file " + imageFileName);
+    }
+  }
+
+  private MetaData readMetaData() {
+    int numberItems = 0;
+    metaData = new ImageMetaData();
+    try {
+      int magicNumber = labelInputStream.readInt();
+      if (magicNumber != 2049) {
+        throw new LoaderException("Invalid magic number in file " + labelFileName);
+      }
+
+      numberItems = labelInputStream.readInt();
+      metaData.setNumberItems(numberItems);
+
+    } catch (IOException e) {
+      throw new LoaderException("Error reading magic number from file " + labelFileName);
+    }
+
+    try {
+      int magicNumber = imageInputStream.readInt();
+      if (magicNumber != 2051) {
+        throw new LoaderException("Invalid magic number in file " + imageFileName);
+      }
+
+      if (numberItems != imageInputStream.readInt()) {
+        throw new LoaderException("Number of labels and images do not match");
+      }
+
+      int height = imageInputStream.readInt();
+      int width = imageInputStream.readInt();
+      metaData.setInputSize(height * width);
+      metaData.setExpectedSize(10);
+      metaData.setNumberBatches((int) Math.ceil((double) numberItems / batchSize));
+      metaData.setHeight(height);
+      metaData.setWidth(width);
+
+    } catch (IOException e) {
+      throw new LoaderException("Error reading magic number from file " + imageFileName);
+    }
+    return metaData;
+  }
+}
diff --git a/lib/src/main/java/de/edux/ml/mlp/core/network/loader/image/ImageMetaData.java b/lib/src/main/java/de/edux/ml/mlp/core/network/loader/image/ImageMetaData.java
new file mode 100644
index 0000000..5f50dd5
--- /dev/null
+++ b/lib/src/main/java/de/edux/ml/mlp/core/network/loader/image/ImageMetaData.java
@@ -0,0 +1,31 @@
+package de.edux.ml.mlp.core.network.loader.image;
+
+import de.edux.ml.mlp.core.network.loader.AbstractMetaData;
+
+public class ImageMetaData extends AbstractMetaData {
+  private int width;
+  private int
height; + + public int getWidth() { + return width; + } + + public void setWidth(int width) { + this.width = width; + } + + public int getHeight() { + return height; + } + + public void setHeight(int height) { + this.height = height; + } + + @Override + public void setItemsRead(int itemsRead) { + super.setItemsRead(itemsRead); + super.setTotalItemsRead(super.getTotalItemsRead()+itemsRead); + + } +} diff --git a/lib/src/main/java/de/edux/ml/mlp/core/network/loader/test/TestBatchData.java b/lib/src/main/java/de/edux/ml/mlp/core/network/loader/test/TestBatchData.java new file mode 100644 index 0000000..9a6c3b4 --- /dev/null +++ b/lib/src/main/java/de/edux/ml/mlp/core/network/loader/test/TestBatchData.java @@ -0,0 +1,6 @@ +package de.edux.ml.mlp.core.network.loader.test; + +import de.edux.ml.mlp.core.network.loader.AbstractBatchData; + +public class TestBatchData extends AbstractBatchData { +} diff --git a/lib/src/main/java/de/edux/ml/mlp/core/network/loader/test/TestLoader.java b/lib/src/main/java/de/edux/ml/mlp/core/network/loader/test/TestLoader.java new file mode 100644 index 0000000..f369d99 --- /dev/null +++ b/lib/src/main/java/de/edux/ml/mlp/core/network/loader/test/TestLoader.java @@ -0,0 +1,76 @@ +package de.edux.ml.mlp.core.network.loader.test; + +import de.edux.ml.mlp.core.network.loader.BatchData; +import de.edux.ml.mlp.core.network.loader.Loader; +import de.edux.ml.mlp.core.network.loader.MetaData; +import de.edux.ml.mlp.util.Util; + +public class TestLoader implements Loader { + private MetaData metaData; + + private int numberItems = 9; + private int inputSize = 500; + private int expectedSize = 3; + private int numberBatches; + private int batchSize = 0; + private int totalItemsRead; + private int itemsRead; + + public TestLoader(int numberItems, int batchSize, int inputRows) { + this.inputSize = inputRows; + this.numberItems = numberItems; + this.batchSize = batchSize; + this.metaData = new TestMetaData(); + metaData.setNumberItems(numberItems); + + numberBatches = numberItems / batchSize; + + if (numberItems % batchSize != 0) { + numberBatches++; + } + + metaData.setNumberBatches(numberBatches); + metaData.setInputSize(inputSize); + metaData.setExpectedSize(expectedSize); + + } + + @Override + public MetaData open() { + return metaData; + } + + @Override + public void close() { + totalItemsRead = 0; + } + + @Override + public MetaData getMetaData() { + return metaData; + } + + @Override + public synchronized BatchData readBatch() { + if (totalItemsRead == numberItems) { + return null; + } + itemsRead = batchSize; + + totalItemsRead += itemsRead; + int excessItems = totalItemsRead - numberItems; + + if (excessItems > 0) { + totalItemsRead -= excessItems; + itemsRead -= excessItems; + } + var io = Util.generateTrainingArrays(inputSize, expectedSize, itemsRead); + + var batchData = new TestBatchData(); + batchData.setInputBatch(io.getInput()); + batchData.setExpectedBatch(io.getOutput()); + metaData.setTotalItemsRead(totalItemsRead); + metaData.setItemsRead(itemsRead); + return batchData; + } +} diff --git a/lib/src/main/java/de/edux/ml/mlp/core/network/loader/test/TestMetaData.java b/lib/src/main/java/de/edux/ml/mlp/core/network/loader/test/TestMetaData.java new file mode 100644 index 0000000..9e12773 --- /dev/null +++ b/lib/src/main/java/de/edux/ml/mlp/core/network/loader/test/TestMetaData.java @@ -0,0 +1,6 @@ +package de.edux.ml.mlp.core.network.loader.test; + +import de.edux.ml.mlp.core.network.loader.AbstractMetaData; + +public class TestMetaData extends AbstractMetaData { 
+} diff --git a/lib/src/main/java/de/edux/ml/mlp/core/network/loss/LossFunction.java b/lib/src/main/java/de/edux/ml/mlp/core/network/loss/LossFunction.java new file mode 100644 index 0000000..6b2aad4 --- /dev/null +++ b/lib/src/main/java/de/edux/ml/mlp/core/network/loss/LossFunction.java @@ -0,0 +1,6 @@ +package de.edux.ml.mlp.core.network.loss; + +public enum LossFunction { + CROSS_ENTROPY, + MEAN_SQUARED_ERROR; +} diff --git a/lib/src/main/java/de/edux/ml/mlp/core/network/loss/LossFunctions.java b/lib/src/main/java/de/edux/ml/mlp/core/network/loss/LossFunctions.java new file mode 100644 index 0000000..efa3f87 --- /dev/null +++ b/lib/src/main/java/de/edux/ml/mlp/core/network/loss/LossFunctions.java @@ -0,0 +1,11 @@ +package de.edux.ml.mlp.core.network.loss; + +import de.edux.ml.mlp.core.tensor.Matrix; + +public class LossFunctions { + + public static Matrix crossEntropy(Matrix expected, Matrix actual){ + return actual.apply((index, value) -> -expected.getData()[index] * Math.log(value)).sumColumns(); + } + +} diff --git a/lib/src/main/java/de/edux/ml/mlp/core/network/optimizer/Approximator.java b/lib/src/main/java/de/edux/ml/mlp/core/network/optimizer/Approximator.java new file mode 100644 index 0000000..51f2d2a --- /dev/null +++ b/lib/src/main/java/de/edux/ml/mlp/core/network/optimizer/Approximator.java @@ -0,0 +1,52 @@ +package de.edux.ml.mlp.core.network.optimizer; + +import de.edux.ml.mlp.core.tensor.Matrix; +import java.util.function.Function; + +public class Approximator { + public static Matrix gradient(Matrix input, Function transofrm) { + + final double INC = 0.00000001; + + Matrix loss1 = transofrm.apply(input); + + if (loss1.getCols() != input.getCols()){ + throw new IllegalArgumentException("Input/Loss cols must be equal"); + } + if (loss1.getRows() != 1){ + throw new IllegalArgumentException("Layer must return a row vector"); + } + + Matrix result = new Matrix(input.getRows(), input.getCols()); + + input.forEach((row, col, index, value) -> { + Matrix incremeted = input.addIncrement(row, col, INC); + Matrix loss2 = transofrm.apply(incremeted); + + double rate = (loss2.getData()[col] - loss1.getData()[col]) / INC; + result.set(row, col, rate); + }); + + return result; + + } + + public static Matrix weightGradient(Matrix weights, Function transofrm) { + + final double INC = 0.00000001; + + Matrix loss1 = transofrm.apply(weights); + Matrix result = new Matrix(weights.getRows(), weights.getCols(), (i) -> 0); + + weights.forEach((row, col, index, value) -> { + Matrix incremeted = weights.addIncrement(row, col, INC); + Matrix loss2 = transofrm.apply(incremeted); + + double rate = (loss2.get(0)- loss1.get(0)) / INC; + result.set(row, col, rate); + }); + + return result; + + } +} diff --git a/lib/src/main/java/de/edux/ml/mlp/core/network/optimizer/Calculus.java b/lib/src/main/java/de/edux/ml/mlp/core/network/optimizer/Calculus.java new file mode 100644 index 0000000..4a59a52 --- /dev/null +++ b/lib/src/main/java/de/edux/ml/mlp/core/network/optimizer/Calculus.java @@ -0,0 +1,38 @@ +package de.edux.ml.mlp.core.network.optimizer; + +import java.util.function.DoubleFunction; + +public class Calculus { + private static final double INC = 1e-4; + + public static double func1(double x) { + return 3.7 * x + 5.3; + } + + public static double func2(double x) { + return x * x; + } + + public static double func3(double y1, double y2) { + return y1 * y2 +4.7 *y1; + } + + public static double differentiate(DoubleFunction function, double x) { + double output1 = function.apply(x); + double output2 = 
diff --git a/lib/src/main/java/de/edux/ml/mlp/core/network/optimizer/Calculus.java b/lib/src/main/java/de/edux/ml/mlp/core/network/optimizer/Calculus.java
new file mode 100644
index 0000000..4a59a52
--- /dev/null
+++ b/lib/src/main/java/de/edux/ml/mlp/core/network/optimizer/Calculus.java
@@ -0,0 +1,38 @@
+package de.edux.ml.mlp.core.network.optimizer;
+
+import java.util.function.DoubleFunction;
+
+public class Calculus {
+  private static final double INC = 1e-4;
+
+  public static double func1(double x) {
+    return 3.7 * x + 5.3;
+  }
+
+  public static double func2(double x) {
+    return x * x;
+  }
+
+  public static double func3(double y1, double y2) {
+    return y1 * y2 + 4.7 * y1;
+  }
+
+  // Approximates f'(x) by the forward difference (f(x + INC) - f(x)) / INC
+  public static double differentiate(DoubleFunction<Double> function, double x) {
+    double output1 = function.apply(x);
+    double output2 = function.apply(x + INC);
+
+    return (output2 - output1) / INC;
+  }
+
+  public static void main(String[] args) {
+    double x = 3.64;
+    double y = func1(x);
+    double z = func2(y);
+
+    double dydx = differentiate(Calculus::func1, x);
+    double dzdy = differentiate(Calculus::func2, y);
+
+    // Chain rule demo: dz/dx = dz/dy * dy/dx
+    System.out.println("dz/dx = " + dzdy * dydx + " (z = " + z + ")");
+  }
+}
diff --git a/lib/src/main/java/de/edux/ml/mlp/core/network/optimizer/Optimizer.java b/lib/src/main/java/de/edux/ml/mlp/core/network/optimizer/Optimizer.java
new file mode 100644
index 0000000..b4f8f1b
--- /dev/null
+++ b/lib/src/main/java/de/edux/ml/mlp/core/network/optimizer/Optimizer.java
@@ -0,0 +1,6 @@
+package de.edux.ml.mlp.core.network.optimizer;
+
+// Marker interface for optimizers; currently empty.
+public interface Optimizer {
+
+}
diff --git a/lib/src/main/java/de/edux/ml/mlp/core/network/optimizer/SGD.java b/lib/src/main/java/de/edux/ml/mlp/core/network/optimizer/SGD.java
new file mode 100644
index 0000000..fc293c6
--- /dev/null
+++ b/lib/src/main/java/de/edux/ml/mlp/core/network/optimizer/SGD.java
@@ -0,0 +1,13 @@
+package de.edux.ml.mlp.core.network.optimizer;
+
+import de.edux.ml.mlp.core.tensor.Matrix;
+
+public class SGD {
+  // Vanilla stochastic gradient descent: w <- w - learningRate * dL/dw, applied in place
+  public void updateWeights(Matrix weights, Matrix weightErrors, float learningRate) {
+    for (int i = 0; i < weights.getRows(); i++) {
+      for (int j = 0; j < weights.getCols(); j++) {
+        weights.set(i, j, weights.get(i, j) - learningRate * weightErrors.get(i, j));
+      }
+    }
+  }
+}
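`SGD` implements the plain update rule w ← w − η·∂L/∂w and mutates the weight matrix in place. A quick sketch of its effect (values are illustrative):

```
SGD sgd = new SGD();
Matrix weights = new Matrix(2, 2, i -> 0.5);    // all weights start at 0.5
Matrix gradients = new Matrix(2, 2, i -> 0.1);  // constant dummy gradient
sgd.updateWeights(weights, gradients, 0.01f);   // in place: 0.5 - 0.01 * 0.1 = 0.499
System.out.println(weights.get(0, 0));          // prints 0.499
```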
diff --git a/lib/src/main/java/de/edux/ml/mlp/core/tensor/Matrix.java b/lib/src/main/java/de/edux/ml/mlp/core/tensor/Matrix.java
new file mode 100644
index 0000000..9a6a997
--- /dev/null
+++ b/lib/src/main/java/de/edux/ml/mlp/core/tensor/Matrix.java
@@ -0,0 +1,455 @@
+package de.edux.ml.mlp.core.tensor;
+
+import java.io.Serializable;
+import java.util.Arrays;
+import java.util.Objects;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+/** Row-major matrix backed by a flat double array. */
+public class Matrix implements Serializable {
+
+  private static final String NUMBER_FORMAT = "%.3f";
+  private double tolerance = 1e-5;
+  private final int rows;
+  private final int cols;
+  private double[] data;
+
+  public Matrix multiplyParallel(double rate) {
+    Matrix result = new Matrix(this.rows, this.cols);
+
+    IntStream.range(0, this.rows).parallel().forEach(i -> {
+      for (int j = 0; j < this.cols; j++) {
+        result.data[i * this.cols + j] = this.data[i * this.cols + j] * rate;
+      }
+    });
+
+    return result;
+  }
+
+  public double sum() {
+    double sum = 0;
+    for (double datum : data) {
+      sum += datum;
+    }
+    return sum;
+  }
+
+  public Matrix divide(int batches) { // TODO replace with apply
+    if (batches == 0) {
+      throw new IllegalArgumentException("Division by zero is not allowed.");
+    }
+
+    Matrix result = new Matrix(this.rows, this.cols);
+    for (int i = 0; i < this.data.length; i++) {
+      result.data[i] = this.data[i] / batches;
+    }
+    return result;
+  }
+
+  /**
+   * Subtracts the given matrix from this matrix.
+   *
+   * <p>This method performs an element-wise subtraction between two matrices.
+   * It requires that both matrices have the same dimensions. If the matrices
+   * do not have the same dimensions, an IllegalArgumentException is thrown.
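+   *
+   * <p>A minimal usage sketch (values are illustrative):
+   *
+   * <pre>{@code
+   * Matrix a = new Matrix(2, 2, i -> i); // [[0, 1], [2, 3]]
+   * Matrix b = new Matrix(2, 2, i -> 1); // [[1, 1], [1, 1]]
+   * Matrix c = a.subtract(b);            // [[-1, 0], [1, 2]]
+   * }</pre>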
+ * + * @param matrix The matrix to be subtracted from this matrix. + * @return A new Matrix object representing the result of the subtraction. + * @throws IllegalArgumentException if the input matrix and this matrix do not have the same dimensions. + */ + public Matrix subtract(Matrix matrix) { + if (this.rows != matrix.rows || this.cols != matrix.cols) { + throw new IllegalArgumentException("Matrices must have the same size."); + } + + Matrix result = new Matrix(this.rows, this.cols); + for (int i = 0; i < this.data.length; i++) { + result.data[i] = this.data[i] - matrix.getData()[i]; + } + return result; + } + + public Matrix relu() { + return this.apply((index, value) -> Math.max(0, value)); + } + + public Matrix reluDerivative(Matrix input) { + return this.apply((index, value) -> input.get(index) > 0 ? value : 0); + } + + public Matrix print() { + System.out.println(this); + return this; + } + + public void set(int row, int col, double value) { + data[row * cols + col] = value; + } + + public double get(int row, int col) { + return data[row * cols + col]; + } + + public Matrix addIncrement(int row, int col, double increment) { + Matrix result = apply((index, value) -> data[index]); + double originalValue = result.get(row, col); + double newValue = originalValue + increment; + result.set(row, col, newValue); + + return result; + } + + public Matrix transpose() { + Matrix result = new Matrix(cols, rows); + for (int row = 0; row < rows; row++) { + for (int col = 0; col < cols; col++) { + result.data[col * rows + row] = data[row * cols + col]; + } + } + return result; + } + + public Matrix transposeParallel() { + Matrix result = new Matrix(cols, rows); + + IntStream.range(0, rows).parallel().forEach(row -> { + for (int col = 0; col < cols; col++) { + result.data[col * rows + row] = data[row * cols + col]; + } + }); + + return result; + } + + public double get(int index) { + return this.getData()[index]; + } + + public Matrix multiply(double rate) { + return this.apply((index, value) -> value * rate); + } + + public Matrix copy() { + Matrix result = new Matrix(rows, cols); + System.arraycopy(data, 0, result.data, 0, data.length); + return result; + } + + + public interface RowColumnProducer { + double produce(int row, int col, double value); + } + + public interface Producer { + double produce(int index); + } + + public interface IndexValueProducer { + double produce(int index, double value); + } + + public interface ValueProducer { + double produce(double value); + } + + public interface IndexValueConsumer { + void consume(int index, double value); + } + + public interface RowColValueConsumer { + void consume(int row, int col, double value); + } + + + public interface RowColIndexValueConsumer { + void consume(int row, int col, int index, double value); + } + + public double[] getData() { + return data; + } + + + + public Matrix(int rows, int cols) { + data = new double[rows * cols]; + this.rows = rows; + this.cols = cols; + } + + public Matrix(int rows, int cols, Producer producer) { + this(rows, cols); + for (int i = 0; i < data.length; i++) { + data[i] = producer.produce(i); + } + } + + public Matrix(int rows, int cols, double[] values) { + this.rows = rows; + this.cols = cols; + + Matrix temp = new Matrix(cols, rows); + temp.data = values; + Matrix transposed = temp.transpose(); + data = transposed.data; + } + + public Matrix(double[][]values){ + this.rows = values.length; + this.cols = values[0].length; + this.data = new double[rows*cols]; + for(int i = 0; i < rows; i++){ + for(int j 
= 0; j < cols; j++) {
+        this.data[i * cols + j] = values[i][j];
+      }
+    }
+  }
+
+  public Matrix apply(IndexValueProducer function) {
+    Matrix result = new Matrix(rows, cols);
+    for (int i = 0; i < data.length; i++) {
+      result.data[i] = function.produce(i, data[i]);
+    }
+    return result;
+  }
+
+  public Matrix multiply(Matrix other) {
+    if (cols != other.rows) {
+      throw new IllegalArgumentException("Matrix dimensions do not match");
+    }
+    Matrix result = new Matrix(rows, other.cols);
+    for (int row = 0; row < rows; row++) {
+      for (int col = 0; col < other.cols; col++) {
+        double sum = 0;
+        for (int i = 0; i < cols; i++) {
+          sum += data[row * cols + i] * other.data[i * other.cols + col];
+        }
+        result.data[row * other.cols + col] = sum;
+      }
+    }
+    return result;
+  }
+
+  public Matrix multiplyParallel(Matrix other) {
+    if (cols != other.rows) {
+      throw new IllegalArgumentException("Matrix dimensions do not match");
+    }
+    Matrix result = new Matrix(rows, other.cols);
+    IntStream.range(0, rows)
+        .parallel()
+        .forEach(
+            row -> {
+              for (int col = 0; col < other.cols; col++) {
+                double sum = 0;
+                for (int i = 0; i < cols; i++) {
+                  sum += data[row * cols + i] * other.data[i * other.cols + col];
+                }
+                result.data[row * other.cols + col] = sum;
+              }
+            });
+
+    return result;
+  }
+
+  public Matrix averageColumn() {
+    Matrix result = new Matrix(rows, 1);
+    forEach((row, col, value) -> {
+      result.data[row] += value / cols;
+    });
+    return result;
+  }
+
+  public Matrix add(Matrix other) {
+    // Check whether the other matrix is a column vector that can be used as a bias
+    if (this.cols != other.cols && other.cols != 1) {
+      throw new IllegalArgumentException(
+          "For addition the second matrix must either have the same size or be a column vector.");
+    }
+
+    Matrix result = new Matrix(rows, cols);
+    for (int row = 0; row < this.rows; row++) {
+      for (int col = 0; col < this.cols; col++) {
+        if (other.cols == 1) {
+          // Add the bias when the second matrix is a column vector
+          result.data[row * cols + col] = this.data[row * cols + col] + other.data[row];
+        } else {
+          // Plain element-wise addition when the second matrix has the same size
+          result.data[row * cols + col] =
+              this.data[row * cols + col] + other.data[row * cols + col];
+        }
+      }
+    }
+
+    return result;
+  }
+
+  public Matrix modify(RowColumnProducer function) {
+    int index = 0;
+    for (int row = 0; row < rows; row++) {
+      for (int col = 0; col < cols; col++, index++) {
+        data[index] = function.produce(row, col, data[index]);
+      }
+    }
+    return this;
+  }
+
+  public Matrix modify(ValueProducer function) {
+    for (int i = 0; i < data.length; i++) {
+      data[i] = function.produce(data[i]);
+    }
+    return this;
+  }
+
+  public Matrix modify(IndexValueProducer function) {
+    for (int i = 0; i < data.length; i++) {
+      data[i] = function.produce(i, data[i]);
+    }
+    return this;
+  }
+
+  public void forEach(IndexValueConsumer consumer) {
+    for (int i = 0; i < data.length; i++) {
+      consumer.consume(i, data[i]);
+    }
+  }
+
+  public void forEach(RowColIndexValueConsumer consumer) {
+    int index = 0;
+    for (int row = 0; row < rows; row++) {
+      for (int col = 0; col < cols; col++) {
+        consumer.consume(row, col, index, data[index++]);
+      }
+    }
+  }
+
+  public void forEach(RowColValueConsumer consumer) {
+    int index = 0;
+    for (int row = 0; row < rows; row++) {
+      for (int col = 0; col < cols; col++) {
+        consumer.consume(row, col, data[index++]);
+      }
+    }
+  }
+
+  public void setTolerance(double tolerance) {
+    this.tolerance = tolerance;
+  }
+
+  public Matrix sumColumns() {
+    Matrix result = new Matrix(1, cols);
+    int index = 0;
+    for (int row = 0; row < rows; row++) {
+      for (int col = 0; col < cols; col++) {
+        result.data[col] += data[index++];
+      }
+    }
+
+    return result;
+  }
+
+  public Matrix softmax() {
+    Matrix result = new Matrix(rows, cols, i -> Math.exp(data[i]));
+    Matrix colSum = result.sumColumns();
+
+    result.modify((row, col, value) -> {
+      return value / colSum.getData()[col];
+    });
+    return result;
+  }
+
+  // Returns a 1 x cols matrix holding, for each column, the row index of its largest value.
+  public Matrix getGreatestRowNumber() {
+    Matrix result = new Matrix(1, cols);
+    double[] greatest = new double[cols];
+    for (int i = 0; i < cols; i++) {
+      greatest[i] = Double.NEGATIVE_INFINITY; // Double.MIN_VALUE would miss all-negative columns
+    }
+
+    forEach((row, col, value) -> {
+      if (value > greatest[col]) {
+        greatest[col] = value;
+        result.data[col] = row;
+      }
+    });
+    return result;
+  }
+
+  public int getRows() {
+    return rows;
+  }
+
+  public int getCols() {
+    return cols;
+  }
+
+  // Equality is tolerance-based; see setTolerance.
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) return true;
+    if (o == null || getClass() != o.getClass()) return false;
+    Matrix matrix = (Matrix) o;
+    if (rows != matrix.rows || cols != matrix.cols) return false;
+
+    for (int i = 0; i < data.length; i++) {
+      if (Math.abs(data[i] - matrix.data[i]) > tolerance) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  // Note: hashCode is exact while equals is tolerance-based, so matrices that are
+  // equal within tolerance may still hash differently.
+  @Override
+  public int hashCode() {
+    int result = Objects.hash(rows, cols);
+    result = 31 * result + Arrays.hashCode(data);
+    return result;
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder();
+
+    // Compute the maximum width of each column
+    int[] maxWidth = new int[cols];
+    for (int row = 0; row < rows; row++) {
+      for (int col = 0; col < cols; col++) {
+        int length = String.format(NUMBER_FORMAT, data[row * cols + col]).length();
+        if (length > maxWidth[col]) {
+          maxWidth[col] = length;
+        }
+      }
+    }
+
+    // Add the border lines and the data
+    String rowSeparator =
+        "+"
+            + Arrays.stream(maxWidth)
+                .mapToObj(width -> "-".repeat(width + 2))
+                .collect(Collectors.joining("+"))
+            + "+\n";
+
+    for (int row = 0; row < rows; row++) {
+      sb.append(rowSeparator);
+      sb.append("|");
+      for (int col = 0; col < cols; col++) {
+        String formattedNumber =
+            String.format(
+                "%" + maxWidth[col] + "s", String.format(NUMBER_FORMAT, data[row * cols + col]));
+        sb.append(" ").append(formattedNumber).append(" |");
+      }
+      sb.append("\n");
+    }
+    sb.append(rowSeparator);
+
+    return sb.toString();
+  }
+
+  public String toString(boolean showValues) {
+    if (showValues) {
+      return toString();
+    } else {
+      return "{" + "rows=" + rows + ", cols=" + cols + '}';
+    }
+  }
+}
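Since `Matrix` stores data row-major with samples held column-wise, here is a short sketch of the classification-oriented helpers working together (values are illustrative):

```
Matrix logits = new Matrix(3, 2, i -> i);          // 3 classes, 2 samples (one per column)
Matrix probs = logits.softmax();                   // each column now sums to 1
Matrix predictions = probs.getGreatestRowNumber(); // 1 x 2: row index of the max per column
System.out.println(predictions);                   // row 2 wins in both columns here
```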
diff --git a/lib/src/main/java/de/edux/ml/mlp/core/transformer/Transform.java b/lib/src/main/java/de/edux/ml/mlp/core/transformer/Transform.java
new file mode 100644
index 0000000..16caad6
--- /dev/null
+++ b/lib/src/main/java/de/edux/ml/mlp/core/transformer/Transform.java
@@ -0,0 +1,5 @@
+package de.edux.ml.mlp.core.transformer;
+
+public enum Transform {
+  DENSE, RELU, SOFTMAX;
+}
diff --git a/lib/src/main/java/de/edux/ml/mlp/exceptions/LoaderException.java b/lib/src/main/java/de/edux/ml/mlp/exceptions/LoaderException.java
new file mode 100644
index 0000000..f6ae931
--- /dev/null
+++ b/lib/src/main/java/de/edux/ml/mlp/exceptions/LoaderException.java
@@ -0,0 +1,7 @@
+package de.edux.ml.mlp.exceptions;
+
+public class LoaderException extends RuntimeException {
+  public LoaderException(String message) {
+    super(message);
+  }
+}
diff --git a/lib/src/main/java/de/edux/ml/mlp/exceptions/UnsupportedLayerException.java b/lib/src/main/java/de/edux/ml/mlp/exceptions/UnsupportedLayerException.java
new file mode 100644
index 0000000..a1eb422
--- /dev/null
+++ b/lib/src/main/java/de/edux/ml/mlp/exceptions/UnsupportedLayerException.java
@@ -0,0 +1,28 @@
+package de.edux.ml.mlp.exceptions;
+
+/**
+ * This class represents an exception that is thrown when an unsupported
+ * layer type is encountered in the MLP context. It extends RuntimeException
+ * to indicate that this is an unchecked exception that might occur during
+ * the runtime of the application, particularly when configuring or building
+ * MLP models with incompatible or unsupported layer types.
+ */
+public class UnsupportedLayerException extends RuntimeException {
+
+  /** Constructs a new UnsupportedLayerException with the default message. */
+  public UnsupportedLayerException() {
+    super("The specified layer type is not supported.");
+  }
+
+  /**
+   * Constructs a new UnsupportedLayerException with a custom message.
+   *
+   * @param message the detail message. The detail message is saved for
+   *     later retrieval by the Throwable.getMessage() method.
+   */
+  public UnsupportedLayerException(String message) {
+    super(message);
+  }
+}
diff --git a/lib/src/main/java/de/edux/ml/mlp/exceptions/UnsupportedLossFunction.java b/lib/src/main/java/de/edux/ml/mlp/exceptions/UnsupportedLossFunction.java
new file mode 100644
index 0000000..79203d9
--- /dev/null
+++ b/lib/src/main/java/de/edux/ml/mlp/exceptions/UnsupportedLossFunction.java
@@ -0,0 +1,28 @@
+package de.edux.ml.mlp.exceptions;
+
+/**
+ * Represents an exception that is thrown when an unsupported loss function is encountered.
+ * This class is part of the MLP (Multi-Layer Perceptron) framework and is used to signal
+ * that the specified loss function is not supported by the current implementation.
+ *
+ * <p>It extends RuntimeException, which allows this exception to be thrown and propagated
+ * through the call stack without being explicitly declared in method signatures.
+ */
+public class UnsupportedLossFunction extends RuntimeException {
+
+  /** Constructs a new UnsupportedLossFunction exception with the default message. */
+  public UnsupportedLossFunction() {
+    super("Unsupported loss function.");
+  }
+
+  /**
+   * Constructs a new UnsupportedLossFunction exception with a custom message.
+   *
+   * @param message The custom message that describes this exception.
+   */
+  public UnsupportedLossFunction(String message) {
+    super(message);
+  }
+}
diff --git a/lib/src/main/java/de/edux/ml/mlp/util/TrainingArrays.java b/lib/src/main/java/de/edux/ml/mlp/util/TrainingArrays.java
new file mode 100644
index 0000000..070f6ac
--- /dev/null
+++ b/lib/src/main/java/de/edux/ml/mlp/util/TrainingArrays.java
@@ -0,0 +1,27 @@
+package de.edux.ml.mlp.util;
+
+// Simple holder for flattened input/output training data.
+public class TrainingArrays {
+  private double[] input;
+  private double[] output;
+
+  public TrainingArrays(double[] input, double[] output) {
+    this.input = input;
+    this.output = output;
+  }
+
+  public double[] getInput() {
+    return input;
+  }
+
+  public void setInput(double[] input) {
+    this.input = input;
+  }
+
+  public double[] getOutput() {
+    return output;
+  }
+
+  public void setOutput(double[] output) {
+    this.output = output;
+  }
+}
diff --git a/lib/src/main/java/de/edux/ml/mlp/util/TrainingMatrices.java b/lib/src/main/java/de/edux/ml/mlp/util/TrainingMatrices.java
new file mode 100644
index 0000000..b79ee07
--- /dev/null
+++ b/lib/src/main/java/de/edux/ml/mlp/util/TrainingMatrices.java
@@ -0,0 +1,29 @@
+package de.edux.ml.mlp.util;
+
+import de.edux.ml.mlp.core.tensor.Matrix;
+
+// Simple holder for paired input/output training matrices.
+public class TrainingMatrices {
+  private Matrix input;
+  private Matrix output;
+
+  public TrainingMatrices(Matrix input, Matrix output) {
+    this.input = input;
+    this.output = output;
+  }
+
+  public Matrix getInput() {
+    return input;
+  }
+
+  public Matrix getOutput() {
+    return output;
+  }
+
+  public void setInput(Matrix input) {
+    this.input = input;
+  }
+
+  public void setOutput(Matrix output) {
+    this.output = output;
+  }
+}
diff --git a/lib/src/main/java/de/edux/ml/mlp/util/Util.java b/lib/src/main/java/de/edux/ml/mlp/util/Util.java
new file mode 100644
index 0000000..0be9be0
--- /dev/null
+++ b/lib/src/main/java/de/edux/ml/mlp/util/Util.java
@@ -0,0 +1,81 @@
+package de.edux.ml.mlp.util;
+
+import de.edux.ml.mlp.core.tensor.Matrix;
+import java.util.Random;
+
+public class Util {
+
+  private static final Random random = new Random();
+
+  public static Matrix generateInputMatrix(int rows, int cols) {
+    return new Matrix(rows, cols, i -> random.nextGaussian());
+  }
+
+  public static Matrix generateExpectedMatrix(int rows, int cols) {
+    Matrix expected = new Matrix(rows, cols, i -> 0);
+    for (int col = 0; col < cols; col++) {
+      int randomRow = random.nextInt(rows);
+      expected.set(randomRow, col, 1);
+    }
+
+    return expected;
+  }
+
+  public static Matrix generateTrainableExpectedMatrix(int outputRows, Matrix input) {
+    Matrix expected = new Matrix(outputRows, input.getCols());
+
+    Matrix columnSum = input.sumColumns();
+    columnSum.forEach((row, col, value) -> {
+      int rowIndex = (int) (outputRows * (Math.sin(value) + 1) / 2.0);
+      expected.set(rowIndex, col, 1);
+    });
+
+    return expected;
+  }
+
+  /**
+   * Generates paired training data: Gaussian input columns scaled to a random radius between
+   * 0 and outputRows, and one-hot output columns encoding that radius.
+   *
+   * @param inputRows number of rows of the input matrix
+   * @param outputRows number of rows of the one-hot output matrix
+   * @param cols number of columns (training samples)
+   * @return the generated input and output matrices
+   */
+  public static TrainingMatrices generateTrainingMatrices(int inputRows, int outputRows, int cols) {
+    var io = generateTrainingArrays(inputRows, outputRows, cols);
+    Matrix input = new Matrix(inputRows, cols, io.getInput());
+    Matrix output = new Matrix(outputRows, cols, io.getOutput());
+
+    return new TrainingMatrices(input, output);
+  }
+
+  public static TrainingArrays generateTrainingArrays(int inputSize, int outputSize, int numberItems) {
+    double[] input = new double[inputSize * numberItems];
+    double[] output = new double[outputSize * numberItems];
+
+    int inputPos = 0;
+    int outputPos = 0;
+    for (int col = 0; col < numberItems; col++) {
+      int radius = random.nextInt(outputSize);
+
+      double[] values = new double[inputSize];
+      double initialRadius = 0;
+      for (int row = 0; row < inputSize; row++) {
+        values[row] = random.nextGaussian();
+        initialRadius += values[row] * values[row];
+      }
+      initialRadius = Math.sqrt(initialRadius);
+
+      // Rescale the Gaussian vector so its Euclidean norm equals the chosen radius
+      for (int row = 0; row < inputSize; row++) {
+        input[inputPos++] = values[row] * radius / initialRadius;
+      }
+      output[outputPos + radius] = 1; // one-hot encode the radius
+      outputPos += outputSize;
+    }
+    return new TrainingArrays(input, output);
+  }
+}
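`Util.generateTrainingMatrices` is what the new tests use to synthesize column-wise training data; a small sketch of the shapes involved (dimensions are illustrative):

```
TrainingMatrices data = Util.generateTrainingMatrices(10, 5, 32);
Matrix input = data.getInput();   // 10 x 32: Gaussian columns, scaled to a random radius
Matrix labels = data.getOutput(); // 5 x 32: one-hot columns encoding that radius
```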
diff --git a/lib/src/main/java/de/edux/ml/nn/config/NetworkConfiguration.java b/lib/src/main/java/de/edux/ml/nn/config/NetworkConfiguration.java
deleted file mode 100644
index 4864d01..0000000
--- a/lib/src/main/java/de/edux/ml/nn/config/NetworkConfiguration.java
+++ /dev/null
@@ -1,18 +0,0 @@
-package de.edux.ml.nn.config;
-
-import de.edux.functions.activation.ActivationFunction;
-import de.edux.functions.initialization.Initialization;
-import de.edux.functions.loss.LossFunction;
-import java.util.List;
-
-public record NetworkConfiguration(
-    int inputSize,
-    List<Integer> hiddenLayersSize,
-    int outputSize,
-    double learningRate,
-    int epochs,
-    ActivationFunction hiddenLayerActivationFunction,
-    ActivationFunction outputLayerActivationFunction,
-    LossFunction lossFunction,
-    Initialization hiddenLayerWeightInitialization,
-    Initialization outputLayerWeightInitialization) {}
diff --git a/lib/src/main/java/de/edux/ml/nn/config/package-info.java b/lib/src/main/java/de/edux/ml/nn/config/package-info.java
deleted file mode 100644
index 6bbb23d..0000000
--- a/lib/src/main/java/de/edux/ml/nn/config/package-info.java
+++ /dev/null
@@ -1,2 +0,0 @@
-/** Classes for the configuration of the neural network. */
-package de.edux.ml.nn.config;
diff --git a/lib/src/main/java/de/edux/ml/nn/network/MultilayerPerceptron.java b/lib/src/main/java/de/edux/ml/nn/network/MultilayerPerceptron.java
deleted file mode 100644
index 5fa8b2f..0000000
--- a/lib/src/main/java/de/edux/ml/nn/network/MultilayerPerceptron.java
+++ /dev/null
@@ -1,273 +0,0 @@
-package de.edux.ml.nn.network;
-
-import de.edux.api.Classifier;
-import de.edux.functions.activation.ActivationFunction;
-import de.edux.ml.nn.config.NetworkConfiguration;
-import java.util.ArrayList;
-import java.util.List;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * The {@code MultilayerPerceptron} class represents a simple feedforward neural network, which
- * consists of input, hidden, and output layers. It implements the {@code Classifier} interface,
- * facilitating both the training and prediction processes on a given dataset.
- *
- * <p>This implementation utilizes a backpropagation algorithm for training the neural network to
- * adjust weights and biases, considering a set configuration defined by {@link
- * NetworkConfiguration}. The network's architecture is multi-layered, comprising one or more hidden
- * layers in addition to the input and output layers. Neurons within these layers utilize activation
- * functions defined per layer through the configuration.
- *
- * <p>The training process adjusts the weights and biases of neurons within the network based on the
- * error between predicted and expected outputs. Additionally, the implementation provides
- * functionality to save and restore the best model achieved during training based on accuracy.
- * Early stopping is applied during training to prevent overfitting and unnecessary computational
- * expense by monitoring the performance improvement across epochs.
- *
- * <p>Usage example:
- *
- * <pre>{@code
- *    NetworkConfiguration config = ... ;
- *    double[][] testFeatures = ... ;
- *    double[][] testLabels = ... ;
- *
- *    MultilayerPerceptron mlp = new MultilayerPerceptron(config, testFeatures, testLabels);
- *    mlp.train(features, labels);
- *
- *    double accuracy = mlp.evaluate(testFeatures, testLabels);
- *    double[] prediction = mlp.predict(singleInput);
- * }</pre>
- *
- * <p>
Note: This implementation logs informative messages, such as accuracy per epoch, using SLF4J - * logging. - * - * @see de.edux.api.Classifier - * @see de.edux.ml.nn.network.Neuron - * @see de.edux.ml.nn.config.NetworkConfiguration - * @see de.edux.functions.activation.ActivationFunction - */ -public class MultilayerPerceptron implements Classifier { - private static final Logger LOG = LoggerFactory.getLogger(MultilayerPerceptron.class); - - private final NetworkConfiguration config; - private final ActivationFunction hiddenLayerActivationFunction; - private final ActivationFunction outputLayerActivationFunction; - private final double[][] testFeatures; - private final double[][] testLabels; - private List hiddenLayers; - private Neuron[] outputLayer; - private double bestAccuracy; - private ArrayList bestHiddenLayers; - private Neuron[] bestOutputLayer; - - public MultilayerPerceptron( - NetworkConfiguration config, double[][] testFeatures, double[][] testLabels) { - this.config = config; - this.testFeatures = testFeatures; - this.testLabels = testLabels; - - hiddenLayerActivationFunction = config.hiddenLayerActivationFunction(); - outputLayerActivationFunction = config.outputLayerActivationFunction(); - - hiddenLayers = new ArrayList<>(); - - int inputSizeForCurrentLayer = config.inputSize(); - for (int layerSize : config.hiddenLayersSize()) { - Neuron[] hiddenLayer = new Neuron[layerSize]; - for (int i = 0; i < layerSize; i++) { - hiddenLayer[i] = - new Neuron( - inputSizeForCurrentLayer, - hiddenLayerActivationFunction, - this.config.hiddenLayerWeightInitialization()); - } - hiddenLayers.add(hiddenLayer); - inputSizeForCurrentLayer = layerSize; - } - - outputLayer = new Neuron[config.outputSize()]; - for (int i = 0; i < config.outputSize(); i++) { - outputLayer[i] = - new Neuron( - inputSizeForCurrentLayer, - outputLayerActivationFunction, - this.config.outputLayerWeightInitialization()); - } - } - - private double[] feedforward(double[] input) { - - double[] currentInput = passInputThroughAllHiddenLayers(input); - - double[] output = passInputTroughOutputLayer(currentInput); - - return outputLayerActivationFunction.calculateActivation(output); - } - - private double[] passInputThroughAllHiddenLayers(double[] input) { - double[] currentInput = input; - for (Neuron[] layer : hiddenLayers) { - double[] hiddenOutputs = new double[layer.length]; - for (int i = 0; i < layer.length; i++) { - hiddenOutputs[i] = layer[i].calculateOutput(currentInput); - } - currentInput = hiddenOutputs; - } - return currentInput; - } - - private double[] passInputTroughOutputLayer(double[] currentInput) { - double[] output = new double[config.outputSize()]; - for (int i = 0; i < config.outputSize(); i++) { - output[i] = outputLayer[i].calculateOutput(currentInput); - } - return output; - } - - @Override - public boolean train(double[][] features, double[][] labels) { - bestAccuracy = 0; - int epochsWithoutImprovement = 0; - final int PATIENCE = 10; - - for (int epoch = 0; epoch < config.epochs(); epoch++) { - for (int i = 0; i < features.length; i++) { - double[] output = feedforward(features[i]); - - double[] output_error_signal = new double[config.outputSize()]; - for (int j = 0; j < config.outputSize(); j++) { - output_error_signal[j] = labels[i][j] - output[j]; - } - - List hidden_error_signals = new ArrayList<>(); - for (int j = hiddenLayers.size() - 1; j >= 0; j--) { - double[] hidden_error_signal = new double[hiddenLayers.get(j).length]; - for (int k = 0; k < hiddenLayers.get(j).length; k++) { - for (int 
l = 0; l < output_error_signal.length; l++) { - hidden_error_signal[k] += - output_error_signal[l] - * (j == hiddenLayers.size() - 1 - ? outputLayer[l].getWeight(k) - : hiddenLayers.get(j + 1)[l].getWeight(k)); - } - } - hidden_error_signals.add(0, hidden_error_signal); - output_error_signal = hidden_error_signal; - } - - updateWeights(i, output_error_signal, hidden_error_signals, features); - } - - double accuracy = evaluate(testFeatures, testLabels); - LOG.info("Epoch: {} - Accuracy: {}%", epoch, String.format("%.2f", accuracy * 100)); - - if (accuracy > bestAccuracy) { - bestAccuracy = accuracy; - epochsWithoutImprovement = 0; - saveBestModel(hiddenLayers, outputLayer); - } else { - epochsWithoutImprovement++; - } - - if (epochsWithoutImprovement >= PATIENCE) { - LOG.info( - "Early stopping: Stopping training as the model has not improved in the last {} epochs.", - PATIENCE); - loadBestModel(); - LOG.info( - "Best accuracy after restoring best MLP model: {}%", - String.format("%.2f", bestAccuracy * 100)); - break; - } - } - return true; - } - - private void loadBestModel() { - this.hiddenLayers = this.bestHiddenLayers; - this.outputLayer = this.bestOutputLayer; - } - - private void saveBestModel(List hiddenLayers, Neuron[] outputLayer) { - this.bestHiddenLayers = new ArrayList<>(); - this.bestOutputLayer = new Neuron[outputLayer.length]; - for (int i = 0; i < hiddenLayers.size(); i++) { - Neuron[] layer = hiddenLayers.get(i); - Neuron[] newLayer = new Neuron[layer.length]; - for (int j = 0; j < layer.length; j++) { - newLayer[j] = - new Neuron( - layer[j].getWeights().length, - layer[j].getActivationFunction(), - layer[j].getInitialization()); - newLayer[j].setBias(layer[j].getBias()); - for (int k = 0; k < layer[j].getWeights().length; k++) { - newLayer[j].getWeights()[k] = layer[j].getWeight(k); - } - } - this.bestHiddenLayers.add(newLayer); - } - for (int i = 0; i < outputLayer.length; i++) { - this.bestOutputLayer[i] = - new Neuron( - outputLayer[i].getWeights().length, - outputLayer[i].getActivationFunction(), - outputLayer[i].getInitialization()); - this.bestOutputLayer[i].setBias(outputLayer[i].getBias()); - for (int j = 0; j < outputLayer[i].getWeights().length; j++) { - this.bestOutputLayer[i].getWeights()[j] = outputLayer[i].getWeight(j); - } - } - } - - private void updateWeights( - int i, - double[] output_error_signal, - List hidden_error_signals, - double[][] features) { - double[] currentInput = features[i]; - - for (int j = 0; j < hiddenLayers.size(); j++) { - Neuron[] layer = hiddenLayers.get(j); - double[] errorSignal = hidden_error_signals.get(j); - for (int k = 0; k < layer.length; k++) { - layer[k].adjustBias(errorSignal[k], config.learningRate()); - layer[k].adjustWeights(currentInput, errorSignal[k], config.learningRate()); - } - currentInput = new double[layer.length]; - for (int k = 0; k < layer.length; k++) { - currentInput[k] = layer[k].calculateOutput(features[i]); - } - } - - for (int j = 0; j < config.outputSize(); j++) { - outputLayer[j].adjustBias(output_error_signal[j], config.learningRate()); - outputLayer[j].adjustWeights(currentInput, output_error_signal[j], config.learningRate()); - } - } - - @Override - public double evaluate(double[][] testInputs, double[][] testTargets) { - int correctCount = 0; - - for (int i = 0; i < testInputs.length; i++) { - double[] predicted = predict(testInputs[i]); - int predictedIndex = 0; - int targetIndex = 0; - - for (int j = 0; j < predicted.length; j++) { - if (predicted[j] > predicted[predictedIndex]) 
predictedIndex = j; - if (testTargets[i][j] > testTargets[i][targetIndex]) targetIndex = j; - } - - if (predictedIndex == targetIndex) correctCount++; - } - - return (double) correctCount / testInputs.length; - } - - public double[] predict(double[] input) { - return feedforward(input); - } -} diff --git a/lib/src/main/java/de/edux/ml/nn/network/Neuron.java b/lib/src/main/java/de/edux/ml/nn/network/Neuron.java deleted file mode 100644 index 6fdb4bc..0000000 --- a/lib/src/main/java/de/edux/ml/nn/network/Neuron.java +++ /dev/null @@ -1,66 +0,0 @@ -package de.edux.ml.nn.network; - -import de.edux.functions.activation.ActivationFunction; -import de.edux.functions.initialization.Initialization; - -class Neuron { - private final Initialization initialization; - private final ActivationFunction activationFunction; - private double[] weights; - private double bias; - - public Neuron( - int inputSize, ActivationFunction activationFunction, Initialization initialization) { - this.weights = new double[inputSize]; - this.activationFunction = activationFunction; - this.initialization = initialization; - this.bias = initialization.weightInitialization(inputSize, new double[1])[0]; - this.weights = initialization.weightInitialization(inputSize, weights); - } - - public Initialization getInitialization() { - return initialization; - } - - public double calculateOutput(double[] input) { - double output = bias; - for (int i = 0; i < input.length; i++) { - output += input[i] * weights[i]; - } - return activationFunction.calculateActivation(output); - } - - public void adjustWeights(double[] input, double error, double learningRate) { - for (int i = 0; i < weights.length; i++) { - weights[i] += learningRate * input[i] * error; - } - } - - public void adjustBias(double error, double learningRate) { - bias += learningRate * error; - } - - public double getWeight(int index) { - return weights[index]; - } - - public double[] getWeights() { - return weights; - } - - public void setWeights(double[] weights) { - this.weights = weights; - } - - public double getBias() { - return bias; - } - - public void setBias(double bias) { - this.bias = bias; - } - - public ActivationFunction getActivationFunction() { - return activationFunction; - } -} diff --git a/lib/src/main/java/de/edux/ml/nn/network/api/Dataset.java b/lib/src/main/java/de/edux/ml/nn/network/api/Dataset.java deleted file mode 100644 index e4a159c..0000000 --- a/lib/src/main/java/de/edux/ml/nn/network/api/Dataset.java +++ /dev/null @@ -1,5 +0,0 @@ -package de.edux.ml.nn.network.api; - -import java.util.List; - -public record Dataset(List trainData, List testData) {} diff --git a/lib/src/main/java/de/edux/ml/nn/network/api/INeuron.java b/lib/src/main/java/de/edux/ml/nn/network/api/INeuron.java deleted file mode 100644 index 88c63aa..0000000 --- a/lib/src/main/java/de/edux/ml/nn/network/api/INeuron.java +++ /dev/null @@ -1,9 +0,0 @@ -package de.edux.ml.nn.network.api; - -public interface INeuron { - double calculateOutput(double[] inputs); - - double calculateError(double targetOutput); - - void updateWeights(double[] inputs, double error); -} diff --git a/lib/src/main/java/de/edux/ml/nn/network/api/IPerceptron.java b/lib/src/main/java/de/edux/ml/nn/network/api/IPerceptron.java deleted file mode 100644 index 486fcb1..0000000 --- a/lib/src/main/java/de/edux/ml/nn/network/api/IPerceptron.java +++ /dev/null @@ -1,11 +0,0 @@ -package de.edux.ml.nn.network.api; - -public interface IPerceptron { - void train(double[][] inputs, double[][] targetOutputs); - - double[] 
predict(double[] inputs);
-
-  void backpropagate(double[] inputs, double target);
-
-  double evaluate(double[][] inputs, double[][] targetOutputs);
-}
diff --git a/lib/src/test/java/de/edux/core/BackpropagationTest.java b/lib/src/test/java/de/edux/core/BackpropagationTest.java
new file mode 100644
index 0000000..e594ded
--- /dev/null
+++ b/lib/src/test/java/de/edux/core/BackpropagationTest.java
@@ -0,0 +1,60 @@
+package de.edux.core;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import de.edux.ml.mlp.core.network.loss.LossFunctions;
+import de.edux.ml.mlp.core.network.optimizer.Approximator;
+import de.edux.ml.mlp.core.tensor.Matrix;
+import java.util.Random;
+import org.junit.jupiter.api.Test;
+
+public class BackpropagationTest {
+  private final Random random = new Random();
+
+  @Test
+  void shouldBackpropagate() {
+
+    interface NeuralNetwork {
+      Matrix apply(Matrix m);
+    }
+
+    final int inputRows = 4;
+    final int cols = 5;
+    final int outputRows = 4;
+
+    Matrix input = new Matrix(inputRows, cols, i -> random.nextGaussian());
+    Matrix expected = new Matrix(outputRows, cols, i -> 0);
+
+    Matrix weights = new Matrix(outputRows, inputRows, i -> random.nextGaussian());
+    Matrix biases = new Matrix(outputRows, 1, i -> random.nextGaussian());
+
+    // One-hot expected output per column
+    for (int col = 0; col < cols; col++) {
+      expected.set(random.nextInt(outputRows), col, 1);
+    }
+
+    NeuralNetwork neuralNet = m -> {
+      Matrix out = m.relu();                   // input
+      out = weights.multiply(out).add(biases); // Dense
+      out = out.softmax();                     // Softmax
+      return out;
+    };
+
+    Matrix softmaxOutput = neuralNet.apply(input);
+
+    Matrix approximatedResult = Approximator.gradient(input, in -> {
+      Matrix out = neuralNet.apply(in);
+      return LossFunctions.crossEntropy(expected, out);
+    });
+
+    Matrix calculatedResult = softmaxOutput.subtract(expected); // Softmax backward
+    calculatedResult = weights.transpose().multiply(calculatedResult);
+    calculatedResult = calculatedResult.reluDerivative(input);
+
+    System.out.println("Approximated Result");
+    System.out.println(approximatedResult);
+    System.out.println("Backpropagated Result");
+    System.out.println(calculatedResult);
+
+    assertEquals(approximatedResult, calculatedResult);
+  }
+}
diff --git a/lib/src/test/java/de/edux/core/CategoricalCrossEntropyLossTest.java b/lib/src/test/java/de/edux/core/CategoricalCrossEntropyLossTest.java
new file mode 100644
index 0000000..ffcf94e
--- /dev/null
+++ b/lib/src/test/java/de/edux/core/CategoricalCrossEntropyLossTest.java
@@ -0,0 +1,32 @@
+package de.edux.core;
+
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import de.edux.ml.mlp.core.network.loss.LossFunctions;
+import de.edux.ml.mlp.core.tensor.Matrix;
+import org.junit.jupiter.api.Test;
+
+public class CategoricalCrossEntropyLossTest {
+
+  private static final double DELTA = 0.05;
+
+  @Test
+  public void shouldCalculateCategoricalCrossEntropyLoss() {
+    double[] expectedValues = {1, 0, 0, 0, 0, 1, 0, 1, 0};
+
+    Matrix expected = new Matrix(3, 3, i -> expectedValues[i]);
+    Matrix actual = new Matrix(3, 3, i -> DELTA * i * i).softmax();
+    Matrix result = LossFunctions.crossEntropy(expected, actual);
+
+    actual.forEach((row, col, index, value) -> {
+      double expectedValue = expected.getData()[index];
+      double loss = result.getData()[col];
+
+      if (expectedValue > 0.9) {
+        assertTrue(Math.abs(-Math.log(value) - loss) < 0.001,
+            String.format("expected: %f, actual: %f", -Math.log(value), loss));
+      }
+    });
+  }
+}
diff --git a/lib/src/test/java/de/edux/core/MatrixTest.java b/lib/src/test/java/de/edux/core/MatrixTest.java
new file mode 100644
index 0000000..e483b54
--- /dev/null
+++
b/lib/src/test/java/de/edux/core/MatrixTest.java @@ -0,0 +1,283 @@ +package de.edux.core; + +import static org.junit.jupiter.api.Assertions.*; + +import de.edux.ml.mlp.core.tensor.Matrix; +import java.util.Random; +import org.junit.jupiter.api.Test; + +class MatrixTest { + private static final double TOLERANCE = 1e-6; + private final Random random = new Random(); + + @Test + public void testMultiply() { + // Creating a matrix with elements initialized to their index - 1 + Matrix a = new Matrix(2, 3, (index) -> index - 1); + double x = 0.5; + + // Applying the multiplication + Matrix result = a.apply((index, value) -> value * x); + + // Testing the result + for (int i = 0; i < 2 * 3; i++) { + assertEquals((i - 1) * x, result.getData()[i]); + } + } + + @Test + public void testEquals() { + Matrix a = new Matrix(2, 3, (index) -> index - 6); + Matrix b = new Matrix(2, 3, (index) -> index - 6); + Matrix c = new Matrix(2, 3, (index) -> index - 6.2); + + assertEquals(a, b); + assertNotEquals(a, c); + } + + @Test + public void testAddMatrices() { + Matrix a = new Matrix(2, 3, (index) -> index); + Matrix b = new Matrix(2, 3, (index) -> index * 2); + Matrix expected = a.apply((index, value) -> value + b.getData()[index]); + + Matrix result = a.add(b); + + assertEquals(expected, result); + } + + @Test + public void shouldNotMultiplyMatricesWithWrongDimensions() { + Matrix a = new Matrix(2, 2); + Matrix b = new Matrix(3, 2); + assertThrows(IllegalArgumentException.class, () -> a.multiply(b)); + } + + @Test + public void shouldMultiplyMatrices() { + Matrix a = new Matrix(2, 2, (index) -> index); + Matrix b = new Matrix(2, 2, (index) -> index * 2); + + /*+-------+-------+ + | 0,000 | 1,000 | + +-------+-------+ + | 2,000 | 3,000 | + +-------+-------+ + multiply + +-------+-------+ + | 0,000 | 2,000 | + +-------+-------+ + | 4,000 | 6,000 | + +-------+-------+*/ + + Matrix expected = new Matrix(2, 2); + expected.getData()[0] = 4; + expected.getData()[1] = 6; + expected.getData()[2] = 12; + expected.getData()[3] = 22; + + Matrix result = a.multiply(b); + assertEquals(expected, result); + } + + @Test + public void shouldMultiplyWithDifferentColsAndRows() { + Matrix a = new Matrix(2, 3, (index) -> index); + Matrix b = new Matrix(3, 2, (index) -> index); + + /*+-------+-------+-------+ + | 0,000 | 1,000 | 2,000 | + +-------+-------+-------+ + | 3,000 | 4,000 | 5,000 | + +-------+-------+-------+ + multiply + +-------+--------+ + | 0,000 | 2,000 | + +-------+--------+ + | 4,000 | 6,000 | + +-------+--------+ + | 8,000 | 10,000 | + +-------+--------+*/ + + Matrix expected = new Matrix(2, 2); + expected.getData()[0] = 10; + expected.getData()[1] = 13; + expected.getData()[2] = 28; + expected.getData()[3] = 40; + + Matrix result = a.multiply(b); + /* + *+--------+--------+ + | 10,000 | 13,000 | + +--------+--------+ + | 28,000 | 40,000 | + +--------+--------+ + */ + System.out.println(result); + assertEquals(expected, result); + } + + @Test + public void shouldRunWithoutOutOfMemory() { + try { + var matrixSize = 1000; + Matrix a = new Matrix(matrixSize, matrixSize, (index) -> index); + Matrix b = new Matrix(matrixSize, matrixSize, (index) -> index); + + long startTime = System.nanoTime(); + a.multiply(b); + long endTime = System.nanoTime(); + + System.out.println("Time: " + (endTime - startTime) / 1e9 + "s"); + } catch (OutOfMemoryError e) { + fail("Test failed due to insufficient memory: " + e.getMessage()); + } + } + + @Test + public void shouldRunWithoutOutOfMemoryOnParallelMultiplication() { + try { + Matrix 
a = new Matrix(1500, 1500, (index) -> index); + Matrix b = new Matrix(1500, 1500, (index) -> index); + + long startTime = System.nanoTime(); + a.multiplyParallel(b); + long endTime = System.nanoTime(); + + System.out.println("Time: " + (endTime - startTime) / 1e9 + "s"); + } catch (OutOfMemoryError e) { + fail("Test failed due to insufficient memory: " + e.getMessage()); + } + } + + @Test + void shouldMultiplyAndAdd() { + Matrix inout = new Matrix(3, 3, (index) -> index + 1); + Matrix weights = new Matrix(3, 3, (index) -> index + 1); + Matrix bias = new Matrix(3, 1, (index) -> index + 1); + + Matrix result = inout.multiply(weights).add(bias); + + Matrix expected = new Matrix(3, 3); + + expected.getData()[0] = 31; + expected.getData()[1] = 37; + expected.getData()[2] = 43; + expected.getData()[3] = 68; + expected.getData()[4] = 83; + expected.getData()[5] = 98; + expected.getData()[6] = 105; + expected.getData()[7] = 129; + expected.getData()[8] = 153; + + assert result.equals(expected); + } + + @Test + void shouldSumColumns() { + Matrix a = new Matrix(4, 5, (index) -> index); + Matrix result = a.sumColumns(); + Matrix expected = new Matrix(1, 5); + expected.getData()[0] = 30; + expected.getData()[1] = 34; + expected.getData()[2] = 38; + expected.getData()[3] = 42; + expected.getData()[4] = 46; + assert result.equals(expected); + } + + @Test + void shouldAddIncrement() { + Matrix a = new Matrix(5, 8, (index) -> random.nextGaussian()); + int row = 3; + int col = 2; + double increment = 10; + + Matrix result = a.addIncrement(row, col, increment); + + double incrementedValue = result.get(row, col); + double originalValue = a.get(row, col); + + assertEquals(a.get(row, col) + increment, result.get(row, col)); + assertTrue(Math.abs(incrementedValue - (originalValue + increment)) < TOLERANCE); + } + + @Test + void shouldTranspose() { + Matrix m = new Matrix(2, 3, i -> i); + System.out.println(m); + Matrix result = m.transpose(); + + System.out.println(result); + double[] expectedData = {0, 3, 1, 4, 2, 5}; + Matrix expected = new Matrix(3, 2, i -> expectedData[i]); + + assertEquals(expected, result); + } + + @Test + public void shouldCalculateAverageColumn() { + int rows = 3; + int cols = 4; + Matrix m = new Matrix(rows, cols, i -> 2 * i - 3); + double averageIndex = (cols - 1) / 2.0; + + Matrix expected = new Matrix(rows, 1); + expected.modify((row, col, value) -> 2 * (row * cols + averageIndex) - 3); + + Matrix result = m.averageColumn(); + assertEquals(expected, result); + + } + + @Test + void shouldFindGreatestRowNumber() { + double[] values = {7, -6, -6, 7, 2, 10, 3, -1, 1}; + Matrix m = new Matrix(3,3, i->values[i]); + + Matrix result = m.getGreatestRowNumber(); + + double[] expectedValues = {0, 1, 1}; + Matrix expected = new Matrix(3,1, i->expectedValues[i]); + + assertEquals(expected, result); + + System.out.println(m); + System.out.println(result); + } + + @Test + void testDivideNormalCase() { + Matrix matrix = new Matrix(4, 4, i -> (double)i); + int batches = 2; + Matrix result = matrix.divide(batches); + + assertNotNull(result, "The resulting matrix should not be null"); + assertEquals(matrix.getRows(), result.getRows(), "The number of rows should remain the same"); + assertEquals(matrix.getCols(), result.getCols(), "The number of columns should remain the same"); + + for (int row = 0; row < matrix.getRows(); row++) { + for (int col = 0; col < matrix.getCols(); col++) { + double expectedValue = matrix.get(row, col) / batches; + assertEquals(expectedValue, result.get(row, col), "The value 
at position [" + row + "][" + col + "] is not correct"); + } + } + } + + + @Test + void testDivideWithInvalidBatches() { + Matrix matrix = new Matrix(4, 4); + int invalidBatches = 0; + assertThrows(IllegalArgumentException.class, () -> matrix.divide(invalidBatches)); + } + + @Test + void testDivideWithBoundaryBatches() { + Matrix matrix = new Matrix(4, 4); + int batches = 1; // Oder ein anderer Grenzwert + Matrix result = matrix.divide(batches); + assertNotNull(result); + + } +} diff --git a/lib/src/test/java/de/edux/core/NeuralNetTest.java b/lib/src/test/java/de/edux/core/NeuralNetTest.java new file mode 100644 index 0000000..0626e25 --- /dev/null +++ b/lib/src/test/java/de/edux/core/NeuralNetTest.java @@ -0,0 +1,86 @@ +package de.edux.core; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import de.edux.ml.mlp.core.network.Engine; +import de.edux.ml.mlp.core.network.layers.DenseLayer; +import de.edux.ml.mlp.core.network.layers.ReLuLayer; +import de.edux.ml.mlp.core.network.layers.SoftmaxLayer; +import de.edux.ml.mlp.core.network.loss.LossFunctions; +import de.edux.ml.mlp.core.network.optimizer.Approximator; +import de.edux.ml.mlp.core.tensor.Matrix; +import de.edux.ml.mlp.util.Util; +import java.util.Random; +import org.junit.jupiter.api.Test; + +public class NeuralNetTest { + + private final Random random = new Random(); + + + @Test + void testWeightGradient() { + int inputRows = 4; + int outputRows = 5; + Matrix weights = new Matrix(outputRows, inputRows, i -> random.nextGaussian()); + Matrix input = Util.generateInputMatrix(inputRows, 1); + Matrix expected = Util.generateExpectedMatrix(outputRows, 1); + Matrix output = weights.multiply(input).softmax(); + + Matrix loss = LossFunctions.crossEntropy(expected, output); + + Matrix calculatedGradient = output.apply((index, value) -> value - expected.get(index)); + + Matrix calculatedWeightGradients = calculatedGradient.multiply(input.transpose()); + + Matrix approximatedWeightGradients = Approximator.weightGradient(weights, in -> { + Matrix out = in.multiply(input).softmax(); + return LossFunctions.crossEntropy(expected, out); + }); + + calculatedWeightGradients.setTolerance(0.01); + assertEquals(approximatedWeightGradients, calculatedWeightGradients); + } + + @Test + void testEngineLayerbased() { + int rows = 5; + int cols = 6; + int outputRows = 4; + + Engine engine = new Engine(10); + engine.addLayer(new DenseLayer(5, 8)); + engine.addLayer(new ReLuLayer()); + engine.addLayer(new DenseLayer(8, 5)); + engine.addLayer(new ReLuLayer()); + engine.addLayer(new DenseLayer(5, 4)); + engine.addLayer(new SoftmaxLayer()); + + Matrix input = Util.generateInputMatrix(rows, cols); + Matrix expected = Util.generateTrainableExpectedMatrix(outputRows, input); + + //Forward pass + Matrix softmaxOutput = engine.forwardLayerbased(input); + + //Loss function + Matrix approximatedError = Approximator.gradient(input, in -> { + Matrix forwardResult = engine.forwardLayerbased(in); + return LossFunctions.crossEntropy(expected, forwardResult); + }); + + + //Backward pass + Matrix calculatedError = engine.backwardLayerBased(expected, 0.01f); + System.out.println("Approximated Error"); + System.out.println(approximatedError); + System.out.println("Calculated Error"); + System.out.println(calculatedError); + + calculatedError.setTolerance(0.0001); + + assertEquals(approximatedError, calculatedError); + + } + + +} diff --git a/lib/src/test/java/de/edux/core/ReLuTest.java b/lib/src/test/java/de/edux/core/ReLuTest.java new file mode 100644 index 
index 0000000..d1569a9
--- /dev/null
+++ b/lib/src/test/java/de/edux/core/ReLuTest.java
@@ -0,0 +1,35 @@
+package de.edux.core;
+
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import de.edux.ml.mlp.core.tensor.Matrix;
+import java.util.Random;
+import org.junit.jupiter.api.Test;
+
+public class ReLuTest {
+
+  private Random random = new Random();
+
+  @Test
+  void shouldReLu() {
+    final int numberOfNeurons = 50;
+    final int numberOfInputs = 60;
+    final int inputSize = 50;
+    Matrix inout = new Matrix(inputSize, numberOfInputs, (index) -> random.nextDouble());
+    Matrix weights = new Matrix(numberOfNeurons, inputSize, (index) -> random.nextGaussian());
+    Matrix bias = new Matrix(numberOfNeurons, 1, (index) -> random.nextGaussian());
+
+    Matrix result1 = weights.multiply(inout).add(bias);
+    Matrix result2 = weights.multiply(inout).add(bias).relu();
+
+    result2.forEach(
+        (index, value) -> {
+          double originalValue = result1.getData()[index];
+          if (originalValue <= 0) {
+            assertTrue(Math.abs(value) < 1e-6); // ReLU clamps non-positive values to zero
+          } else {
+            assertTrue(Math.abs(originalValue - value) < 1e-6); // positive values pass through
+          }
+        });
+  }
+}
diff --git a/lib/src/test/java/de/edux/core/SoftmaxTest.java b/lib/src/test/java/de/edux/core/SoftmaxTest.java
new file mode 100644
index 0000000..3b6dbb5
--- /dev/null
+++ b/lib/src/test/java/de/edux/core/SoftmaxTest.java
@@ -0,0 +1,32 @@
+package de.edux.core;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import de.edux.ml.mlp.core.tensor.Matrix;
+import java.util.Random;
+import org.junit.jupiter.api.Test;
+
+public class SoftmaxTest {
+  final Random random = new Random();
+
+  @Test
+  void shouldSoftmax() {
+    Matrix a = new Matrix(5, 8, (index) -> random.nextGaussian());
+    Matrix result = a.softmax();
+    double[] colSums = new double[8];
+    result.forEach((row, col, value) -> {
+      assertTrue(value >= 0 && value <= 1);
+      colSums[col] += value;
+    });
+
+    for (int i = 0; i < colSums.length; i++) {
+      assertEquals(1, colSums[i], 1e-6);
+    }
+  }
+}
diff --git a/lib/src/test/java/de/edux/core/network/loader/LoaderTest.java b/lib/src/test/java/de/edux/core/network/loader/LoaderTest.java
new file mode 100644
index 0000000..785eec7
--- /dev/null
+++ b/lib/src/test/java/de/edux/core/network/loader/LoaderTest.java
@@ -0,0 +1,51 @@
+package de.edux.core.network.loader;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+import de.edux.ml.mlp.core.network.loader.BatchData;
+import de.edux.ml.mlp.core.network.loader.Loader;
+import de.edux.ml.mlp.core.network.loader.MetaData;
+import de.edux.ml.mlp.core.network.loader.test.TestLoader;
+import de.edux.ml.mlp.core.tensor.Matrix;
+import org.junit.jupiter.api.Test;
+
+class LoaderTest {
+
+  @Test
+  void shouldOpen() {
+    var batchSize = 33;
+    Loader loader = new TestLoader(600, batchSize, 33);
+    MetaData metaData = loader.open();
+
+    int numberItems = metaData.getNumberItems();
+    int lastBatchSize = numberItems % batchSize;
+    int numberBatches = metaData.getNumberBatches();
+
+    for (int i = 0; i random.nextGaussian()).softmax();
+
+    Matrix expected = new Matrix(rows, cols, i -> 0);
+    for (int col = 0; col < cols; col++) {
+      int randomRow = random.nextInt(rows);
+      expected.set(randomRow, col, 1);
+    }
+
+    Matrix result = Approximator.gradient(input, in -> LossFunctions.crossEntropy(expected, in));
+
+    input.forEach((index, value) -> {
+      double resultValue = result.getData()[index];
+      double expectedValue =
expected.getData()[index]; + + if (expectedValue < 0.001) { + assertTrue(Math.abs(resultValue) < 0.01); + } else { + assertTrue(Math.abs(resultValue + 1.0 / value) < 0.01); + } + }); + } + + @Test + void shouldSoftmaxCrossEntropyGradient() { + final int rows = 4; + final int cols = 5; + + Matrix input = new Matrix(rows, cols, i -> random.nextGaussian()); + Matrix expected = new Matrix(rows, cols, i -> 0); + for (int col = 0; col < cols; col++) { + int randowmRow = random.nextInt(rows); + expected.set(randowmRow, col, 1); + } + + Matrix softmaxOutput = input.softmax(); + Matrix result = Approximator.gradient(input, in -> LossFunctions.crossEntropy(expected, in.softmax())); + + result.forEach((index, value) -> { + double softmaxValue = softmaxOutput.getData()[index]; + double expectedValue = expected.getData()[index]; + assertTrue(Math.abs(value - (softmaxValue - expectedValue)) < 0.01); + }); + + } +} \ No newline at end of file diff --git a/lib/src/test/java/de/edux/ml/nn/network/NeuronTest.java b/lib/src/test/java/de/edux/ml/nn/network/NeuronTest.java deleted file mode 100644 index 8ddbad7..0000000 --- a/lib/src/test/java/de/edux/ml/nn/network/NeuronTest.java +++ /dev/null @@ -1,50 +0,0 @@ -package de.edux.ml.nn.network; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import de.edux.functions.activation.ActivationFunction; -import de.edux.functions.initialization.Initialization; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class NeuronTest { - - private final int inputSize = 3; - private final ActivationFunction dummyActivationFunction = ActivationFunction.SOFTMAX; - private Neuron neuron; - - @BeforeEach - public void setUp() { - neuron = new Neuron(inputSize, dummyActivationFunction, Initialization.XAVIER); - } - - @Test - public void testAdjustWeights() { - double[] initialWeights = new double[inputSize]; - for (int i = 0; i < inputSize; i++) { - initialWeights[i] = neuron.getWeight(i); - } - - double[] input = {1.0, 2.0, 3.0}; - double error = 0.5; - double learningRate = 0.1; - neuron.adjustWeights(input, error, learningRate); - - for (int i = 0; i < inputSize; i++) { - double expectedWeight = initialWeights[i] + learningRate * input[i] * error; - assertEquals(expectedWeight, neuron.getWeight(i)); - } - } - - @Test - public void testAdjustBias() { - double initialBias = neuron.getBias(); - - double error = 0.5; - double learningRate = 0.1; - neuron.adjustBias(error, learningRate); - - double expectedBias = initialBias + learningRate * error; - assertEquals(expectedBias, neuron.getBias()); - } -}
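For completeness, the layer-based `Engine` that replaces the deleted `MultilayerPerceptron` can also be driven directly. This sketch mirrors `testEngineLayerbased` above; the layer sizes and learning rate are illustrative, and the `Engine` constructor argument follows the test's usage:

```
Engine engine = new Engine(10);
engine.addLayer(new DenseLayer(4, 8));
engine.addLayer(new ReLuLayer());
engine.addLayer(new DenseLayer(8, 3));
engine.addLayer(new SoftmaxLayer());

Matrix input = Util.generateInputMatrix(4, 6);
Matrix expected = Util.generateTrainableExpectedMatrix(3, input);

Matrix probs = engine.forwardLayerbased(input);            // forward pass
Matrix error = engine.backwardLayerBased(expected, 0.01f); // backward pass with SGD step
```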