diff --git a/MachineLearning/.gitignore b/MachineLearning/.gitignore new file mode 100644 index 0000000..796b96d --- /dev/null +++ b/MachineLearning/.gitignore @@ -0,0 +1 @@ +/build diff --git a/MachineLearning/build.gradle b/MachineLearning/build.gradle new file mode 100644 index 0000000..a5d22a6 --- /dev/null +++ b/MachineLearning/build.gradle @@ -0,0 +1,10 @@ +apply plugin: 'java-library' + +dependencies { + implementation fileTree(dir: 'libs', include: ['*.jar']) + implementation project(path: ':Matrix') + implementation project(path: ':NeuralNetwork') +} + +sourceCompatibility = "8" +targetCompatibility = "8" diff --git a/MachineLearning/src/main/java/ml/Matrix.java b/MachineLearning/src/main/java/ml/Matrix.java new file mode 100644 index 0000000..2af4055 --- /dev/null +++ b/MachineLearning/src/main/java/ml/Matrix.java @@ -0,0 +1,767 @@ +package ml; + +import java.util.Random; +import java.util.function.BiFunction; +import java.util.function.DoubleFunction; + +/** + * Matrix class for storage & calculations + * + * @author Sebastian Gössl + * @version 1.2 26.03.2018 + */ +public class Matrix { + + /** Matrix dimensions */ + private final int height, width; + /** Matrix elements */ + private final double[][] matrix; + + + + /** + * Constructs a new copy of an existing matrix + * @param input Matrix to copy + */ + public Matrix(Matrix input) { + this(input.getHeight(), input.getWidth()); + + for(int j=0; j= getHeight() || column < 0 || column >= getWidth()) { + throw new ArrayIndexOutOfBoundsException("Indices out of bounds!"); + } + + + matrix[row][column] = value; + } + + /** + * Returns the value of a specific element + * @param row Row index of the element + * @param column Column index of the element + * @return The value of the element + * @throws ArrayIndexOutOfBoundsException If the indices are smaller than 0 + * or bigger than the width/height -1 + */ + public double get(int row, int column) { + if(row < 0 || row >= getHeight() || column < 0 || column 
>= getWidth()) { + throw new ArrayIndexOutOfBoundsException("Indices out of bounds!"); + } + + + return matrix[row][column]; + } + + /** + * Returns the height (number of rows) of the matrix + * @return Height of the matrix + */ + public int getHeight() { + return height; + } + + /** + * Returns the width (number of columns) of the matrix + * @return Width of the matrix + */ + public int getWidth() { + return width; + } + + + /** + * Sets every element of the matrix to the given value + * @param value Value to set every element to + */ + public void fill(double value) { + for(int j=0; j function) { + final Matrix result = new Matrix(getHeight(), getWidth()); + + for(int j=0; j function) { + final Matrix result = new Matrix(getHeight(), getWidth()); + + for(int j=0; j= getWidth()) { + throw new ArrayIndexOutOfBoundsException("Index out of bounds!"); + } + + return getRows(index, index + 1); + } + + /** + * Extracts multiple rows as a new Matrix + * @param fromIndex Index of the first row + * that should be extracted (inclusive) + * @param toIndex Index of the last row that should be extracted (exclusive) + * @return The rows as a new Matrix + * @throws ArrayIndexOutOfBoundsException If an index does not point + * to an existing row + */ + public Matrix getRows(int fromIndex, int toIndex) { + if(fromIndex < 0 || fromIndex >= getHeight() + || toIndex < 0 || toIndex > getHeight()) { + throw new ArrayIndexOutOfBoundsException("Indices out of bounds!"); + } + if(fromIndex >= toIndex) { + throw new IllegalArgumentException("Illegal index direction!"); + } + + + final Matrix result = new Matrix(toIndex - fromIndex, getWidth()); + + for(int j=0; j= getHeight()) { + throw new ArrayIndexOutOfBoundsException("Index out of bounds!"); + } + + return removeRows(index, index + 1); + } + + /** + * Removes multiple rows of this matrix + * @param fromIndex Index of the first row that should be removed (inclusive) + * @param toIndex Index of the last row that should be removed 
(exclusive) + * @return Resulting matrix + * @throws ArrayIndexOutOfBoundsException If this matrix is to small to + * remove the rows or an index does not point to an existing row + */ + public Matrix removeRows(int fromIndex, int toIndex) { + if(getHeight() <= toIndex - fromIndex) { + throw new ArrayIndexOutOfBoundsException("Matrix to small!"); + } + if(fromIndex < 0 || fromIndex >= getHeight() + || toIndex < 0 || toIndex > getHeight()) { + throw new ArrayIndexOutOfBoundsException("Indices out of bounds!"); + } + if(fromIndex >= toIndex) { + throw new IllegalArgumentException("Illegal index direction!"); + } + + + final Matrix result = + new Matrix(getHeight() - (toIndex - fromIndex), getWidth()); + + for(int j=0; j= getHeight()) { + throw new ArrayIndexOutOfBoundsException("Index out of bounds!"); + } + + return getColumns(index, index + 1); + } + + /** + * Extracts multiple columns as a new Matrix + * @param fromIndex Index of the first column + * that should be extracted (inclusive) + * @param toIndex Index of the last column + * that should be extracted (exclusive) + * @return The columns as a new Matrix + * @throws ArrayIndexOutOfBoundsException If an index does not point + * to an existing column + */ + public Matrix getColumns(int fromIndex, int toIndex) { + if(fromIndex < 0 || fromIndex >= getWidth() + || toIndex < 0 || toIndex > getWidth()) { + throw new ArrayIndexOutOfBoundsException("Indices out of bounds!"); + } + if(fromIndex >= toIndex) { + throw new IllegalArgumentException("Illegal index direction!"); + } + + + final Matrix result = new Matrix(getHeight(), toIndex - fromIndex); + + for(int j=0; j= getWidth()) { + throw new ArrayIndexOutOfBoundsException("Index out of bounds!"); + } + + return removeColumns(index, index + 1); + } + + /** + * Removes multiple columns of this matrix + * @param fromIndex Index of the first column + * that should be removed (inclusive) + * @param toIndex Index of the last column that should be removed (exclusive) + * 
@return Resulting matrix + * @throws ArrayIndexOutOfBoundsException If this matrix is to small to + * remove the columns or an index does not point to an existing column + */ + public Matrix removeColumns(int fromIndex, int toIndex) { + if(getWidth() <= toIndex - fromIndex) { + throw new ArrayIndexOutOfBoundsException("Matrix to small!"); + } + if(fromIndex < 0 || fromIndex >= getWidth() + || toIndex < 0 || toIndex > getWidth()) { + throw new ArrayIndexOutOfBoundsException("Indices out of bounds!"); + } + if(fromIndex >= toIndex) { + throw new IllegalArgumentException("Illegal index direction!"); + } + + + final Matrix result = + new Matrix(getHeight(), getWidth() - (toIndex - fromIndex)); + + for(int j=0; j x, +// //Tanh +// Math::tanh, +// //Sigmoid +// x -> 1 / (1 + Math.exp(-x)), +// //ReLU +// x -> { +// if(x >= 0) { +// return x; +// } else { +// return 0.0; +// }}, +// //SoftPlus +// x -> Math.log(1 + Math.exp(x)), +// //Leaky ReLU +// x -> { +// if(x >= 0) { +// return x; +// } else { +// return RELU_LEAKY_LEAKAGE * x; +// }} +// }; +// +// private static final DoubleFunction[] prime = { +// //None +// x -> 1.0, +// //Tanh +// x -> 1 - Math.tanh(x) * Math.tanh(x), +// //Sigmoid +// x -> Math.exp(-x) / ((1 + Math.exp(-x)) * (1 + Math.exp(-x))), +// //ReLU +// x -> { +// if(x >= 0) { +// return 1.0; +// } else { +// return 0.0; +// }}, +// //Softplus +// x -> 1 / (1 + Math.exp(-x)), +// //Leaky ReLU +// x -> { +// if(x >= 0) { +// return 1.0; +// } else { +// return RELU_LEAKY_LEAKAGE; +// }} +// }; +// +// /** +// * Returns this activation function as a function +// * @return Function +// */ +// public DoubleFunction function() { +// return function[ordinal()]; +// } +// +// /** +// * Returns this activation function's derivative as a function +// * @return Function +// */ +// public DoubleFunction prime() { +// return prime[ordinal()]; +// } +// +// +// +// @Override +// public String toString() { +// return name[ordinal()]; +// } +// } + + + /* + * Inputs 
+ * + * Weights[0] + * + * Layer[0] + * ActivationZ[0] (Weighted sum) + * ActivationA[0] (Activated sum) + * + * Weights[1] + * + * Layer[1] + * ActivationZ[1] (Weighted sum) + * ActivationA[1] (Activated sum) + * + * ... + */ + + /** + * Number of input neurons + */ + private final int numberOfInputs; + /** + * Number of neurons in each layer + */ + private final int[] layerSizes; + /** + * Each layers activation function + */ + private final ActivationFunction[] activationFunctions; + + /** + * Weights + */ + private final Matrix[] weights; + /** + * Activities, needed for backpropagation + */ + private final Matrix[] activityA; + private final Matrix[] activityZ; + + /** + * Constructs a new copy of an existing network + * + * @param net Network to copy + */ + public Network(Network net) { + this(net.getNumberOfInputs(), + net.copyLayerSizes(), + net.copyActivationFunctions()); + + setWeights(net.copyWeights()); + } + + /** + * Constructs a new network + * + * @param numberOfInputs Number of inputs + * @param layerSizes Numbers of neurons in each hidden layer, last layer is the output + * layer (number of outputs) + * @param activationFunctions Activation functions for every layer + * @throws IllegalArgumentException If the number of layers or the number of neurons in a layer + * is smaller than 1 or if the number of given activation + * functions does not equal the number of layers + */ + public Network(int numberOfInputs, int[] layerSizes, + ActivationFunction[] activationFunctions) { + if (numberOfInputs < 1) { + throw new IllegalArgumentException( + "Number of input neurons less than 1!"); + } + if (layerSizes.length < 1) { + throw new IllegalArgumentException("Number of layers less than 1!"); + } + if (activationFunctions.length != layerSizes.length) { + throw new IllegalArgumentException( + "Not as many activation functions as layers!"); + } + for (int layerSize : layerSizes) { + if (layerSize < 1) { + throw new IllegalArgumentException( + "Number of 
neurons in layer less than 1!"); + } + } + + //Dimensions + this.numberOfInputs = numberOfInputs; + this.layerSizes = Arrays.copyOf(layerSizes, layerSizes.length); + + //Activation functions (defensive copy, same as layerSizes, so callers cannot alias internal state) + this.activationFunctions = Arrays.copyOf(activationFunctions, activationFunctions.length); + + //Weights + weights = new Matrix[layerSizes.length]; + weights[0] = new Matrix(numberOfInputs, layerSizes[0]); + for (int i = 1; i < layerSizes.length; i++) { + weights[i] = new Matrix(layerSizes[i - 1], layerSizes[i]); + } + + //Activities (needed for backpropagation) + activityA = new Matrix[weights.length]; + activityZ = new Matrix[weights.length]; + } + + /** + * Constructs a new network based on a saved one + * + * @param stream Stream to read from + * @throws IOException + */ + public Network(DataInputStream stream) throws IOException { + //Dimensions + numberOfInputs = stream.readInt(); + layerSizes = new int[stream.readInt()]; + for (int i = 0; i < layerSizes.length; i++) { + layerSizes[i] = stream.readInt(); + } + + //Activation functions + activationFunctions = new ActivationFunction[layerSizes.length]; + for (int i = 0; i < activationFunctions.length; i++) { + activationFunctions[i] = ActivationFunction.values()[stream.readInt()]; + } + + //Weights + weights = new Matrix[layerSizes.length]; + weights[0] = new Matrix(numberOfInputs, layerSizes[0]); + for (int i = 1; i < weights.length; i++) { + weights[i] = new Matrix(layerSizes[i - 1], layerSizes[i]); + } + + for (Matrix weight : weights) { + for (int j = 0; j < weight.getHeight(); j++) { + for (int i = 0; i < weight.getWidth(); i++) { + weight.set(j, i, stream.readDouble()); + } + } + } + + //Activities + activityA = new Matrix[weights.length]; + activityZ = new Matrix[weights.length]; + } + + /** + * Returns the number of inputs + * + * @return Number of inputs + */ + public int getNumberOfInputs() { + return numberOfInputs; + } + + /** + * Returns the number of outputs (last layer size) + * + * @return Number of outputs + */ + public int getNumberOfOutputs() { 
+ return layerSizes[layerSizes.length - 1]; + } + + /** + * Returns the number of layers + * + * @return Number of Layers + */ + public int getNumberOfLayers() { + return layerSizes.length; + } + + /** + * Returns the number of neurons in the specified layer + * + * @param index Index of the layer + * @return Number of neurons in the layer + * @throws ArrayIndexOutOfBoundsException If the index does not point to an existing layer + */ + public int getLayerSize(int index) { + if (index < 0 || index >= layerSizes.length) { + throw new ArrayIndexOutOfBoundsException("Index out of bounds!"); + } + + return layerSizes[index]; + } + + /** + * Returns a copy of the numbers of neurons in every layer + * + * @return Copy of numbers of neurons in every layer + */ + public int[] copyLayerSizes() { + return Arrays.copyOf(layerSizes, layerSizes.length); + } + + /** + * Sets the activation function of the specified layer + * + * @param index Index of the layer + * @param function Activation function + * @throws ArrayIndexOutOfBoundsException If the index does not point to an existing layer + */ + public void setActivationFunction(int index, ActivationFunction function) { + if (index < 0 || index >= activationFunctions.length) { + throw new ArrayIndexOutOfBoundsException("Index out of bounds!"); + } + + activationFunctions[index] = function; + } + + /** + * Returns the activation function of the specific layer + * + * @param index Index of the layer + * @return Activation function of the layer + * @throws ArrayIndexOutOfBoundsException If the index does not point to an existing layer + */ + public ActivationFunction getActivationFunction(int index) { + if (index < 0 || index >= activationFunctions.length) { + throw new ArrayIndexOutOfBoundsException("Index out of bounds!"); + } + + return activationFunctions[index]; + } + + /** + * Returns the activation functions of every layer + * + * @return Activation functions + */ + public ActivationFunction[] getActivationFunctions() { + return 
activationFunctions; + } + + /** + * Returns a copy of the activation functions of every layer + * + * @return Copy of the activation functions of every layer + */ + public ActivationFunction[] copyActivationFunctions() { + return Arrays.copyOf(activationFunctions, activationFunctions.length); + } + + /** + * Sets the weights of a single layer + * + * @param index Layer index + * @param layer New weights + * @throws IllegalArgumentException If the index does not point to an existing matrix or the + * given matrix dimensions do not equal the needed size + */ + public void setWeights(int index, Matrix layer) { + if (index < 0 || index >= weights.length) { + throw new ArrayIndexOutOfBoundsException("Index out of bounds!"); + } + if (layer.getHeight() != weights[index].getHeight() + || layer.getWidth() != weights[index].getWidth()) { + throw new IllegalArgumentException("Incorrect layer dimensions!"); + } + + weights[index] = layer; + } + + /** + * Sets the weights for every layer + * + * @param weights New weights + * @throws IllegalArgumentException If the number of matricies does not equal the number of + * layers or the dimensions of a matrix do not equal the needed + * dimensions + */ + public void setWeights(Matrix[] weights) { + if (weights.length != this.weights.length) { + throw new IllegalArgumentException("Incorrect number of layers!"); + } + for (int i = 0; i < this.weights.length; i++) { + if (weights[i].getHeight() != this.weights[i].getHeight() + || weights[i].getWidth() != this.weights[i].getWidth()) { + throw new IllegalArgumentException("Incorrect layer dimensions!"); + } + } + + System.arraycopy(weights, 0, this.weights, 0, this.weights.length); + } + + /** + * Returns the weight matrix of one specific layer + * + * @param index Index of the layer + * @return Weights of the layer + */ + public Matrix getWeights(int index) { + if (index < 0 || index >= weights.length) { + throw new ArrayIndexOutOfBoundsException("Index out of bounds!"); + } + + return 
weights[index]; + } + + /** + * Returns the weights of every layer + * + * @return Weights + */ + public Matrix[] getWeights() { + return Arrays.copyOf(weights, weights.length); + } + + /** + * Returns a copy of all weights + * + * @return Copy of all weights + */ + public Matrix[] copyWeights() { + final Matrix[] copy = new Matrix[weights.length]; + + for (int i = 0; i < copy.length; i++) { + copy[i] = new Matrix(weights[i]); + } + + return copy; + } + + /** + * Seeds the weights within the given boundaries + * + * @param minimum Minimum value + * @param maximum Maximum value + */ + public void seedWeights(double minimum, double maximum) { + final Random rand = new Random(); + + for (Matrix layer : weights) { + layer.rand(rand, minimum, maximum); + } + } + + /** + * Seeds the weights, based on a seed, between the given boundaries + * + * @param seed Seed for the random number generator + * @param minimum Minimum value + * @param maximum Maximum value + */ + public void seedWeights(long seed, double minimum, double maximum) { + final Random rand = new Random(seed); + + for (Matrix layer : weights) { + layer.rand(rand, minimum, maximum); + } + } + + /** + * Seeds the weights within the given boundaries for each layer + * + * @param minimums Minimum values for each layer + * @param maximums Maximum values for each layer + * @throws ArrayIndexOutOfBoundsException If the number of boundaries does not equal the number + * of layers + */ + public void seedWeights(double[] minimums, double[] maximums) { + if (minimums.length != weights.length + || maximums.length != weights.length) { + throw new ArrayIndexOutOfBoundsException("Illegal number of boundaries!"); + } + + final Random rand = new Random(); + + for (int i = 0; i < weights.length; i++) { + weights[i].rand(rand, minimums[i], maximums[i]); + } + } + + /** + * Seeds the weights, based on a seed, between the given boundaries for each layer + * + * @param seed Seed for the random number generator + * @param minimums 
Minimum values for each layer + * @param maximums Maximum values for each layer + * @throws ArrayIndexOutOfBoundsException If the number of boundaries does not equal the number + * of layers + */ + public void seedWeights(long seed, double[] minimums, double[] maximums) { + if (minimums.length != weights.length + || maximums.length != weights.length) { + throw new ArrayIndexOutOfBoundsException("Illegal number of boundaries!"); + } + + final Random rand = new Random(seed); + + for (int i = 0; i < weights.length; i++) { + weights[i].rand(rand, minimums[i], maximums[i]); + } + } + + /** + * Eliminates infinite numbers & NaNs + */ + public void keepWeightsInBounds() { + for (int i = 0; i < weights.length; i++) { + weights[i] = weights[i].apply(x -> { + if (Double.isNaN(x)) { + return 0.0; + } else if (x <= Double.NEGATIVE_INFINITY) { + return -Double.MAX_VALUE; + } else if (x >= Double.POSITIVE_INFINITY) { + return Double.MAX_VALUE; + } + + return x; + }); + } + } + + /** + * Keeps weights within the given boundaries + * + * @param minimum Minimum value + * @param maximum Maximum value + */ + public void keepWeightsInBounds(double minimum, double maximum) { + if (minimum >= maximum) { + throw new IllegalArgumentException( + "Minimum greater than or equal to maximum!"); + } + + for (int i = 0; i < weights.length; i++) { + weights[i] = weights[i].apply(x -> { + if (Double.isNaN(x)) { + return (minimum + maximum) / 2; + } else if (x < minimum) { + return minimum; + } else if (x > maximum) { + return maximum; + } + + return x; + }); + } + } + + /** + * Forward propagates a matrix of data sets. 
Every single row represents one data set Every + * column gets feed into one input neuron + * + * @param input Input sets + * @return Output sets + * @throws IllegalArgumentException If the number of input values (columns) does not equal the + * number of input neurons + */ + public Matrix forward(Matrix input) { + if (input.getWidth() != numberOfInputs) { + throw new IllegalArgumentException("Illegal number of inputs!"); + } + + activityZ[0] = input.multiply(weights[0]); + activityA[0] = activityZ[0].apply(activationFunctions[0].get()/*.function()*/); + + for (int i = 1; i < weights.length; i++) { + activityZ[i] = activityA[i - 1].multiply(weights[i]); + activityA[i] = activityZ[i].apply(activationFunctions[i].get()/*.function()*/); + } + + return new Matrix(activityA[weights.length - 1]); + } + + /** + * Calculates the mean squared error of the prediction to the given output. Every single row + * represents one data set Every column gets feed into one input/output neuron + * + * @param input Input sets + * @param output Output sets + * @return Mean squared error + * @throws IllegalArgumentException If the number of inputs or outputs does not fit the + * dimensions of this network or the number of input sets is + * not equal to the number of output sets + */ + public double cost(Matrix input, Matrix output) { + if (input.getWidth() != getNumberOfInputs()) { + throw new IllegalArgumentException("Illegal number of inputs!"); + } + if (output.getWidth() != getNumberOfOutputs()) { + throw new IllegalArgumentException("Illegal number of outputs!"); + } + if (input.getHeight() != output.getHeight()) { + throw new IllegalArgumentException( + "Unequal number of input and output sets!"); + } + + final Matrix yHat = forward(input); + final Matrix difference = output.subtract(yHat); + final Matrix squaredError = difference.multiplyElementwise(difference); + + double cost = 0; + for (int j = 0; j < squaredError.getHeight(); j++) { + for (int i = 0; i < 
squaredError.getWidth(); i++) { + cost += squaredError.get(j, i); + } + } + cost /= 2; + cost /= input.getHeight(); + + return cost; + } + + /** + * Backpropagates the error to every weight. Every single row represents one data set Every + * column gets feed into one input/output neuron + * + * @param input Input sets + * @param output Output sets + * @return Derivative of the error to every weight + * @throws IllegalArgumentException If the number of inputs or outputs does not fit the + * dimensions of this network or the number of input sets is + * not equal to the number of output sets + */ + public Matrix[] costPrime(Matrix input, Matrix output) { + if (input.getWidth() != getNumberOfInputs()) { + throw new IllegalArgumentException("Illegal number of inputs!"); + } + if (output.getWidth() != getNumberOfOutputs()) { + throw new IllegalArgumentException("Illegal number of outputs!"); + } + if (input.getHeight() != output.getHeight()) { + throw new IllegalArgumentException( + "Unequal number of input and output sets!"); + } + + Matrix delta; + final Matrix[] dJdW = new Matrix[weights.length]; + final Matrix yHat = forward(input); + + delta = yHat.subtract(output).multiplyElementwise( + activityZ[weights.length - 1].apply( + activationFunctions[weights.length - 1].getPrime()/*.Prime()*/)); + + for (int i = weights.length - 1; i > 0; i--) { + dJdW[i] = activityA[i - 1].transpose().multiply(delta); + delta = delta.multiply(weights[i].transpose()).multiplyElementwise( + activityZ[i - 1].apply(activationFunctions[i - 1].getPrime()/*.Prime()*/)); + } + + dJdW[0] = input.transpose().multiply(delta); + + return dJdW; + } + + /** + * Trains the network. 
(override the method "keepTraining" to set the continuation condition) + * + * @param learningRate Initial learning rate + * @param input Input sets + * @param output Wanted output sets + * @param printToConsole Print progress to console + * @return Last cost + * @throws IllegalArgumentException If the number of inputs or outputs does not fit the + * dimensions of this network or the number of input sets is + * not equal to the number of output sets + */ + public double train(double learningRate, + Matrix input, Matrix output, boolean printToConsole) { + if (input.getWidth() != getNumberOfInputs()) { + throw new IllegalArgumentException("Illegal number of inputs!"); + } + if (output.getWidth() != getNumberOfOutputs()) { + throw new IllegalArgumentException("Illegal number of outputs!"); + } + if (input.getHeight() != output.getHeight()) { + throw new IllegalArgumentException( + "Unequal number of input and output sets!"); + } + + double lastCost = cost(input, output); + + for (int iterations = 0; + keepTraining(iterations, learningRate, lastCost) && learningRate > 0; + iterations++) { + final Matrix[] lastWeights = copyWeights(); + + singleGradientDescent(learningRate, input, output); + final double currentCost = cost(input, output); + + if (printToConsole) { + System.out.println(String.format("%d: %e", iterations, currentCost)); + } + + if (currentCost <= lastCost) { + lastCost = currentCost; + learningRate *= 1.1; + } else { + setWeights(lastWeights); + learningRate /= 2; + } + } + + return lastCost; + } + + /** + * Tells the network how long to continue to train + * + * @param iterations Number of completed training cycles + * @param learningRate Current learning rate + * @param cost Current cost + * @return If the training process should continue + */ + public boolean keepTraining(int iterations, double learningRate, + double cost) { + return iterations < 100; + } + + /** + * Backpropagates and applies the gradient with the given learning rate once + * + * 
@param learningRate Learning rate + * @param input Input sets + * @param output Wanted output sets + * @throws IllegalArgumentException If the number of inputs or outputs does not fit the + * dimensions of this network or the number of input sets is + * not equal to the number of output sets + */ + private void singleGradientDescent(double learningRate, + Matrix input, Matrix output) { + final Matrix[] dJdW = costPrime(input, output); + + for (int i = 0; i < weights.length; i++) { + final Matrix update = dJdW[i].multiply(-learningRate); + weights[i] = weights[i].add(update); + } + keepWeightsInBounds(); + } + + /** + * Writes the network to a stream + * + * @param stream Stream to write to + * @throws IOException + */ + public void writeToStream(DataOutputStream stream) throws IOException { + //Dimensions + stream.writeInt(numberOfInputs); + stream.writeInt(getNumberOfLayers()); + for (int layerSize : layerSizes) { + stream.writeInt(layerSize); + } + + //Activation functions + for (ActivationFunction function : activationFunctions) { + stream.writeInt(function.ordinal()); + } + + //Weights + for (Matrix layer : weights) { + for (int j = 0; j < layer.getHeight(); j++) { + for (int i = 0; i < layer.getWidth(); i++) { + stream.writeDouble(layer.get(j, i)); + } + } + } + } + + @Override + public String toString() { + final StringBuilder result = new StringBuilder("Network {"); + result.append(numberOfInputs); + result.append(Arrays.toString(layerSizes)); + result.append("\n"); + + for (int i = 0; i < getNumberOfLayers(); i++) { + result.append(activationFunctions[i]).append('\n'); + result.append(weights[i]).append('\n'); + } + result.append('}'); + + return result.toString(); + } + + public static void main(String[] args) { + //New network + final Network net = new Network( + 2, //2 inputs + new int[]{3, 1}, //2 layers with 3 & 1 neurons +// new this.ActivationFunction[]{ +// this.ActivationFunction.NONE, //both layers with ... 
+// this.ActivationFunction.NONE}); //... no activation function + new ActivationFunction[]{ + ActivationFunction.IDENTITY, //both layers with ... + ActivationFunction.IDENTITY}); //... no activation function + //Randomize weights + net.seedWeights(-1, 1); + //Show network + System.out.println(net); + + //Generate 10 training sets + //Every row represents one training set (10 rows = 10 sets) + //Every column gets fed into the same input/comes out of the same output + //(first column gets into the first input) + //(2 columns = 2 inputs / 1 column = 1 output) + final Matrix trainInput = new Matrix(10, 2); + final Matrix trainOutput = new Matrix(10, 1); + //Fill the training sets + //Inputs: two random numbers + //Outputs: average of these two numbers + final Random rand = new Random(); + for (int set = 0; set < trainInput.getHeight(); set++) { + trainInput.set(set, 0, rand.nextInt(10)); + trainInput.set(set, 1, rand.nextInt(10)); + + final double out = (trainInput.get(set, 0) + trainInput.get(set, 1)) / 2; + trainOutput.set(set, 0, out); + } + + //Show untrained network results + System.out.println("Cost before training:"); + System.out.println(net.cost(trainInput, trainOutput)); + System.out.println("Result before training:"); + System.out.println(net.forward(trainInput) + "\n"); + + //Train + System.out.println("Training ..."); + net.train(0.2, trainInput, trainOutput, true); + System.out.println("Done!" 
+ "\n"); + + //Show trained network results + System.out.println("Cost after training:"); + System.out.println(net.cost(trainInput, trainOutput)); + System.out.println("Result after training:"); + System.out.println(net.forward(trainInput) + "\n"); + } +} diff --git a/Matrix/.gitignore b/Matrix/.gitignore new file mode 100644 index 0000000..796b96d --- /dev/null +++ b/Matrix/.gitignore @@ -0,0 +1 @@ +/build diff --git a/Matrix/build.gradle b/Matrix/build.gradle new file mode 100644 index 0000000..82292e2 --- /dev/null +++ b/Matrix/build.gradle @@ -0,0 +1,8 @@ +apply plugin: 'java-library' + +dependencies { + implementation fileTree(dir: 'libs', include: ['*.jar']) +} + +sourceCompatibility = "8" +targetCompatibility = "8" diff --git a/Matrix/src/main/java/matrix/Matrix.java b/Matrix/src/main/java/matrix/Matrix.java new file mode 100644 index 0000000..722aade --- /dev/null +++ b/Matrix/src/main/java/matrix/Matrix.java @@ -0,0 +1,466 @@ +/* + * MIT License + * + * Copyright (c) 2019 Sebastian Gössl + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package matrix; + +import java.util.Arrays; +import java.util.NoSuchElementException; +import java.util.PrimitiveIterator; +import java.util.Random; +import java.util.function.DoubleBinaryOperator; +import java.util.function.DoubleSupplier; +import java.util.function.DoubleUnaryOperator; +import java.util.function.ToDoubleBiFunction; + +/** + * Matrix class used to store and operate on matrices. + * The indices of the elements are zero indexed. + * + * @author Sebastian Gössl + * @version 1.0 21.1.2019 + */ +public class Matrix implements Iterable { + + /** + * Dimensions of the matrix. + * Height: Number of rows + * Width: Number of columns + */ + private final int height, width; + /** + * Elements of the matrix. + */ + private final double[][] data; + + /** + * Constructs a copy of the given matrix. + * + * @param other matrix to copy + */ + public Matrix(Matrix other) { + this(other.getHeight(), other.getWidth()); + final PrimitiveIterator.OfDouble iterator = other.iterator(); + set(iterator::nextDouble); + } + + /** + * Constructs a new matrix with the content of the given array (an empty array yields a 0x0 matrix). + * + * @param array data to be stored into the matrix + */ + public Matrix(double[][] array) { + this(array.length, array.length == 0 ? 0 : array[0].length, (y, x) -> (array[y][x])); + } + + /** + * Constructs a new matrix with height rows and + * width columns and fills it with the elements returned from + * the given supplier. 
+ * + * @param height number of rows + * @param width number of columns + * @param supplier supplier to fill the matrix with values + */ + public Matrix(int height, int width, DoubleSupplier supplier) { + this(height, width); + set(supplier); + } + + /** + * Constructs a new matrix with height rows and + * width columns and fills the elements with the given + * function. + * The function receives the row and column indices of the current element + * to calculate. + * + * @param height number of rows + * @param width number of columns + * @param function function that recieves the indices of the element it + * shall calculate + */ + public Matrix(int height, int width, + ToDoubleBiFunction function) { + this(height, width); + set(function); + } + + /** + * Constructs a new matrix with height rows and + * width columns. + * All elements are initialized to zero. + * + * @param height number of rows + * @param width number of columns + */ + public Matrix(int height, int width) { + this.height = height; + this.width = width; + + data = new double[height][width]; + } + + + + /** + * Returns the number of rows. + * + * @return number of rows + */ + public int getHeight() { + return height; + } + + /** + * Returns the number of columns. + * + * @return number of columns + */ + public int getWidth() { + return width; + } + + /** + * Returns the element at the specified position. + * + * @param row row of the element to return + * @param column column of the element to return + * @return the element at the specified position + */ + public double get(int row, int column) { + return data[row][column]; + } + + /** + * Replaces the element at the specified position with the specified + * element. 
+ * + * @param row row of the element to set + * @param column column of the element to set + * @param value element to be stored at the specified position + * @return element previously at the specified position + */ + public double set(int row, int column, double value) { + final double oldElement = data[row][column]; + data[row][column] = value; + return oldElement; + } + + /** + * Replaces all elements of the matrix with the specified elements. + * + * @param value element to replace all elements + */ + public void set(double value) { + set(() -> (value)); + } + + /** + * Replaces all elements with the values returned from the given supplier. + * + * @param supplier supplier to supply new values for all elements + */ + public void set(DoubleSupplier supplier) { + for(int j=0; j function) { + for(int j=0; j (x - y)); + } + + /** + * Multiplies every element of this matrix with the given value and returns + * the result. + * Scalar multiplication. + * + * @param factor scalar factor + * @return product + */ + public Matrix multiply(double factor) { + return apply((x) -> (factor * x)); + } + + /** + * Matrix multiplies this matrix with the given matrix and returns the + * result. + * + * @param operand second factor + * @return product + */ + public Matrix multiply(Matrix operand) { + final Matrix result = new Matrix(getHeight(), operand.getWidth(), + (y, x) -> { + double sum = 0; + for(int i=0; i (x * y)); + } + + /** + * Divides this matrix by the given matrix elementwise and returns the + * result. + * + * @param operand divisor + * @return quotient + */ + public Matrix divideElementwise(Matrix operand) { + return apply(operand, (x, y) -> (x / y)); + } + + /** + * Returns the transpose of this matrix. + * + * @return Transpose of this matrix. + */ + public Matrix transpose() { + final Matrix result = new Matrix(getWidth(), getHeight(), + (y, x) -> (get(x, y))); + + return result; + } + + + /** + * Applies the given operator on every element of this matrix. 
+ * + * @param operator operator to apply on every element of this matrix + */ + public void forEach(DoubleUnaryOperator operator) { + final PrimitiveIterator.OfDouble iterator = iterator(); + set(() -> (operator.applyAsDouble(iterator.nextDouble()))); + } + + /** + * Applies the given operator elementwise on every element of this matrix + * and the given one. + * + * @param operand second operand + * @param operator operator to apply on every element of this matrix + */ + public void forEach(Matrix operand, DoubleBinaryOperator operator) { + final PrimitiveIterator.OfDouble i1 = iterator(); + final PrimitiveIterator.OfDouble i2 = operand.iterator(); + set(() -> (operator.applyAsDouble(i1.nextDouble(), i2.nextDouble()))); + } + + /** + * Applies the given operator on every element of this matrix and returns + * the result. + * + * @param operator operator to apply on every element of this matrix + * @return result of the operation + */ + public Matrix apply(DoubleUnaryOperator operator) { + final PrimitiveIterator.OfDouble iterator = iterator(); + final Matrix result = new Matrix(getHeight(), getWidth(), + () -> (operator.applyAsDouble(iterator.nextDouble()))); + + return result; + } + + /** + * Applies the given operator elementwise on every element of this matrix + * and the given one and returns the result. + * + * @param operand second operand + * @param operator operator to apply on every element of the matricies + * @return result of the operation + */ + public Matrix apply(Matrix operand, DoubleBinaryOperator operator) { + final PrimitiveIterator.OfDouble i1 = iterator(); + final PrimitiveIterator.OfDouble i2 = operand.iterator(); + final Matrix result = new Matrix(getHeight(), getWidth(), + () -> (operator.applyAsDouble(i1.nextDouble(), i2.nextDouble()))); + + return result; + } + + /** + * Applies the given operator on every element of this matrix and the given + * matrix elementwise wrapping around and returns the result. 
+ * The result has as many rows and the matrix with more rows and as many + * columns as the matrix with more columns. + * + * @param operand second operand + * @param operator operator to apply on every element of the matricies + * @return result of the operation + */ + public Matrix applyDifSize(Matrix operand, DoubleBinaryOperator operator) { + final Matrix result = new Matrix( + Math.max(getHeight(), operand.getHeight()), + Math.max(getWidth(), operand.getWidth()), + (y, x) -> { + final double value1 = get(y % getHeight(), x % getWidth()); + final double value2 = operand.get( + y % operand.getHeight(), x % operand.getWidth()); + return operator.applyAsDouble(value1, value2); + }); + + return result; + } + + /** + * Fills the matrix with random values + */ + public void rand() { + rand(new Random(), -Double.MAX_VALUE, Double.MAX_VALUE); + } + + /** + * Fills the matrix with random values, + * from minimum (inclusive) to maximum (exclusive), + * given by the random number generator + * @param rand Random number generator + * @param minimum Minimum value (inclusive) + * @param maximum Maximum value (exclusive) + */ + public void rand(Random rand, double minimum, double maximum) { + final double range = maximum - minimum; + + for(int j=0; j= getWidth()) { + i = 0; + j++; + } + + return element; + } + }; + } + + /** + * Returns a copy of this matrix in 2 dimensional array form + * + * @return copy of this matrix in 2 dimensional array form + */ + public double[][] toArray() { + final double[][] array = new double[getHeight()][getWidth()]; + + for(int i=0; i