In [None]:
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
import java.util.stream.IntStream;

In [54]:
/**
 * Evaluates the expression (1 + 1/x)^x.
 *
 * Mathematically this expression tends to Napier's constant (Euler's number e)
 * as x grows, so calling it with a large x yields a numerical approximation of e.
 *
 * @param x value substituted into (1 + 1/x)^x
 * @return the value of (1 + 1/x)^x
 */
double napiersLogarithm(double x) {
    double base = 1 + 1 / x;
    return Math.pow(base, x);
}
// Evaluate (1 + 1/x)^x at x = 10^8 to obtain a numerical approximation of e.
double napierNumber = napiersLogarithm(100_000_000.0);

In [55]:
public class ActivationFunction {
    /**
     * Logistic sigmoid, or its first derivative.
     *
     * @param x input value
     * @param derivative when true, the derivative evaluated at x is returned
     *                   instead of the function value
     * @return 1 / (1 + e^-x), or s * (1 - s) with s = 1 / (1 + e^-x) when
     *         derivative is true
     */
    public double sigmoid(double x, boolean derivative) {
        // The function value is needed in both modes, so compute it once.
        double s = 1 / (1 + Math.exp(-x));
        return derivative ? s * (1 - s) : s;
    }

    /**
     * Rectified linear unit, or its first derivative.
     *
     * @param x input value
     * @param derivative when true, the derivative evaluated at x is returned
     *                   instead of the function value
     * @return max(0, x), or (when derivative is true) 1 for x &gt; 0 and 0 otherwise
     */
    public double relu(double x, boolean derivative) {
        if (!derivative) {
            return Math.max(0, x);
        }
        if (x > 0) {
            return 1;
        }
        return 0;
    }

    /**
     * Leaky ReLU with a fixed negative-side slope of 0.01, or its first derivative.
     *
     * @param x input value
     * @param derivative when true, the derivative evaluated at x is returned
     *                   instead of the function value
     * @return x for x &gt; 0 and 0.01 * x otherwise; when derivative is true,
     *         1 for x &gt; 0 and 0.01 otherwise
     */
    public double leakyRelu(double x, boolean derivative) {
        final double slope = 0.01;
        boolean positive = x > 0;
        if (derivative) {
            if (positive) {
                return 1;
            }
            return slope;
        }
        if (positive) {
            return x;
        }
        return slope * x;
    }

    /**
     * Identity function, or its first derivative.
     *
     * @param x input value
     * @param derivative when true, the derivative is returned instead of the
     *                   function value
     * @return x, or 1 when derivative is true
     */
    public double identity(double x, boolean derivative) {
        return derivative ? 1 : x;
    }
}

In [56]:
public class LossFunction {
    /**
     * Rejects a target/prediction pair whose arrays differ in length.
     *
     * @param yTrue target values
     * @param yPred predicted values
     * @throws IllegalArgumentException if the two arrays have different lengths
     */
    private static void requireSameLength(double[] yTrue, double[] yPred) {
        if (yTrue.length != yPred.length) {
            throw new IllegalArgumentException("Input lists must have the same length.");
        }
    }

    /**
     * Cross-entropy error: the negated sum of yTrue[i] * log(yPred[i] + 1e-9).
     * The result is a sum over the elements, not a mean.
     *
     * @param yTrue target values
     * @param yPred predicted values
     * @return the cross-entropy error
     * @throws IllegalArgumentException if the two arrays have different lengths
     */
    double crossEntropyError(double[] yTrue, double[] yPred) {
        requireSameLength(yTrue, yPred);
        // 1e-9 is added inside the logarithm so that a prediction of 0 does not produce log(0).
        double weightedLogSum = IntStream.range(0, yTrue.length)
                .mapToDouble(idx -> yTrue[idx] * Math.log(yPred[idx] + 1e-9))
                .sum();
        return -weightedLogSum;
    }

    /**
     * Mean squared error between targets and predictions.
     *
     * @param yTrue target values
     * @param yPred predicted values
     * @return the mean of the squared differences, or NaN for empty input
     * @throws IllegalArgumentException if the two arrays have different lengths
     */
    double meanSquaredError(double[] yTrue, double[] yPred) {
        requireSameLength(yTrue, yPred);
        return IntStream.range(0, yTrue.length)
                .mapToDouble(idx -> {
                    double diff = yTrue[idx] - yPred[idx];
                    return Math.pow(diff, 2);
                })
                .average()
                .orElse(Double.NaN);
    }

    /**
     * Mean absolute error between targets and predictions.
     *
     * @param yTrue target values
     * @param yPred predicted values
     * @return the mean of the absolute differences, or NaN for empty input
     * @throws IllegalArgumentException if the two arrays have different lengths
     */
    double meanAbsoluteError(double[] yTrue, double[] yPred) {
        requireSameLength(yTrue, yPred);
        return IntStream.range(0, yTrue.length)
                .mapToDouble(idx -> {
                    double diff = yTrue[idx] - yPred[idx];
                    return Math.abs(diff);
                })
                .average()
                .orElse(Double.NaN);
    }

    /**
     * Binary cross-entropy, divided by the number of elements.
     *
     * @param yTrue target values
     * @param yPred predicted values
     * @return the binary cross-entropy loss
     * @throws IllegalArgumentException if the two arrays have different lengths
     */
    double binaryCrossEntropy(double[] yTrue, double[] yPred) {
        requireSameLength(yTrue, yPred);
        // Small offset added inside both logarithms to avoid log(0).
        double epsilon = 1e-9;
        double total = IntStream.range(0, yTrue.length)
                .mapToDouble(idx -> {
                    double positiveTerm = yTrue[idx] * Math.log(yPred[idx] + epsilon);
                    double negativeTerm = (1 - yTrue[idx]) * Math.log(1 - yPred[idx] + epsilon);
                    return positiveTerm + negativeTerm;
                })
                .sum();
        return -total / yTrue.length;
    }

    /**
     * Categorical cross-entropy: the negated sum of
     * yTrue[i] * log(yPred[i] + 1e-9), divided by the number of elements.
     *
     * @param yTrue target values
     * @param yPred predicted values
     * @return the categorical cross-entropy loss
     * @throws IllegalArgumentException if the two arrays have different lengths
     */
    double categoricalCrossEntropy(double[] yTrue, double[] yPred) {
        requireSameLength(yTrue, yPred);
        // Small offset added inside the logarithm to avoid log(0).
        double epsilon = 1e-9;
        double total = IntStream.range(0, yTrue.length)
                .mapToDouble(idx -> yTrue[idx] * Math.log(yPred[idx] + epsilon))
                .sum();
        return -total / yTrue.length;
    }
}

In [57]:
/**
 * Minimal fully connected feed-forward network trained with per-sample
 * gradient descent. Hidden layers use ReLU and the output layer uses the
 * sigmoid; all state is passed explicitly through the static methods.
 */
public class NeuralNetwork {
    /** Number of character cells in the textual progress bar printed by train. */
    private static final int PROGRESS_BAR_WIDTH = 20;

    /**
     * Trains a freshly initialised network on the given samples and prints the
     * mean loss together with a progress bar every 10th epoch and on the last epoch.
     *
     * The weights and biases are local to this method; they are not returned.
     *
     * @param X training inputs, one inner list per sample
     * @param y training targets, one inner list per sample (same order as X)
     * @param layerSizes number of neurons per layer, from input layer to output layer
     * @param epochs number of passes over the training data
     * @param learningRate step size applied to every weight and bias update
     * @param hiddenActivation object whose relu method is used for hidden layers
     * @param outputActivation object whose sigmoid method is used for the output layer
     * @param lossFunction object whose crossEntropyError method provides the reported loss
     */
    public static void train(List<List<Double>> X, List<List<Double>> y, int[] layerSizes, int epochs, double learningRate, ActivationFunction hiddenActivation, ActivationFunction outputActivation, LossFunction lossFunction) {
        // Initialise weights and biases.
        double[][][] weights = initializeWeights(layerSizes);
        double[][] biases = initializeBiases(layerSizes);

        for (int epoch = 0; epoch < epochs; epoch++) {
            double totalLoss = 0;

            // One parameter update per training sample.
            for (int i = 0; i < X.size(); i++) {
                // Forward pass.
                List<List<Double>> activations = forwardPropagation(X.get(i), weights, biases, hiddenActivation, outputActivation);

                // Loss of the output layer against the target (used for reporting only).
                double[] yTrue = y.get(i).stream().mapToDouble(Double::doubleValue).toArray();
                double[] yPred = activations.get(activations.size() - 1).stream().mapToDouble(Double::doubleValue).toArray();
                totalLoss += lossFunction.crossEntropyError(yTrue, yPred);

                // Backward pass; updates weights and biases in place.
                backwardPropagation(activations, y.get(i), weights, biases, learningRate, hiddenActivation, outputActivation);
            }

            // Periodic progress report; "\r" rewrites the same console line.
            if (epoch % 10 == 0 || epoch == epochs - 1) {
                System.out.print("\rEpoch " + (epoch + 1) + "/" + epochs + ", Loss: " + totalLoss / X.size() + ", [" + progressBar(epoch, epochs) + "]             ");
            }
        }
    }

    /**
     * Renders a fixed-width progress bar made of '+' (done) and ' ' (remaining).
     *
     * The filled width is computed as epoch * width / epochs + 1, clamped to the
     * bar width. Unlike epoch / (epochs / width) + 1, this cannot divide by zero
     * when epochs is smaller than the width and cannot overflow the bar when
     * epochs is not a multiple of the width; for multiples of the width both
     * formulas agree.
     *
     * @param epoch zero-based index of the current epoch
     * @param epochs total number of epochs (positive)
     * @return a string of exactly PROGRESS_BAR_WIDTH characters
     */
    private static String progressBar(int epoch, int epochs) {
        // long arithmetic keeps epoch * width from overflowing int for large epoch counts.
        int filled = (int) Math.min(PROGRESS_BAR_WIDTH, (long) epoch * PROGRESS_BAR_WIDTH / epochs + 1);
        StringBuilder bar = new StringBuilder(PROGRESS_BAR_WIDTH);
        for (int cell = 0; cell < PROGRESS_BAR_WIDTH; cell++) {
            bar.append(cell < filled ? '+' : ' ');
        }
        return bar.toString();
    }

    /**
     * Creates the weight matrices, one per pair of adjacent layers.
     *
     * weights[layer][i][j] connects neuron i of layer to neuron j of layer + 1.
     * Values are drawn from a Gaussian with variance 2 / (fanIn + fanOut)
     * (Xavier/Glorot-style initialisation) using an unseeded Random, so runs
     * are not reproducible.
     *
     * @param layerSizes number of neurons per layer, from input layer to output layer
     * @return the initialised weight matrices
     */
    private static double[][][] initializeWeights(int[] layerSizes) {
        Random random = new Random();
        double[][][] weights = new double[layerSizes.length - 1][][];

        for (int layer = 0; layer < layerSizes.length - 1; layer++) {
            int fanIn = layerSizes[layer];
            int fanOut = layerSizes[layer + 1];
            weights[layer] = new double[fanIn][fanOut];
            // Standard deviation is loop-invariant for the layer, so compute it once.
            double stdDev = Math.sqrt(2.0 / (fanIn + fanOut));
            for (int i = 0; i < fanIn; i++) {
                for (int j = 0; j < fanOut; j++) {
                    weights[layer][i][j] = random.nextGaussian() * stdDev;
                }
            }
        }
        return weights;
    }

    /**
     * Creates the bias vectors, one per non-input layer, all initialised to zero.
     *
     * @param layerSizes number of neurons per layer, from input layer to output layer
     * @return the zero-initialised bias vectors; biases[layer] belongs to layer + 1
     */
    private static double[][] initializeBiases(int[] layerSizes) {
        double[][] biases = new double[layerSizes.length - 1][];

        for (int layer = 0; layer < layerSizes.length - 1; layer++) {
            // Java zero-fills new double arrays, which is the intended initial bias.
            biases[layer] = new double[layerSizes[layer + 1]];
        }
        return biases;
    }

    /**
     * Runs one forward pass and returns the activations of every layer.
     *
     * @param input values of the input layer
     * @param weights weight matrices; weights[layer][i][j] connects neuron i of
     *                layer to neuron j of layer + 1
     * @param biases bias vectors; biases[layer] belongs to layer + 1
     * @param hiddenActivation object whose relu method is applied in hidden layers
     * @param outputActivation object whose sigmoid method is applied in the last layer
     * @return activations per layer; index 0 is a copy of the input and the
     *         last element is the network output
     */
    public static List<List<Double>> forwardPropagation(List<Double> input, double[][][] weights, double[][] biases, ActivationFunction hiddenActivation, ActivationFunction outputActivation) {
        List<List<Double>> activations = new ArrayList<>();
        List<Double> currentLayer = new ArrayList<>(input);
        activations.add(currentLayer);

        for (int layer = 0; layer < weights.length; layer++) {
            List<Double> nextLayer = new ArrayList<>();
            for (int neuron = 0; neuron < weights[layer][0].length; neuron++) {
                // Weighted sum of the previous layer plus this neuron's bias.
                double z = 0;
                for (int prevNeuron = 0; prevNeuron < currentLayer.size(); prevNeuron++) {
                    z += currentLayer.get(prevNeuron) * weights[layer][prevNeuron][neuron];
                }
                z += biases[layer][neuron];

                // Sigmoid on the final layer, ReLU everywhere else.
                double activation = (layer == weights.length - 1)
                        ? outputActivation.sigmoid(z, false)
                        : hiddenActivation.relu(z, false);
                nextLayer.add(activation);
            }
            currentLayer = nextLayer;
            activations.add(currentLayer);
        }

        return activations;
    }

    /**
     * Runs one backward pass for a single sample and applies the gradient step
     * to weights and biases in place.
     *
     * The output delta is (output - target) * sigmoid'(z). Only the activation
     * a = sigmoid(z) is available here, so sigmoid'(z) is evaluated as
     * a * (1 - a); feeding the activation back into sigmoid(x, true) would
     * instead evaluate the derivative at a rather than at z.
     *
     * @param activations per-layer activations as returned by forwardPropagation
     * @param yTrue target values for the output layer
     * @param weights weight matrices, updated in place
     * @param biases bias vectors, updated in place
     * @param learningRate step size applied to every update
     * @param hiddenActivation object whose relu derivative is used for hidden layers
     * @param outputActivation kept for interface compatibility; the sigmoid
     *                         derivative is computed directly from the output activation
     */
    public static void backwardPropagation(List<List<Double>> activations, List<Double> yTrue, double[][][] weights, double[][] biases, double learningRate, ActivationFunction hiddenActivation, ActivationFunction outputActivation) {
        int numLayers = weights.length;
        double[][] delta = new double[numLayers][];

        // Output-layer deltas.
        List<Double> output = activations.get(numLayers);
        delta[numLayers - 1] = new double[output.size()];
        for (int i = 0; i < output.size(); i++) {
            double a = output.get(i);
            double error = a - yTrue.get(i);
            // sigmoid'(z) expressed through the activation a = sigmoid(z).
            delta[numLayers - 1][i] = error * a * (1 - a);
        }

        // Hidden-layer deltas, propagated from the layer after each one.
        for (int layer = numLayers - 2; layer >= 0; layer--) {
            delta[layer] = new double[weights[layer][0].length];
            for (int i = 0; i < weights[layer][0].length; i++) {
                double error = 0;
                for (int j = 0; j < weights[layer + 1][i].length; j++) {
                    error += delta[layer + 1][j] * weights[layer + 1][i][j];
                }
                // Passing the activation is valid for ReLU: relu(z) > 0 exactly when z > 0.
                delta[layer][i] = error * hiddenActivation.relu(activations.get(layer + 1).get(i), true);
            }
        }

        // Gradient step; all deltas were computed above from the pre-update weights.
        for (int layer = 0; layer < numLayers; layer++) {
            for (int i = 0; i < weights[layer].length; i++) {
                for (int j = 0; j < weights[layer][i].length; j++) {
                    weights[layer][i][j] -= learningRate * activations.get(layer).get(i) * delta[layer][j];
                }
            }
            for (int i = 0; i < biases[layer].length; i++) {
                biases[layer][i] -= learningRate * delta[layer][i];
            }
        }
    }
}

In [None]:
// Hyperparameters: neurons per layer (input to output), epoch count, and step size.
int[] layerSizes = {2, 8, 8, 8, 1};
int epochs = 100_000;
double learningRate = 0.01;

// Training inputs: the four combinations of two binary features.
List<List<Double>> X = Arrays.asList(
        Arrays.asList(0.0, 0.0), Arrays.asList(0.0, 1.0),
        Arrays.asList(1.0, 0.0), Arrays.asList(1.0, 1.0));

// Targets: 1.0 when both inputs are equal, 0.0 otherwise.
List<List<Double>> y = Arrays.asList(
        Arrays.asList(1.0), Arrays.asList(0.0),
        Arrays.asList(0.0), Arrays.asList(1.0));

// Activation-function and loss-function instances handed to the trainer.
LossFunction lossFunction = new LossFunction();
ActivationFunction outputActivation = new ActivationFunction();
ActivationFunction hiddenActivation = new ActivationFunction();

NeuralNetwork.train(X, y, layerSizes, epochs, learningRate, hiddenActivation, outputActivation, lossFunction);