# Neural Network

In [1]:
from abc import abstractmethod, ABC

import numpy as np

In [2]:
class Layer(ABC):
    """Abstract building block of the network.

    Concrete layers must implement both ``forward`` and ``backward``;
    the class cannot be instantiated until they do.
    """

    def __init__(self):
        # Slots for the values seen during the most recent pass.
        self.input = self.output = None

    @abstractmethod
    def forward(self, input):
        """Compute and return this layer's output for ``input``."""

    @abstractmethod
    def backward(self, output_gradient, learning_rate):
        """Propagate ``output_gradient`` back through this layer.

        ``learning_rate`` is made available for layers that update
        parameters during the backward pass.
        """

In [3]:
class Dense(Layer):
    """Fully connected layer computing ``weights . input + bias``.

    ``weights`` has shape ``(output_size, input_size)`` and ``bias`` has
    shape ``(output_size, 1)``; both are drawn from a standard normal
    distribution at construction time.
    """

    def __init__(self, input_size, output_size):
        """Create a layer mapping ``input_size`` features to ``output_size``."""
        super().__init__()
        self.weights = np.random.randn(output_size, input_size)
        self.bias = np.random.randn(output_size, 1)

    def forward(self, input):
        """Store ``input`` for the backward pass and return the affine output.

        NOTE(review): the bias shape suggests ``input`` is a column vector of
        shape ``(input_size, 1)`` -- confirm against callers.
        """
        self.input = input
        return np.dot(self.weights, self.input) + self.bias

    def backward(self, output_gradient, learning_rate):
        """Apply one gradient-descent step and return the input gradient.

        Args:
            output_gradient: gradient of the loss with respect to this
                layer's output.
            learning_rate: step size used for the parameter update.

        Returns:
            Gradient of the loss with respect to the input that was passed
            to the most recent ``forward`` call.
        """
        weights_gradient = np.dot(output_gradient, self.input.T)
        # The gradient handed to the previous layer must be computed with the
        # weights that produced the forward output, i.e. *before* the update.
        input_gradient = np.dot(self.weights.T, output_gradient)
        self.weights -= learning_rate * weights_gradient
        self.bias -= learning_rate * output_gradient
        return input_gradient

In [4]:
class Activation(Layer):
    """Layer that applies an element-wise activation function.

    It has no trainable parameters; ``learning_rate`` is accepted by
    ``backward`` only to satisfy the ``Layer`` interface.
    """

    def __init__(self, activation, activation_prime):
        """Store the activation and its derivative.

        Args:
            activation: callable applied to the input in ``forward``.
            activation_prime: callable evaluated at the stored input in
                ``backward``.
        """
        # Initialise the base-class attributes (``input`` / ``output``) so
        # every layer exposes the same set of attributes.
        super().__init__()
        self.activation = activation
        self.activation_prime = activation_prime

    def forward(self, input):
        """Store ``input`` and return ``activation(input)``."""
        self.input = input
        return self.activation(self.input)

    def backward(self, output_gradient, learning_rate):
        """Return ``output_gradient`` multiplied element-wise by
        ``activation_prime`` evaluated at the stored input."""
        return np.multiply(output_gradient, self.activation_prime(self.input))

In [5]:
class Tanh(Activation):
    """Hyperbolic-tangent activation layer."""

    def __init__(self):
        def tanh_derivative(x):
            # d/dx tanh(x) = 1 - tanh(x)^2
            return 1 - np.tanh(x) ** 2

        super().__init__(np.tanh, tanh_derivative)

In [6]:
def mse(y_true, y_pred):
    """Return the mean of the squared differences between the two inputs."""
    residual = y_true - y_pred
    return np.mean(np.square(residual))


def mse_prime(y_true, y_pred):
    """Return the derivative of ``mse`` with respect to ``y_pred``."""
    sample_count = np.size(y_true)
    doubled_residual = 2 * (y_pred - y_true)
    return doubled_residual / sample_count

## XOR Problem

In [7]:
# XOR truth table: four samples, each reshaped into a column vector.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]).reshape(4, 2, 1)
Y = np.array([[0], [1], [1], [0]]).reshape(4, 1, 1)

# 2 -> 3 -> 1 network with a tanh activation after each dense layer.
network = [Dense(2, 3), Tanh(), Dense(3, 1), Tanh()]

epochs = 10000
learning_rate = 0.1

for i in range(epochs):
    error = 0
    for x, y in zip(X, Y):
        # Forward pass: feed the sample through every layer in order.
        output = x
        for layer in network:
            output = layer.forward(output)

        error = error + mse(y, output)

        # Backward pass: walk the layers last-to-first, updating as we go.
        grad = mse_prime(y, output)
        for layer in network[::-1]:
            grad = layer.backward(grad, learning_rate)

    # Report the mean per-sample error for this epoch.
    error = error / len(X)
    print(f"{i + 1}/{epochs}={error}")

1/10000=2.3927392303707005
2/10000=2.3082616650588137
3/10000=1.963269162531533
4/10000=0.9340060900341461
5/10000=0.3228312547621399
6/10000=0.29944276574685447
7/10000=0.2854457572429917
8/10000=0.27502820892547564
9/10000=0.26707872715245995
10/10000=0.26068629272059085
11/10000=0.2552566859510911
12/10000=0.2504628942630537
13/10000=0.24613711097611252
14/10000=0.24219400548149944
15/10000=0.23858796642672056
16/10000=0.23529113494066234
17/10000=0.23228262021685137
18/10000=0.2295435471746371
19/10000=0.22705510009799484
20/10000=0.22479804808976764
21/10000=0.22275293561883475
22/10000=0.220900503577192
23/10000=0.2192221224280559
24/10000=0.21770014173384145
25/10000=0.2163181276022823
26/10000=0.2150609934324863
27/10000=0.21391504356930124
28/10000=0.21286795289198635
29/10000=0.21190870346137886
30/10000=0.21102749536037094
31/10000=0.21021564453014357
32/10000=0.20946547655048642
33/10000=0.20877022222721203
34/10000=0.2081239185480728
35/10000=0.2075213169437023
36/10000=0.