# Neural Network

In [3]:
from abc import abstractmethod, ABC

import numpy as np

In [4]:
class Layer(ABC):
    """Abstract interface shared by every layer in the network.

    Subclasses implement ``forward`` and ``backward``. The ``input`` and
    ``output`` attributes both start out as ``None``.
    """

    def __init__(self):
        self.input = self.output = None

    @abstractmethod
    def forward(self, input):
        """Compute this layer's output for ``input`` (subclass hook)."""

    @abstractmethod
    def backward(self, output_gradient, learning_rate):
        """Propagate ``output_gradient`` back through the layer (subclass hook).

        ``learning_rate`` is supplied so that layers with trainable
        parameters can update them during the backward pass.
        """

In [5]:
class Dense(Layer):
    """Fully connected layer computing ``weights @ input + bias``.

    Attributes:
        weights: Array of shape ``(output_size, input_size)`` drawn from
            ``np.random.randn``.
        bias: Array of shape ``(output_size, 1)`` drawn from
            ``np.random.randn``.
    """

    def __init__(self, input_size, output_size):
        """Create the layer with randomly initialised parameters.

        Args:
            input_size: Number of columns of the weight matrix.
            output_size: Number of rows of the weight matrix and of the bias.
        """
        # Run the base initialiser so ``input`` and ``output`` exist (as
        # None) before the first forward pass.
        super().__init__()
        self.weights = np.random.randn(output_size, input_size)
        self.bias = np.random.randn(output_size, 1)

    def forward(self, input):
        """Cache ``input`` and return ``weights @ input + bias``.

        Args:
            input: Array whose first dimension equals ``input_size``; the XOR
                example in this notebook passes ``(input_size, 1)`` columns.
        """
        self.input = input
        return np.dot(self.weights, self.input) + self.bias

    def backward(self, output_gradient, learning_rate):
        """Apply one gradient-descent step and return the input gradient.

        Args:
            output_gradient: Gradient of the loss with respect to the value
                returned by the last ``forward`` call.
            learning_rate: Step size used for the weight and bias updates.

        Returns:
            Gradient of the loss with respect to the cached ``input``.
        """
        weights_gradient = np.dot(output_gradient, self.input.T)
        # The gradient passed upstream must use the weights that produced the
        # forward output, so compute it BEFORE the in-place update below.
        input_gradient = np.dot(self.weights.T, output_gradient)
        self.weights -= learning_rate * weights_gradient
        self.bias -= learning_rate * output_gradient
        return input_gradient

In [6]:
class Activation(Layer):
    """Parameter-free layer built from a function and its derivative.

    Attributes:
        activation: Callable applied to the input in ``forward``.
        activation_prime: Callable evaluated on the cached input in
            ``backward``; its result is multiplied element-wise with the
            incoming gradient.
    """

    def __init__(self, activation, activation_prime):
        """Store the activation callable and its derivative callable."""
        # Run the base initialiser so ``input`` and ``output`` exist (as
        # None) before the first forward pass.
        super().__init__()
        self.activation = activation
        self.activation_prime = activation_prime

    def forward(self, input):
        """Cache ``input`` and return ``activation(input)``."""
        self.input = input
        return self.activation(self.input)

    def backward(self, output_gradient, learning_rate):
        """Return ``output_gradient * activation_prime(input)`` element-wise.

        ``learning_rate`` is accepted to match the ``Layer`` interface but is
        not used, since this layer has no parameters to update.
        """
        return np.multiply(output_gradient, self.activation_prime(self.input))

In [7]:
class Tanh(Activation):
    """Activation layer using the hyperbolic tangent."""

    def __init__(self):
        squash = np.tanh

        def squash_derivative(x):
            # d/dx tanh(x) = 1 - tanh(x)^2
            return 1 - squash(x) ** 2

        super().__init__(squash, squash_derivative)

In [8]:
def mse(y_true, y_pred):
    """Return the mean squared error between ``y_true`` and ``y_pred``.

    The squared differences are averaged over every element, so the result
    is a single scalar.
    """
    residual = y_true - y_pred
    squared_residual = np.power(residual, 2)
    return np.mean(squared_residual)


def mse_prime(y_true, y_pred):
    """Return the gradient of the mean squared error w.r.t. ``y_pred``.

    Computed as ``2 * (y_pred - y_true)`` divided by the number of elements
    in ``y_true``.
    """
    doubled_residual = 2 * (y_pred - y_true)
    element_count = np.size(y_true)
    return doubled_residual / element_count

## XOR Problem

In [12]:
# XOR truth table: four samples, each input reshaped to a (2, 1) column and
# each target to a (1, 1) column.
X = np.reshape([[0, 0], [0, 1], [1, 0], [1, 1]], (4, 2, 1))
Y = np.reshape([[0], [1], [1], [0]], (4, 1, 1))

# 2 -> 3 -> 1 network with a tanh activation after each dense layer.
# NOTE(review): no RNG seed is set in the visible cells and Dense draws its
# parameters from np.random.randn, so the numbers printed below are expected
# to differ from run to run -- confirm whether a seed is set elsewhere.
network = [
    Dense(2, 3),
    Tanh(),
    Dense(3, 1),
    Tanh(),
]

epochs = 10000
learning_rate = 0.1

# Per-sample gradient descent: the layers are updated after every sample,
# not once per epoch.
for i in range(epochs):
    error = 0
    for x, y in zip(X, Y):
        # Forward pass: feed the sample through the layers in order.
        output = x
        for layer in network:
            output = layer.forward(output)
        
        # Accumulate the loss measured before this sample's update.
        error += mse(y, output)

        # Backward pass: start from the loss gradient and walk the layers in
        # reverse; each layer returns the gradient for the one before it.
        grad = mse_prime(y, output)
        for layer in reversed(network):
            grad = layer.backward(grad, learning_rate)

    # Mean of the per-sample losses for this epoch.
    error /= len(X)
    # NOTE(review): this prints one line per epoch (10000 lines in total);
    # consider logging only every N epochs.
    print(f"{i + 1}/{epochs}={error}")

1/10000=1.091059492706014
2/10000=0.6219386976102104
3/10000=0.43117902006268755
4/10000=0.3965705933646171
5/10000=0.3839984112876801
6/10000=0.3741165917699177
7/10000=0.3655322831166784
8/10000=0.3580604825889728
9/10000=0.35164315677765356
10/10000=0.3461813875025803
11/10000=0.34153164826077
12/10000=0.337535515011676
13/10000=0.33404395143341287
14/10000=0.33092867768589507
15/10000=0.3280842345604724
16/10000=0.32542558452995385
17/10000=0.32288437224528455
18/10000=0.32040532824845747
19/10000=0.31794334975229543
20/10000=0.3154613579714663
21/10000=0.3129288689792278
22/10000=0.3103211694131862
23/10000=0.3076189817582028
24/10000=0.30480850257390746
25/10000=0.3018816892028194
26/10000=0.298836655878571
27/10000=0.29567802504852214
28/10000=0.2924170742079987
29/10000=0.2890715333438494
30/10000=0.2856649305167568
31/10000=0.28222545292285084
32/10000=0.27878437781660137
33/10000=0.2753742132411556
34/10000=0.2720267507658962
35/10000=0.2687712541675791
36/10000=0.26563298331