## Tiny Neural Network

Building a simple yet functional feedforward neural network from the ground up, using only NumPy.

References:
- https://iamtrask.github.io/2015/07/12/basic-python-network/
- https://www.freecodecamp.org/news/building-a-neural-network-from-scratch/

In [39]:
import numpy as np

# Seed NumPy's legacy global RNG so the np.random.random weight initialisations
# in the cells below are repeatable when the notebook is run top to bottom on a
# fresh kernel.
np.random.seed(1)

In [103]:
def activation(x):
    """
    Sigmoid Activation, evaluated in a numerically stable way.

    Computes 1 / (1 + exp(-x)) element-wise. The exponential is only ever
    taken of a non-positive number (-|x|), so large-magnitude negative
    inputs no longer overflow inside np.exp (which used to emit a
    RuntimeWarning); they simply underflow towards 0.

    Parameters
    ----------
    x : scalar or array-like
        Pre-activation value(s).

    Returns
    -------
    NumPy scalar (for scalar input) or ndarray with the shape of x,
    with values in [0, 1].
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))  # always in [0, 1], cannot overflow
    # x >= 0: 1 / (1 + e^-x)      (the textbook form)
    # x <  0: e^x / (1 + e^x)     (same function, multiplied through by e^x)
    z = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    return z[()]  # 0-d result -> NumPy scalar; n-d arrays are returned as-is

def activation_deriv(z):
    """
    Derivative of Sigmoid, expressed through the sigmoid's own output.

    z is the already-activated value (z = sigmoid(v)), not the
    pre-activation v. Returns dz/dv = z * (1 - z), element-wise.
    """
    complement = 1 - z
    return z * complement

def cost_function(z, y, eps=1e-15):
    """
    Cost function of binary classification (binary cross-entropy)
    j = -y * ln(y_hat) - (1-y) * ln(1 - y_hat)

    Parameters
    ----------
    z : scalar or ndarray
        Predicted probabilities (y_hat).
    y : scalar or ndarray
        Targets, broadcastable against z.
    eps : float, optional
        Predictions are clipped to [eps, 1 - eps] before the logarithms are
        taken. Without this, a prediction of exactly 0 or 1 gives ln(0) = -inf
        and the product 0 * -inf turns the loss into NaN.

    Returns
    -------
    Element-wise loss with the broadcast shape of z and y (not reduced).
    """
    z = np.clip(z, eps, 1 - eps)
    return -y * np.log(z) - (1 - y) * np.log(1 - z)

def cost_function_deriv(z, y, eps=1e-15):
    """
    Cost function derivative w.r.t output z
    last_error = (z - y) / (z * (1 - z)) where z == y_hat

    Parameters
    ----------
    z : scalar or ndarray
        Predicted probabilities (y_hat).
    y : scalar or ndarray
        Targets, broadcastable against z.
    eps : float, optional
        Predictions are clipped to [eps, 1 - eps] first. A prediction of
        exactly 0 or 1 would otherwise make the denominator z * (1 - z) zero
        and yield NaN (0 / 0) or inf. Clipping the numerator as well keeps
        the result at its analytic limit (1 / (1 - z) for y = 0, -1 / z for
        y = 1).

    Returns
    -------
    Element-wise derivative with the broadcast shape of z and y.
    """
    z = np.clip(z, eps, 1 - eps)
    return (z - y) / (z * (1 - z))

### Level 1

In [104]:
"""
Level 1: single-layer feedforward network

> 3 input columns feeding 1 sigmoid output unit (w1 has shape 3 x 1)
> No bias term (the third input column is a constant 1)
> Full-batch gradient descent, step size 1, max_iter iterations

Forward pass of the layer:
    v1 = z0 * w1
    z1 = sigmoid(v1)
"""
X = np.array([[0, 0, 1], [0, 1, 1], [1, 0, 1], [1, 1, 1]])
y = np.array([[0], [0], [1], [1]])  # targets as a column, Shape (4 x 1)

max_iter = 30000

# Initial weights drawn uniformly from [-1, 1)
w1 = np.random.random((X.shape[1], 1)) * 2 - 1
z0 = X

for iter_ in range(max_iter):
    # Forward pass
    z1 = activation(z0.dot(w1))  # Shape (4 x 1)

    # Backward pass: dJ/dz1 first, then dJ/dv1 via the chain rule
    l1_error = cost_function_deriv(z1, y)  # Shape (4 x 1)
    l1_dzdv = activation_deriv(z1) * l1_error  # Shape (4 x 1)

    # Gradient step on the weights
    w1 -= z0.T.dot(l1_dzdv)  # Shape (3 x 1)

np.round(z1, decimals=4)

array([[1.e-04],
       [0.e+00],
       [1.e+00],
       [1.e+00]])

### Level 2

In [105]:
"""
Level 2: two-layer feedforward network

> 3 input columns -> 4 hidden sigmoid units -> 1 sigmoid output unit
> Each layer has a bias term (initialised to ones)
> Full-batch gradient descent, step size 1, max_iter iterations

Error propagated back from layer 2 to layer 1:
    l1_error = l2_dzdv * w2

Forward pass of each layer:
    v1 = z0 * w1 + b1
    z1 = sigmoid(v1)
"""

X = np.array([[0, 0, 1], [0, 1, 1], [1, 0, 1], [1, 1, 1]])
y = np.array([[0], [1], [1], [0]])  # targets as a column, Shape (4 x 1)

max_iter = 30000

# Weights drawn uniformly from [-1, 1); w1 is drawn before w2
w1 = np.random.random((X.shape[1], 4)) * 2 - 1
b1 = np.ones((1, 4))
w2 = np.random.random((4, 1)) * 2 - 1
b2 = np.ones((1, 1))
z0 = X

for iter_ in range(max_iter):
    # Forward pass
    z1 = activation(z0.dot(w1) + b1)  # Shape (4 x 4)
    z2 = activation(z1.dot(w2) + b2)  # Shape (4 x 1)

    # Backward pass, output layer first; w2 is read before it is updated
    l2_error = cost_function_deriv(z2, y)  # Shape (4 x 1)
    l2_dzdv = activation_deriv(z2) * l2_error  # Shape (4 x 1)
    l1_error = l2_dzdv.dot(w2.T)  # Shape (4 x 4)
    l1_dzdv = activation_deriv(z1) * l1_error  # Shape (4 x 4)

    # Gradient step; bias gradients are summed over the samples
    w2 -= z1.T.dot(l2_dzdv)  # Shape (4 x 1)
    b2 -= l2_dzdv.sum(axis=0, keepdims=True)
    w1 -= z0.T.dot(l1_dzdv)  # Shape (3 x 4)
    b1 -= l1_dzdv.sum(axis=0, keepdims=True)

np.round(z2, decimals=4)

array([[0.000e+00],
       [9.999e-01],
       [9.999e-01],
       [1.000e-04]])

### Level 3

In [107]:
"""
Level 3: two-layer feedforward network with early stopping

> 3 input columns -> 4 hidden sigmoid units -> 1 sigmoid output unit
> Has bias term
> Has early stopping (stop after n_iter_no_change iterations in which the
  validation loss did not improve by more than tol)
> Has learning rate scheduler (step size 1 / i**0.05 at iteration i)
"""

X = np.array([
    [0,0,1],
    [0,1,1],
    [1,0,1],
    [1,1,1]
])
y = np.array([[0,1,1,0]]).T

# NOTE: the "validation" set is a copy of the training set, so early stopping
# here tracks training loss rather than generalisation.
X_val, y_val = X.copy(), y.copy()


def lr_scheduler(i):
    """Learning rate for 1-based iteration i: 1 / i**0.05 (divides by zero at i == 0)."""
    return 1 / np.power(i, 0.05)


tol = 0.0001
n_iter_no_change = 100
count_iter_no_change = 0
prev_lowest_val_error = float('inf')
max_iter = 30000

w1 = 2 * np.random.random((X.shape[1],4)) - 1
b1 = np.ones((1,4))
w2 = 2 * np.random.random((4,1)) - 1
b2 = np.ones((1,1))
z0 = X
z0_val = X_val

# Iterations are numbered from 1 because lr_scheduler is undefined at 0.
# The upper bound is max_iter + 1 so that max_iter iterations can actually run
# (range(1, max_iter) stopped one short).
for iter_ in range(1, max_iter + 1):
    # Forward pass
    z1 = activation(np.dot(z0, w1) + b1) # Shape (4 x 4)
    z2 = activation(np.dot(z1, w2) + b2) # Shape (4 x 1)

    # Backward pass, output layer first
    l2_error = cost_function_deriv(z2, y) # Shape (4 x 1)
    l2_dzdv = l2_error * activation_deriv(z2) # Shape (4 x 1)
    l1_error = np.dot(l2_dzdv, w2.T) # Shape (4 x 4)
    l1_dzdv = l1_error * activation_deriv(z1) # Shape (4 x 4)

    # The step size depends only on the iteration number: compute it once
    lr = lr_scheduler(iter_)
    w2 -= lr * np.dot(z1.T, l2_dzdv) # Shape (4 x 1)
    b2 -= lr * np.sum(l2_dzdv, axis=0, keepdims=True)
    w1 -= lr * np.dot(z0.T, l1_dzdv) # Shape (3 x 4)
    b1 -= lr * np.sum(l1_dzdv, axis=0, keepdims=True)

    # Early stopping based on validation error score (uses the updated weights)
    z1_val = activation(np.dot(z0_val, w1) + b1) # Shape (4 x 4)
    z2_val = activation(np.dot(z1_val, w2) + b2) # Shape (4 x 1)
    val_error = cost_function(z2_val, y_val) # Shape (4 x 1)
    val_error_mean = np.abs(np.mean(val_error))
    if val_error_mean + tol < prev_lowest_val_error:
        # Improved by more than tol: record the new best and reset patience
        count_iter_no_change = 0
        prev_lowest_val_error = val_error_mean
    else:
        count_iter_no_change += 1
        if count_iter_no_change >= n_iter_no_change:
            print("Last iteration: " + str(iter_))
            break

# z2 is the training-set prediction computed before the final weight update
np.round(z2, 4)

Last iteration: 2001


array([[0.0015],
       [0.9981],
       [0.9982],
       [0.0017]])

In [None]:
class TinyFeedForwardNeuralNetwork:
    """
    Skeleton for a class-based version of the network.

    Not implemented yet: every method below is a stub that does nothing and
    returns None.
    """

    def __init__(self, dims, cost_func, activation_func, early_stopping, max_iter, tol):
        """
        Placeholder constructor; the arguments are accepted but not stored.

        NOTE(review): presumably dims lists the layer sizes and the other
        arguments mirror the settings used in the cells above -- confirm
        when this is implemented.
        """

    def train(self, X, y):
        """Placeholder for fitting the network to X / y. TODO: implement."""

    def predict(self, X, y):
        """
        Placeholder for inference on X. TODO: implement.

        NOTE(review): a predict method taking y is unusual -- confirm whether
        the parameter is intended.
        """