# A binary classifier from scratch: a 3-layer neural network with a logistic (sigmoid) output

In [163]:
import numpy as np
from sklearn.datasets import make_moons
from random import uniform

## Define the ReLU and sigmoid activation functions and the binary cross-entropy loss

In [164]:
def relu(x):
    """Rectified linear unit: element-wise ``max(0, x)``.

    Negative entries are replaced by 0; non-negative entries pass through.
    """
    rectified = np.maximum(0, x)
    return rectified

def sigmoid(z):
    """Logistic sigmoid ``1 / (1 + exp(-z))``, evaluated without overflow.

    The naive formula computes ``exp(-z)``, which overflows for large negative
    ``z``. This version only ever exponentiates a non-positive number.

    Args:
        z: scalar or array-like of pre-activations.

    Returns:
        Element-wise sigmoid with the same shape as ``z`` (a NumPy scalar for
        scalar input).
    """
    z = np.asarray(z)
    # exp(-|z|) lies in (0, 1], so it can never overflow.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- algebraically equal.
    out = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # np.where yields a 0-d array for scalar input; unwrap it to a NumPy scalar.
    return out[()] if out.ndim == 0 else out

def avg_log_loss(y_pred, y):
    """Mean binary cross-entropy between predicted probabilities and labels.

    The summed loss is divided by ``y_pred.shape[1]``, so ``y_pred`` must be
    2-D with one column per sample. Predictions are clipped to
    ``[1e-15, 1 - 1e-15]`` so that both logarithms stay finite.
    """
    tiny = 1e-15
    n_samples = y_pred.shape[1]
    p = np.clip(y_pred, tiny, 1 - tiny)
    # Log-likelihood of the positive and the negative class, weighted by the label.
    per_entry = np.log(p) * y + np.log(1 - p) * (1 - y)
    return np.squeeze(-np.sum(per_entry) / n_samples)


## Define the gradients of these functions; they are required for backpropagation

In [165]:
def relu_backward(dA, Z):
    """Backpropagate ``dA`` through a ReLU: keep it where ``Z > 0``, zero it elsewhere."""
    active = Z > 0
    return dA * active

def sigmoid_backward(dA, Z):
    """Chain-rule step through a sigmoid activation.

    ``dA`` is the gradient arriving from the following layer; it is multiplied
    by the local derivative ``sigmoid(Z) * (1 - sigmoid(Z))``.
    """
    activation = sigmoid(Z)
    local_grad = activation * (1 - activation)
    return local_grad * dA

In [166]:
def leaky_relu(Z, alpha=0.01):
    """Leaky ReLU: ``Z`` where it is positive, ``alpha * Z`` where it is negative.

    Args:
        Z: array of pre-activations.
        alpha: slope applied to negative inputs. The element-wise maximum used
            here gives the leaky-ReLU shape only for ``0 <= alpha <= 1``.

    Returns:
        Array with the same shape as ``Z``.
    """
    # For Z > 0, Z exceeds alpha * Z; for Z < 0, alpha * Z is the larger value.
    return np.maximum(alpha * Z, Z)

def leaky_relu_backward(dA, Z, alpha=0.01):
    """Backpropagate ``dA`` through a leaky ReLU.

    Args:
        dA: gradient with respect to the activation output.
        Z: pre-activations that were fed to the leaky ReLU.
        alpha: slope used for non-positive inputs; pass the same value that the
            forward activation used.

    Returns:
        ``dA`` where ``Z > 0`` and ``alpha * dA`` where ``Z <= 0``.
    """
    # The two masks are complementary, so each element picks exactly one slope.
    dZ = dA * (Z > 0) + dA * (Z <= 0) * alpha
    return dZ

## Define our model. We will have 1 input layer, 2 hidden layers and 1 output layer

In [167]:
class MLP:
    """Fully-connected binary classifier trained with full-batch gradient descent.

    The network has two leaky-ReLU hidden layers and a sigmoid output. Data is
    stored column-wise: ``X_train`` has one column per sample and ``Y_train``
    holds the matching targets. Call :meth:`model` to create the parameters
    before using :meth:`forward`, :meth:`backward` or :meth:`train`.
    """

    def __init__(self, X_train, Y_train):
        """Store the training set; ``X_train.shape[1]`` is taken as the sample count."""
        self.X = X_train
        self.Y = Y_train
        self.m = X_train.shape[1]

    def model(self, i, l1, l2, o):
        """Create weights and biases for layer sizes ``i -> l1 -> l2 -> o``.

        Weights are standard-normal draws scaled by ``sqrt(2 / fan_in)`` (He
        initialisation). Hidden biases start at 0.01 and the output bias at 0.
        """
        self.W1 = np.random.randn(l1, i) * np.sqrt(2 / i)
        self.W2 = np.random.randn(l2, l1) * np.sqrt(2 / l1)
        self.W3 = np.random.randn(o, l2) * np.sqrt(2 / l2)

        self.B1 = np.ones((l1, 1)) * 0.01
        self.B2 = np.ones((l2, 1)) * 0.01
        self.B3 = np.zeros((o, 1))

    def forward(self):
        """Run the stored inputs through the network and return the output.

        Every pre-activation (``Z1``..``Z3``) and activation (``A1``, ``A2``,
        ``output``) is cached on the instance because :meth:`backward` needs it.
        """
        self.Z1 = np.dot(self.W1, self.X) + self.B1
        self.A1 = leaky_relu(self.Z1)
        self.Z2 = np.dot(self.W2, self.A1) + self.B2
        self.A2 = leaky_relu(self.Z2)
        self.Z3 = np.dot(self.W3, self.A2) + self.B3
        self.output = sigmoid(self.Z3)

        return self.output

    def backward(self):
        """Compute the gradients of the mean loss for all weights and biases.

        Must be called after :meth:`forward`. Results are stored as
        ``dW1``..``dW3`` and ``dB1``..``dB3``.
        """
        # For a sigmoid output with cross-entropy loss, dLoss/dZ3 reduces to
        # (prediction - target); the 1/m averaging is applied per parameter.
        self.dZ3 = self.output - self.Y
        self.dW3 = np.dot(self.dZ3, self.A2.T) / self.m
        self.dB3 = np.sum(self.dZ3, axis=1, keepdims=True) / self.m

        self.dA2 = np.dot(self.W3.T, self.dZ3)
        self.dZ2 = leaky_relu_backward(self.dA2, self.Z2)
        self.dW2 = np.dot(self.dZ2, self.A1.T) / self.m
        self.dB2 = np.sum(self.dZ2, axis=1, keepdims=True) / self.m

        self.dA1 = np.dot(self.W2.T, self.dZ2)
        self.dZ1 = leaky_relu_backward(self.dA1, self.Z1)
        self.dW1 = np.dot(self.dZ1, self.X.T) / self.m
        self.dB1 = np.sum(self.dZ1, axis=1, keepdims=True) / self.m

    def train(self, no_epochs, learning_rate, verbose=True):
        """Run gradient descent and return the cost recorded at every epoch.

        Args:
            no_epochs: number of full passes over the stored training set.
            learning_rate: step size applied to every gradient.
            verbose: when true (the default), print the cost after each epoch.

        Returns:
            list of float: one cost per epoch, measured before that epoch's
            parameter update. Empty when ``no_epochs`` is 0.
        """
        history = []
        for i in range(no_epochs):
            Y_pred = self.forward()
            cost = avg_log_loss(Y_pred, self.Y)
            self.backward()

            self.W1 -= learning_rate * self.dW1
            self.W2 -= learning_rate * self.dW2
            self.W3 -= learning_rate * self.dW3

            self.B1 -= learning_rate * self.dB1
            self.B2 -= learning_rate * self.dB2
            self.B3 -= learning_rate * self.dB3

            history.append(float(cost))
            if verbose:
                print(f"Epoch {i}: cost {cost}")

        return history

In [168]:
# Two-moons toy dataset: 500 noisy points, fixed seed so the run is repeatable.
X, Y = make_moons(n_samples=500, noise=0.2, random_state=42)
# The network stores data column-wise: one column per sample, labels as a single row.
X_train = np.transpose(X)
Y_train = Y.reshape(1, -1)

# 2 inputs -> 5 -> 3 hidden units -> 1 output, trained for 5000 epochs with step size 0.1.
l = MLP(X_train, Y_train)
l.model(i=2, l1=5, l2=3, o=1)
l.train(no_epochs=5000, learning_rate=0.1)

Epoch 0: cost 1.0356211543968017
Epoch 1: cost 0.8437328769418189
Epoch 2: cost 0.7291680877374199
Epoch 3: cost 0.6587182365989019
Epoch 4: cost 0.6133777809803324
Epoch 5: cost 0.5821629916261045
Epoch 6: cost 0.5594431051299971
Epoch 7: cost 0.5419185635833053
Epoch 8: cost 0.5275623068150981
Epoch 9: cost 0.5152070060212994
Epoch 10: cost 0.5042947214646317
Epoch 11: cost 0.49443132187019717
Epoch 12: cost 0.4853487117124479
Epoch 13: cost 0.47691046467787146
Epoch 14: cost 0.46903528604481576
Epoch 15: cost 0.46163182498599065
Epoch 16: cost 0.4546748285400919
Epoch 17: cost 0.44808694787097714
Epoch 18: cost 0.44179965985006936
Epoch 19: cost 0.43583899763223494
Epoch 20: cost 0.43016008335167527
Epoch 21: cost 0.42478198217835444
Epoch 22: cost 0.41965532305256464
Epoch 23: cost 0.4147476193211648
Epoch 24: cost 0.4100927194083307
Epoch 25: cost 0.4056395787921528
Epoch 26: cost 0.4014136084606591
Epoch 27: cost 0.39738710307132885
Epoch 28: cost 0.3935565151823361
Epoch 29: cos