# Multiclass classification model (MNIST digits)

In [118]:
import numpy as np
import sklearn.datasets
import matplotlib.pyplot as plt

# Upper bound on the number of samples kept from the downloaded dataset.
m = 70000

# Download (cache=True) the "mnist_784" dataset from OpenML as NumPy arrays
# rather than a DataFrame (as_frame=False).
mnist = sklearn.datasets.fetch_openml(
    "mnist_784",
    version=1,
    cache=True,
    as_frame=False,
)

# One row per image in X_full; labels are cast to integers.
X_full = mnist.data[:m]
Y_raw = mnist.target[:m].astype(int)


## One hot encoder

In [119]:
def one_hot_encoder(Y_raw, C):
    """Turn integer class labels into a one-hot matrix with one column per sample.

    Parameters
    ----------
    Y_raw : ndarray of int, shape (m,)
        Class index of every sample; used directly as a row index.
    C : int
        Number of classes, i.e. the number of rows in the result.

    Returns
    -------
    ndarray of float, shape (C, m)
        Entry [k, j] is 1 when sample j carries label k and 0 otherwise.
    """
    n_samples = Y_raw.shape[0]
    sample_idx = np.arange(n_samples)
    encoded = np.zeros((C, n_samples))
    # Paired fancy indexing: one (label, sample) cell is set per column.
    encoded[Y_raw, sample_idx] = 1
    return encoded

## Split data into train, test, validate

In [120]:
# Shuffle with a dedicated, seeded generator so the train/val/test split is the
# same on every "Restart & Run All". Change SPLIT_SEED to draw another split.
SPLIT_SEED = 42
split_rng = np.random.default_rng(SPLIT_SEED)

m_full = X_full.shape[0]
shuffled_indices = split_rng.permutation(m_full)

X_shuffled = X_full[shuffled_indices]
Y_shuffled_raw = Y_raw[shuffled_indices]

m_train = 50000
m_val = 10000
m_test = 10000
# Fail loudly instead of silently producing short (or empty) splits.
assert m_train + m_val + m_test <= m_full, "split sizes exceed the number of samples"

val_end = m_train + m_val
test_end = val_end + m_test

# Each split is transposed to features x samples (one column per image) and
# the pixel values are divided by 255.
X_train = (X_shuffled[:m_train].T)/255.
Y_train_raw = Y_shuffled_raw[:m_train]

X_val = (X_shuffled[m_train:val_end].T)/255.
Y_val_raw = Y_shuffled_raw[m_train:val_end]

# The test slice is bounded by m_test rather than taking "everything left".
X_test = (X_shuffled[val_end:test_end].T)/255.
Y_test_raw = Y_shuffled_raw[val_end:test_end]


# One-hot targets with 10 rows (one per class) and one column per sample.
Y_train = one_hot_encoder(Y_train_raw, 10)
Y_val = one_hot_encoder(Y_val_raw, 10)
Y_test = one_hot_encoder(Y_test_raw, 10)

## Activation functions

In [121]:
def leaky_relu(Z):
    """Element-wise leaky ReLU with a fixed slope of 0.01 on the negative side.

    Computed as the element-wise maximum of ``0.01 * Z`` and ``Z``, which
    yields Z for positive entries and 0.01 * Z for the rest.
    """
    scaled = 0.01 * Z
    return np.maximum(scaled, Z)

def softmax(Z):
    """Column-wise softmax: every column of Z becomes a probability vector.

    The column maximum is subtracted before exponentiating, so ``np.exp`` never
    receives a positive argument. This guards against overflow and does not
    change the mathematical result.

    Parameters
    ----------
    Z : ndarray, shape (classes, samples)

    Returns
    -------
    ndarray of the same shape whose columns each sum to 1.
    """
    col_max = np.max(Z, axis=0, keepdims=True)
    numerators = np.exp(Z - col_max)
    denominators = np.sum(numerators, axis=0, keepdims=True)
    return numerators / denominators

## Activation function derivatives

In [122]:
def leaky_relu_backward(dA, Z):
    """Backpropagate a gradient through the leaky ReLU.

    Parameters
    ----------
    dA : ndarray
        Gradient of the cost with respect to the activation output.
    Z : ndarray
        Pre-activation values the forward pass was evaluated at.

    Returns
    -------
    ndarray
        ``dA`` where Z > 0 and ``0.01 * dA`` where Z <= 0.
    """
    positive = Z > 0
    non_positive = Z <= 0
    passed_through = dA * positive
    leaked = dA * non_positive * 0.01
    return passed_through + leaked

def softmax_backward(dA, Z):
    """Backpropagate a gradient through the column-wise softmax.

    For one column with a = softmax(z), the Jacobian is
    ``diag(a) - a a^T``, so the product with an upstream gradient ``dA`` is
    ``a * (dA - sum(dA * a))``. This is applied to every column at once.

    When ``dA`` is the cross-entropy gradient ``-Y / A`` for one-hot ``Y``,
    the result reduces to ``A - Y``, which is the fused expression
    ``OCR.backward`` uses directly.

    Parameters
    ----------
    dA : ndarray, shape (classes, samples)
        Gradient of the cost with respect to the softmax output.
    Z : ndarray, shape (classes, samples)
        Pre-activation values that were fed to ``softmax``.

    Returns
    -------
    ndarray, shape (classes, samples)
        Gradient of the cost with respect to ``Z``.
    """
    A = softmax(Z)
    # Per-column inner product <dA, A>, kept 2-D so it broadcasts over rows.
    weighted = np.sum(dA * A, axis=0, keepdims=True)
    return A * (dA - weighted)

## Cost function

In [123]:
def cross_entropy_cost(Y_pred, Y):
    """Mean categorical cross-entropy over the columns (samples) of Y_pred.

    Parameters
    ----------
    Y_pred : ndarray, shape (classes, samples)
        Predicted probabilities.
    Y : ndarray, same shape
        Target weights (one-hot in this notebook).

    Returns
    -------
    Scalar cost: ``-sum(Y * log(Y_pred)) / samples``.
    """
    tiny = 1e-15
    n_samples = Y_pred.shape[1]
    # Keep probabilities inside [tiny, 1 - tiny] so log never sees 0.
    safe_pred = np.clip(Y_pred, tiny, 1 - tiny)
    total = np.sum(Y * np.log(safe_pred))
    return np.squeeze(-total / n_samples)

## The classifier

Each image is 28x28 pixels, flattened into a 784-feature input vector. The network has 10 outputs, one per digit class.

In [124]:
class OCR:
    """Fully connected classifier trained with full-batch gradient descent.

    Architecture: input -> leaky ReLU -> leaky ReLU -> softmax. Data matrices
    are laid out features x samples (one column per example) and targets are
    one-hot with one column per example.

    Typical use: construct with the training/validation data, call ``model``
    to create the parameters, then call ``train``. After training, the logged
    metrics are available in ``self.history``.
    """

    def __init__(self, X_train, Y_train, X_val, Y_val):
        """Store the training and validation sets; no parameters are created here."""
        self.X = X_train
        self.Y = Y_train
        self.X_val = X_val
        self.Y_val = Y_val
        # Number of training examples (columns of X_train).
        self.m = X_train.shape[1]
        # Filled by train(); present from the start so it can always be read.
        self.history = {"epoch": [], "train_cost": [], "val_cost": [], "val_accuracy": []}

    def model(self, i, l1, l2, o):
        """Create the weights and biases for an i -> l1 -> l2 -> o network.

        Weights are standard-normal draws scaled by sqrt(2 / fan_in) (He-style
        initialisation). Hidden biases start at 0.01, output biases at 0.
        """
        self.W1 = np.random.randn(l1, i) * np.sqrt(2 / i)
        self.W2 = np.random.randn(l2, l1) * np.sqrt(2 / l1)
        self.W3 = np.random.randn(o, l2) * np.sqrt(2 / l2)

        self.B1 = np.ones((l1, 1)) * 0.01
        self.B2 = np.ones((l2, 1)) * 0.01
        self.B3 = np.zeros((o, 1))

    def _forward_pass(self, X):
        """Run the network on X and return (Z1, A1, Z2, A2, Z3, output)."""
        Z1 = np.dot(self.W1, X) + self.B1
        A1 = leaky_relu(Z1)
        Z2 = np.dot(self.W2, A1) + self.B2
        A2 = leaky_relu(Z2)
        Z3 = np.dot(self.W3, A2) + self.B3
        output = softmax(Z3)
        return Z1, A1, Z2, A2, Z3, output

    @staticmethod
    def _fraction_correct(Y_pred, Y):
        """Share of columns whose arg-max in Y_pred matches the arg-max in Y."""
        predicted_labels = np.argmax(Y_pred, axis=0)
        true_labels = np.argmax(Y, axis=0)
        return np.mean(predicted_labels == true_labels)

    def forward(self):
        """Forward pass on the training set, caching every intermediate for backward()."""
        (self.Z1, self.A1,
         self.Z2, self.A2,
         self.Z3, self.output) = self._forward_pass(self.X)
        return self.output

    def backward(self):
        """Compute parameter gradients from the values cached by forward().

        ``output - Y`` is the gradient of the cross-entropy cost with respect
        to Z3 for a softmax output and one-hot targets; the 1/m averaging is
        applied when each dW/dB is formed.
        """
        self.dZ3 = self.output - self.Y
        self.dW3 = np.dot(self.dZ3, self.A2.T) / self.m
        self.dB3 = np.sum(self.dZ3, axis=1, keepdims=True) / self.m
        self.dA2 = np.dot(self.W3.T, self.dZ3)
        self.dZ2 = leaky_relu_backward(self.dA2, self.Z2)
        self.dW2 = np.dot(self.dZ2, self.A1.T) / self.m
        self.dB2 = np.sum(self.dZ2, axis=1, keepdims=True) / self.m
        self.dA1 = np.dot(self.W2.T, self.dZ2)
        self.dZ1 = leaky_relu_backward(self.dA1, self.Z1)
        self.dW1 = np.dot(self.dZ1, self.X.T) / self.m
        self.dB1 = np.sum(self.dZ1, axis=1, keepdims=True) / self.m

    def predict(self, X):
        """Return class probabilities (classes x samples) for X without caching anything."""
        return self._forward_pass(X)[-1]

    def accuracy(self, X, Y, print_acc=True):
        """Return the fraction of samples in X classified correctly against one-hot Y.

        When ``print_acc`` is truthy the accuracy is also printed as a percentage.
        """
        accuracy_val = self._fraction_correct(self.predict(X), Y)
        if print_acc:
            print(f"Accuracy: {accuracy_val * 100:.2f}%")
        return accuracy_val

    def train(self, no_epochs, learning_rate):
        """Run ``no_epochs`` full-batch gradient-descent steps.

        Every 10th epoch and the final epoch are logged to stdout and recorded
        in ``self.history`` under the keys ``"epoch"``, ``"train_cost"``,
        ``"val_cost"`` and ``"val_accuracy"`` (parallel lists). The history is
        reset at the start of every call. Returns None.
        """
        history = {"epoch": [], "train_cost": [], "val_cost": [], "val_accuracy": []}
        self.history = history

        for i in range(no_epochs):
            Y_pred = self.forward()
            cost = cross_entropy_cost(Y_pred, self.Y)
            self.backward()

            self.W1 -= learning_rate * self.dW1
            self.W2 -= learning_rate * self.dW2
            self.W3 -= learning_rate * self.dW3

            self.B1 -= learning_rate * self.dB1
            self.B2 -= learning_rate * self.dB2
            self.B3 -= learning_rate * self.dB3

            if i % 10 == 0 or i == no_epochs - 1:
                # NOTE: `cost` was measured before this epoch's update, while
                # the validation metrics below use the updated parameters.
                # One validation forward pass serves both cost and accuracy.
                Y_pred_val = self.predict(self.X_val)
                cost_val = cross_entropy_cost(Y_pred_val, self.Y_val)
                acc = self._fraction_correct(Y_pred_val, self.Y_val)

                history["epoch"].append(i)
                history["train_cost"].append(cost)
                history["val_cost"].append(cost_val)
                history["val_accuracy"].append(acc)

                print(f"Epoch {i}: cost {cost}")
                print(f"Validation cost {cost_val}, accuracy {acc*100:.2f}%")


In [126]:
# Build the classifier on the train/validation split, create a
# 784-128-64-10 network, then run 100 full-batch epochs at learning rate 0.1.
mod = OCR(X_train, Y_train, X_val, Y_val)
mod.model(i=784, l1=128, l2=64, o=10)
mod.train(no_epochs=100, learning_rate=0.1)


Epoch 0: cost 2.34505888484713
Validation cost 2.2011261828093818, accuracy 16.63%
Epoch 10: cost 1.4983385972941372
Validation cost 1.4326616030835446, accuracy 65.36%
Epoch 20: cost 0.9592701933834352
Validation cost 0.9336835489575296, accuracy 78.76%
Epoch 30: cost 0.7051010970362649
Validation cost 0.6999441653719412, accuracy 83.57%
Epoch 40: cost 0.5782257588500856
Validation cost 0.5820966411224184, accuracy 85.55%
Epoch 50: cost 0.5059176880134686
Validation cost 0.5139473555840721, accuracy 86.81%
Epoch 60: cost 0.4597359345310487
Validation cost 0.4696664913634091, accuracy 87.74%
Epoch 70: cost 0.4275219610713708
Validation cost 0.4384141091071122, accuracy 88.33%
Epoch 80: cost 0.4035671583589629
Validation cost 0.41494567835502477, accuracy 88.87%
Epoch 90: cost 0.38488619774976
Validation cost 0.39650697951840014, accuracy 89.09%
Epoch 99: cost 0.3711402711607497
Validation cost 0.38286836564293436, accuracy 89.45%


In [127]:
# Print and return the accuracy on the validation split.
# NOTE(review): X_test / Y_test are built earlier but never scored in the cells
# shown here — consider reporting the final number on the test split instead.
mod.accuracy(X=X_val, Y=Y_val, print_acc=True)

Accuracy: 89.45%


np.float64(0.8945)