In [131]:
from mnist import MNIST
import math
import numpy as np
    
class HiddenLayer():
    def __init__(self, n_outputs, n_inputs):
        self.weights = np.random.randn(n_inputs, n_outputs) * 0.001
        self.bias = np.zeros(n_outputs)

    def activations(self, input_col):
        z = np.dot(input_col, self.weights) + self.bias
        return np.maximum(z, 0)
    
    def backpropagate(self, input_col, activations, dL_dz, lr):
        dReLU = (activations > 0).astype(float)
        dL_dz_h = dL_dz * dReLU

        dW = np.outer(input_col, dL_dz_h)
        db = dL_dz_h

        self.weights -= lr * dW
        self.bias -= lr * db

        return np.dot(dL_dz_h, self.weights.T)


class OutputLayer():
    def __init__(self, n_outputs, n_inputs):
        self.weights = np.random.randn(n_inputs, n_outputs) * 0.001
        self.bias = np.zeros(n_outputs)

    def activations(self, input_col):
        z = np.dot(input_col, self.weights) + self.bias
        z -= np.max(z)
        exp_z = np.exp(z)
        return exp_z/(np.sum(exp_z))
    
    def derivative_cel(self, activations, label_vector):
        return activations - label_vector #label vector one hot encoded
    
    def backpropagate(self, dL_dz, lr, input_col):
        # dL_dz is the Cross Entropy Loss derivative in relation to z for every output neuron
        dW = np.outer(input_col, dL_dz) #Weight gradient
        db = dL_dz # Bias gradient

        self.weights -= lr * dW
        self.bias -= lr * db

        return np.dot(dL_dz, self.weights.T)
        

class LabeLOneHotEncoder():
    def __init__(self, n):
        self.n = n

    def ohe(self, index):
        x = np.zeros(self.n)
        x[index] = 1
        return x

hidden_l = HiddenLayer(128, 784)
output_l = OutputLayer(10, 128)
ohe = LabeLOneHotEncoder(10)

mndata = MNIST("./samples")
mndata.gz = True

images, labels = mndata.load_training()

train_img = images[:54000]
train_label = labels[:54000]
test_img = images[54000:]
test_label = labels[54000:]

n_epochs = 20

for _ in range(n_epochs):
    for img, lbl in zip(train_img, train_label):
        lr = 0.0001
        img = np.array(img)/255

        lbl_i = ohe.ohe(lbl)

        a_h = hidden_l.activations(img)
        a_o = output_l.activations(a_h)

        # Compute derivative of CE loss wrt output logits
        dL_dz_output = output_l.derivative_cel(a_o, lbl_i)

        # Backpropagate output layer
        dL_da_hidden = output_l.backpropagate(dL_dz_output, lr, a_h)

        # Backpropagate hidden layer
        _ = hidden_l.backpropagate(img, a_h, dL_da_hidden, lr)

In [141]:
c_counter = 0
for img, lbl in zip(test_img, test_label):
    a_h = hidden_l.activations(img)
    a_o = output_l.activations(a_h)
    i = np.where(a_o == max(a_o))[0][0]
    if i == lbl:
        c_counter += 1

In [146]:
f"Acurácia: {c_counter/6000}"

'Acurácia: 0.9261666666666667'