In [3]:
import mnist
from tqdm import tqdm

import numpy as np
import pandas as pd
import plotly.express as px

import warnings
warnings.filterwarnings('ignore')

In [6]:
# Load the data
train_images = mnist.train_images()
train_labels = mnist.train_labels()
test_images = mnist.test_images()
test_labels = mnist.test_labels()

train_images = train_images.reshape((-1, 784)) / 255.0
test_images = test_images.reshape((-1, 784)) / 255.0
train_labels = np.eye(10)[train_labels]
test_labels = np.eye(10)[test_labels]

In [16]:
px.imshow(np.reshape(train_images[0], (28, 28)), width=500)

In [98]:
class VisNet:
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        
        self.w1 = np.random.randn(self.input_size, self.hidden_size)
        self.b1 = np.zeros(self.hidden_size)
        self.w2 = np.random.randn(self.hidden_size, self.output_size)
        self.b2 = np.zeros(self.output_size)

    def cross_entropy_loss(self, y_true, y_pred):
        return -np.mean(y_true * np.log(y_pred + 1e-10))

    def softmax(self, x):
        if x.ndim == 1:
            # Add an extra dimension to the input array
            x = x[np.newaxis, :]
        exps = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exps / np.sum(exps, axis=1, keepdims=True)
        
    def forward(self, x):
        z1 = np.dot(x, self.w1) + self.b1
        a1 = np.tanh(z1)
        z2 = np.dot(a1, self.w2) + self.b2
        y = self.softmax(z2)
        return y, a1
    
    def backward(self, x, y_true, y_pred, a1):
        delta2 = y_pred - y_true
        grad_w2 = np.dot(a1.T, delta2)
        grad_b2 = np.sum(delta2, axis=0)
        delta1 = np.dot(delta2, self.w2.T) * (1 - np.power(a1, 2))
        grad_w1 = np.dot(x.T, delta1)
        grad_b1 = np.sum(delta1, axis=0)
        return grad_w1, grad_b1, grad_w2, grad_b2
    
    def train(self, x_train, y_train, learning_rate=0.1, batch_size=128, num_epochs=10):
        num_batches = len(x_train) // batch_size
        
        for epoch in range(num_epochs):
            for batch in range(num_batches):
                # Get the batch of images and labels
                batch_images = x_train[batch*batch_size:(batch+1)*batch_size]
                batch_labels = y_train[batch*batch_size:(batch+1)*batch_size]
                
                # Forward pass
                eps = 1e-8
                y_pred, a1 = self.forward(batch_images)
                loss = self.cross_entropy_loss(batch_labels, (y_pred+eps))
                
                # Backward pass
                grad_w1, grad_b1, grad_w2, grad_b2 = self.backward(batch_images, batch_labels, y_pred, a1)
                
                # Update weights and biases
                self.w1 -= learning_rate * grad_w1
                self.b1 -= learning_rate * grad_b1
                self.w2 -= learning_rate * grad_w2
                self.b2 -= learning_rate * grad_b2
                
            # Print progress
            print("Epoch "+str(epoch+1)+"/"+str(num_epochs)+" - Loss = "+str(loss.mean()))
                    
    def evaluate(self, x_test, y_test):
        total = 0
        correct = 0
        for i in range(len(x_test)):
            x = x_test[i]
            y_true = y_test[i]
            y_pred, _ = self.forward(x)
            if np.argmax(y_pred) == np.argmax(y_true):
                correct += 1
            total += 1

        # Compute accuracy
        accuracy = correct / total
        print("Test accuracy: {:.2f}%".format(accuracy * 100))


In [101]:
input_size = 784
hidden_size = 128
output_size = 10

model = VisNet(input_size, hidden_size, output_size)
model.train(train_images, train_labels, learning_rate=0.001, num_epochs=10)

Epoch 1/10 - Loss = 0.02860718958430008
Epoch 2/10 - Loss = 0.011998001522854417
Epoch 3/10 - Loss = 0.009129279668106689
Epoch 4/10 - Loss = 0.007370039138432249
Epoch 5/10 - Loss = 0.005957962346904799
Epoch 6/10 - Loss = 0.005090092258169589
Epoch 7/10 - Loss = 0.004716634055918139
Epoch 8/10 - Loss = 0.004551702258245321
Epoch 9/10 - Loss = 0.00445259457502126
Epoch 10/10 - Loss = 0.004389091966282006


In [102]:
model.evaluate(test_images,test_labels)

Test accuracy: 89.69%
