From Scratch Implementation

In [2]:
import numpy as np
from keras.datasets import mnist
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder


In [3]:
# Fetch the MNIST train/test splits through the Keras dataset helper
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Turn every image into a single row vector (one row per sample) and divide
# the pixel values by 255.0 to rescale them
X_train = X_train.reshape(len(X_train), -1) / 255.0
X_test = X_test.reshape(len(X_test), -1) / 255.0

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [14]:
# One-hot encode the labels
# The `sparse` keyword was renamed to `sparse_output` in scikit-learn 1.2 and
# removed in 1.4, so try the current name first and fall back to the legacy
# one on older installations. Either way the encoder returns dense arrays.
try:
    one_hot_encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    one_hot_encoder = OneHotEncoder(sparse=False)
y_train_one_hot = one_hot_encoder.fit_transform(y_train.reshape(-1, 1))
y_test_one_hot = one_hot_encoder.transform(y_test.reshape(-1, 1))


def _append_bias_column(features):
    """Return `features` with a trailing column of ones, added at most once.

    A trailing column that is already all ones is treated as an existing bias
    column and the array is returned unchanged. This keeps the cell idempotent:
    re-running it does not stack additional bias columns onto the inputs.
    """
    if features.shape[1] > 0 and np.all(features[:, -1] == 1.0):
        return features
    return np.hstack((features, np.ones((features.shape[0], 1))))


# Add bias term to the input
X_train = _append_bias_column(X_train)
X_test = _append_bias_column(X_test)



In [20]:
# Neural Network Implementation
class NeuralNetwork:
    """Two-layer classifier: input -> tanh hidden layer -> softmax output.

    The layers are plain matrix products with weight matrices ``W1`` and
    ``W2``; there are no separate bias vectors, so any bias has to be supplied
    by the caller as a constant input column. Training is gradient descent on
    the mean cross-entropy, using every row of the data in each step.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Store the layer sizes and draw small random initial weights.

        Args:
            input_size: Number of input features (columns of ``X``).
            hidden_size: Number of hidden units.
            output_size: Number of output classes.
        """
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Initialize weights: standard-normal draws from NumPy's global RNG,
        # scaled by 0.01.
        self.W1 = np.random.randn(self.input_size, self.hidden_size) * 0.01
        self.W2 = np.random.randn(self.hidden_size, self.output_size) * 0.01

    def softmax(self, x):
        """Return the row-wise softmax of a 2-D array of scores."""
        # Subtracting the row maximum leaves the result unchanged but keeps
        # np.exp from overflowing on large scores.
        exp_scores = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

    def _hidden_and_logits(self, X):
        """Compute (z2, a2, z3) for ``X`` without touching any cached state."""
        z2 = np.dot(X, self.W1)
        a2 = np.tanh(z2)
        z3 = np.dot(a2, self.W2)
        return z2, a2, z3

    @staticmethod
    def _mean_cross_entropy(logits, y):
        """Mean cross-entropy of ``logits`` against one-hot targets ``y``.

        Works on log-softmax computed directly from the logits, so the result
        stays finite even when a softmax probability would underflow to 0
        (where ``-log(prob)`` would be ``inf``).
        """
        m = logits.shape[0]
        shifted = logits - np.max(logits, axis=1, keepdims=True)
        # After the shift each row contains a 0, so the sum of exponentials is
        # at least 1 and its log is finite.
        log_probs = shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))
        true_classes = np.argmax(y, axis=1)
        return -np.sum(log_probs[np.arange(m), true_classes]) / m

    def forward(self, X):
        """Run a forward pass and cache the intermediates for ``backward``.

        Stores ``z2``, ``a2``, ``z3`` and ``probs`` on the instance.

        Returns:
            Array of class probabilities, one row per row of ``X``.
        """
        self.z2, self.a2, self.z3 = self._hidden_and_logits(X)
        self.probs = self.softmax(self.z3)
        return self.probs

    def backward(self, X, y, learning_rate):
        """Apply one gradient-descent update to ``W1`` and ``W2``.

        Reads ``self.probs`` and ``self.a2``, so ``forward`` must have been
        called with the same ``X`` immediately beforehand.

        Args:
            X: Input rows used in the preceding forward pass.
            y: One-hot targets with one row per row of ``X``.
            learning_rate: Step size applied to both weight gradients.
        """
        m = X.shape[0]
        # Gradient of softmax + cross-entropy with respect to the logits.
        delta3 = self.probs - y
        dW2 = (1 / m) * np.dot(self.a2.T, delta3)
        # Back-propagate through tanh: d tanh(z) / dz = 1 - tanh(z)^2.
        # This uses W2 from before the update below.
        delta2 = np.dot(delta3, self.W2.T) * (1 - np.power(self.a2, 2))
        dW1 = (1 / m) * np.dot(X.T, delta2)

        # Update weights
        self.W1 -= learning_rate * dW1
        self.W2 -= learning_rate * dW2

    def train(self, X, y, learning_rate=0.01, epochs=200):
        """Train for ``epochs`` gradient steps, printing the loss every 5 epochs.

        The printed loss is the one belonging to that epoch's forward pass,
        i.e. measured with the weights as they were before the epoch's update.

        Args:
            X: Training inputs, one row per sample.
            y: One-hot training targets.
            learning_rate: Step size for each update.
            epochs: Number of forward/backward passes over ``X``.
        """
        for epoch in range(epochs):
            # Forward propagation (fills the cache that backward() reads)
            self.forward(X)
            # Backpropagation
            self.backward(X, y, learning_rate)
            if epoch % 5 == 0:
                # backward() leaves the cached logits untouched, so this
                # reuses the forward pass above instead of recomputing it.
                loss = self._mean_cross_entropy(self.z3, y)
                print(f"Epoch {epoch}: Loss {loss}")

    def calculate_loss(self, X, y):
        """Return the mean cross-entropy of the current weights on ``(X, y)``.

        The loss is computed from ``X`` itself rather than from whatever the
        last ``forward`` call cached, so it is valid for any dataset (for
        example a held-out split). The cached forward-pass state is not
        modified.

        Args:
            X: Inputs, one row per sample.
            y: One-hot targets with one row per row of ``X``.
        """
        _, _, logits = self._hidden_and_logits(X)
        return self._mean_cross_entropy(logits, y)

In [21]:
# Layer sizes for the network.
# The hidden width and the number of output classes are fixed choices; the
# input width is read off the prepared training matrix so it always matches
# however many columns X_train has at this point.
hidden_size, output_size = 64, 10
input_size = X_train.shape[1]


In [22]:
# Initialize and train the neural network
# Seed NumPy's global RNG first: NeuralNetwork draws its initial weights with
# np.random.randn, so without a fixed seed every run starts from different
# weights and the printed losses / final accuracy cannot be reproduced.
np.random.seed(42)
model_scratch = NeuralNetwork(input_size, hidden_size, output_size)
model_scratch.train(X_train, y_train_one_hot, learning_rate=0.01, epochs=200)

# Evaluation
def accuracy(y_true, y_pred):
    """Return the fraction of positions where ``y_pred`` equals ``y_true``.

    Args:
        y_true: Array-like of reference labels.
        y_pred: Array-like of predicted labels, same shape as ``y_true``.

    Returns:
        The mean of the element-wise equality, a value between 0 and 1.

    Raises:
        ValueError: If the two inputs do not have the same shape.
    """
    # Convert first: comparing two plain Python lists with == yields a single
    # bool rather than an element-wise result.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    # Refuse mismatched shapes instead of letting NumPy broadcast them into a
    # comparison that no longer lines up label-for-label.
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, got {y_true.shape} and {y_pred.shape}"
        )
    return np.mean(y_true == y_pred)

# Score the trained network on the test split: the reference class is the
# position of the 1 in each one-hot row, the predicted class is the position
# of the largest output probability.
true_labels = np.argmax(y_test_one_hot, axis=1)
predictions = np.argmax(model_scratch.forward(X_test), axis=1)
scratch_accuracy = accuracy(true_labels, predictions)
print(f"Accuracy of scratch model: {scratch_accuracy}")

Epoch 0: Loss 2.302292111684418
Epoch 5: Loss 2.301628838851462
Epoch 10: Loss 2.3009647929699053
Epoch 15: Loss 2.3002986938137977
Epoch 20: Loss 2.299629261608191
Epoch 25: Loss 2.2989552142858996
Epoch 30: Loss 2.2982752647807048
Epoch 35: Loss 2.297588118353069
Epoch 40: Loss 2.2968924699448103
Epoch 45: Loss 2.2961870015596375
Epoch 50: Loss 2.295470379666884
Epoch 55: Loss 2.2947412526261606
Epoch 60: Loss 2.2939982481311785
Epoch 65: Loss 2.2932399706713857
Epoch 70: Loss 2.2924649990105563
Epoch 75: Loss 2.2916718836819756
Epoch 80: Loss 2.2908591445003346
Epoch 85: Loss 2.2900252680909885
Epoch 90: Loss 2.2891687054377634
Epoch 95: Loss 2.2882878694510445
Epoch 100: Loss 2.287381132558441
Epoch 105: Loss 2.2864468243209335
Epoch 110: Loss 2.285483229077983
Epoch 115: Loss 2.2844885836257216
Epoch 120: Loss 2.2834610749329736
Epoch 125: Loss 2.2823988379005065
Epoch 130: Loss 2.2812999531695803
Epoch 135: Loss 2.28016244498655
Epoch 140: Loss 2.278984279130958
Epoch 145: Loss 2