In [3]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [4]:
import struct
import os

def read_mnist_images(file_path):
    with open(file_path, 'rb') as f:
        magic, num_images = struct.unpack('>II', f.read(8))
        rows, cols = struct.unpack('>II', f.read(8))
        images = np.fromfile(f, dtype=np.uint8).reshape(num_images, rows * cols)
    return images

def read_mnist_labels(file_path):
    with open(file_path, 'rb') as f:
        magic, num_labels = struct.unpack('>II', f.read(8))
        labels = np.fromfile(f, dtype=np.uint8)
    return labels

# Replace these paths with your actual file paths
train_images_path = 'Dataset/train-images-idx3-ubyte/train-images-idx3-ubyte'  # Update with your path
train_labels_path = 'Dataset/train-labels-idx1-ubyte/train-labels-idx1-ubyte'  # Update with your path
test_images_path = 't10k-images-idx3-ubyte'    # Update with your path
test_labels_path = 't10k-labels-idx1-ubyte'    # Update with your path

# Load data
train_images = read_mnist_images(train_images_path)
train_labels = read_mnist_labels(train_labels_path)

# Create DataFrames
# First for the features (images)
train_df = pd.DataFrame(train_images)
# Add the labels
train_df['label'] = train_labels


In [5]:
df=train_df.copy()
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,775,776,777,778,779,780,781,782,783,label
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,5
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,4
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,9


In [38]:
data=df.values
np.random.shuffle(data)
num_samples=data.shape[0]
train_end = int(0.7 * num_samples)
val_end= int(0.85*num_samples)
train_data=data[:train_end]
val_data=data[train_end:val_end]
test_data=data[val_end:]

X_train=train_data[:,:-1]
y_train=train_data[:,-1]
X_val=val_data[:,:-1]
y_val=val_data[:,-1]
X_test=test_data[:,:-1]
y_test=test_data[:,-1]
X_train=X_train/255.0
X_val=X_val/255.0
X_test=X_test/255.0



In [16]:
class Layer_Dense:
    def __init__(self,n_inputs,n_neuron):
        self.weights=0.10*np.random.randn(n_inputs,n_neuron)
        self.bias=0.10*np.random.randn(1,n_neuron)
    def forward(self,inputs):
        self.inputs=inputs
        self.output=np.dot(inputs,self.weights)+self.bias
    def backward(self,dvalues):
        self.dweights=np.dot(self.inputs.T,dvalues)
        self.dbias=np.sum(dvalues,axis=0,keepdims=True)
        self.dinputs=np.dot(dvalues,self.weights.T)

class Activation_RelU:
    def forward(self,inputs):
        self.inputs=inputs
        self.output=np.maximum(0,inputs)
    def backward(self,dvalues):
        self.dinputs=dvalues.copy()
        self.dinputs[self.inputs<=0]=0

class Activation_SoftMax:
    def forward(self,inputs):
        self.inputs=inputs
        exp_values=np.exp(inputs-np.max(inputs,axis=1,keepdims=True))
        probablities=exp_values/np.sum(exp_values,axis=1,keepdims=True)
        self.output=probablities

class Loss:
    def calculate(self,output,y):
        sample_losses=self.forward(output,y)
        data_loss=np.mean(sample_losses)
        return data_loss
class Loss_CategoricalCross_Entropy(Loss):
    def forward(self,y_pred,y_true):
        samples=len(y_pred)
        y_pred_clipped=np.clip(y_pred,1e-7,1-1e-7)
        if len(y_true.shape)==1:
            correct_confidence=y_pred_clipped[range(samples),y_true]
        elif len(y_true.shape)==2:
            correct_confidence=np.sum(y_pred_clipped*y_true,axis=1)
        negative_log_likelihood=-np.log(correct_confidence)
        return negative_log_likelihood

class Activation_SoftMax_Loss_Catorgorical_Cross_Entropy():
    def __init__(self):
        self.activation=Activation_SoftMax()
        self.loss=Loss_CategoricalCross_Entropy()
    def forward(self,inputs,y_true):
        self.activation.forward(inputs)
        self.output=self.activation.output
        return self.loss.calculate(self.output,y_true)
    def backward(self,dvalues,y_true):
        samples=len(dvalues)
        if len(y_true.shape)==2:
            y_true=np.argmax(y_true,axis=1)
        self.dinputs=dvalues.copy()
        self.dinputs[range(samples),y_true] -=1
        self.dinputs=self.dinputs/samples
    
class Optimizer_GD:
    def __init__(self,learning_rate=1):
        self.learning_rate=learning_rate
    def update_params(self,layer):
        layer.weights += -self.learning_rate*layer.dweights
        layer.bias += -self.learning_rate*layer.dbias

class Optimizer_Adam:
    def __init__(self,learning_rate=0.001,decay=0,epsilion=1e-7,beta_1=0.9,beta_2=0.9999):
        self.learning_rate=learning_rate
        self.current_learning_rate=learning_rate
        self.decay=decay
        self.epsilion=epsilion
        self.beta_1=beta_1
        self.beta_2=beta_2
        self.iterations=0
    def pre_update_params(self):
        if self.decay:
            self.current_learning_rate=self.learning_rate*(1/(1+self.decay*self.iterations))
    def update_params(self,layer):
        if not hasattr(layer,'weight_cache'):
            layer.weight_cache=np.zeros_like(layer.weights)
            layer.weight_momentum=np.zeros_like(layer.weights)
            layer.bias_cache=np.zeros_like(layer.bias)
            layer.bias_momentum=np.zeros_like(layer.bias)
        layer.weight_momentum=self.beta_1*layer.weight_momentum +(1-self.beta_1)*layer.dweights
        layer.bias_momentum=self.beta_1*layer.bias_momentum + (1-self.beta_1)*layer.dbias
        
        weight_momentum_corrected=layer.weight_momentum / (1-self.beta_1**(self.iterations+1))
        bias_momentum_corrected=layer.bias_momentum /(1-self.beta_1**(self.iterations+1))

        layer.weight_cache=self.beta_2*layer.weight_cache +(1-self.beta_2)*layer.dweights**2
        layer.bias_cache=self.beta_2*layer.bias_cache +(1-self.beta_2)*layer.dbias**2

        weight_cache_corrected=layer.weight_cache/(1-self.beta_2**(self.iterations+1))
        bias_cache_corrected=layer.bias_cache/(1-self.beta_2**(self.iterations+1))

        layer.weights += -self.current_learning_rate*weight_momentum_corrected \
                         /(np.sqrt(weight_cache_corrected + self.epsilion))
        layer.bias += -self.current_learning_rate*bias_momentum_corrected \
                     / (np.sqrt(bias_cache_corrected + self.epsilion))
    def post_update_params(self):
        self.iterations +=1
        

In [39]:
dense1=Layer_Dense(784,128)
activation1=Activation_RelU()
dense2=Layer_Dense(128,10)
loss_activation=Activation_SoftMax_Loss_Catorgorical_Cross_Entropy()
optimizer=Optimizer_Adam(learning_rate=0.02,decay=1e-5)
for epoch in range(50):
    dense1.forward(X_train)
    activation1.forward(dense1.output)
    dense2.forward(activation1.output)
    loss=loss_activation.forward(dense2.output,y_train)

    predictions=np.argmax(loss_activation.output,axis=1)
    accuracy=np.mean(predictions==y_train)
    if not epoch %100==0:
        print(f'epoch: {epoch}, ' +
              f'acc: {accuracy :.3f}, ' +
              f'loss: {loss:.3f}')
    loss_activation.backward(loss_activation.output,y_train)
    dense2.backward(loss_activation.dinputs)
    activation1.backward(dense2.dinputs)
    dense1.backward(activation1.dinputs)
    
    optimizer.pre_update_params()
    optimizer.update_params(dense1)
    optimizer.update_params(dense2)
    optimizer.post_update_params()
    

epoch: 1, acc: 0.394, loss: 2.727
epoch: 2, acc: 0.294, loss: 2.675
epoch: 3, acc: 0.460, loss: 1.571
epoch: 4, acc: 0.625, loss: 1.170
epoch: 5, acc: 0.696, loss: 0.956
epoch: 6, acc: 0.690, loss: 0.953
epoch: 7, acc: 0.693, loss: 0.906
epoch: 8, acc: 0.759, loss: 0.740
epoch: 9, acc: 0.821, loss: 0.598
epoch: 10, acc: 0.846, loss: 0.537
epoch: 11, acc: 0.836, loss: 0.545
epoch: 12, acc: 0.836, loss: 0.538
epoch: 13, acc: 0.853, loss: 0.495
epoch: 14, acc: 0.867, loss: 0.450
epoch: 15, acc: 0.878, loss: 0.415
epoch: 16, acc: 0.885, loss: 0.395
epoch: 17, acc: 0.886, loss: 0.389
epoch: 18, acc: 0.887, loss: 0.381
epoch: 19, acc: 0.892, loss: 0.362
epoch: 20, acc: 0.898, loss: 0.342
epoch: 21, acc: 0.902, loss: 0.329
epoch: 22, acc: 0.906, loss: 0.317
epoch: 23, acc: 0.909, loss: 0.307
epoch: 24, acc: 0.911, loss: 0.301
epoch: 25, acc: 0.912, loss: 0.297
epoch: 26, acc: 0.914, loss: 0.287
epoch: 27, acc: 0.918, loss: 0.275
epoch: 28, acc: 0.920, loss: 0.266
epoch: 29, acc: 0.921, loss: 

In [43]:
#Validations Accuracy

In [40]:
dense1.forward(X_val)
activation1.forward(dense1.output)
dense2.forward(activation1.output)
loss=loss_activation.forward(dense2.output,y_val)

predictions=np.argmax(loss_activation.output,axis=1)
accuracy=np.mean(predictions==y_val)
    
print(f'acc: {accuracy :.3f}, ' +  f'loss: {loss:.3f}')

acc: 0.938, loss: 0.207


In [44]:
# Testing Accuracy

In [41]:
dense1.forward(X_test)
activation1.forward(dense1.output)
dense2.forward(activation1.output)
loss=loss_activation.forward(dense2.output,y_val)

predictions=np.argmax(loss_activation.output,axis=1)
accuracy=np.mean(predictions==y_test)
    
print(f'acc: {accuracy :.3f}, ' +  f'loss: {loss:.3f}')

acc: 0.941, loss: 9.111
