In [1]:
cd D:\Sabudh\MNIST

D:\Sabudh\MNIST


# Importing libraries

In [2]:
import pandas as pd
import numpy as np
from mlxtend.data import loadlocal_mnist
import matplotlib.pyplot as plt
%matplotlib inline

# Implement NN

In [101]:
class Layer():
    """
    size: Number of nodes in the hidden layer 
    activation: name of activation function for the layer
    """
    def __init__(self,size,activation='sigmoid'): 
        self.shape=(1,size)
        self.activation=activation
                
class NeuralNetwork():
    def __init__(self,x,y):
        """
        x is 2d array of input images
        y are one hot encoded labels 
        """
        self.x=x/255   # Divide by 255 to normalise the pixel values (0-255)
        self.y=y
        self.weights=[]
        self.outputs=[]
        self.derivatives=[]
        self.activations=[]
        self.delta_weights=[]
        
    def connect(self,layer1,layer2):
        """layer 2 of shape 1xn"""
        #Initialise weights,derivatives and activation lists
        self.derivatives.append(np.random.uniform(0,0.1,size=(layer1.shape[1]+1,layer2.shape[1])))
        self.weights.append(np.random.uniform(-1,1,size=(layer1.shape[1]+1,layer2.shape[1])))
        self.delta_weights.append(np.zeros((layer1.shape[1]+1,layer2.shape[1])))
        if isinstance(layer2,Layer):
            self.activations.append(layer2.activation)
            
    def activation(self,name,z,derivative=False):
        
        #implementation of various activation functions and their derivatives
        if name=='sigmoid':
            if derivative==False:
                return 1/(1+np.exp(-z))
            else:
                return z*(1-z)
        elif name=='relu':
            pass
        
    def softmax(self,z):
        e=np.exp(z)
        return e/np.sum(e,axis=1).reshape(-1,1) 
    
    def max_log_likelihood(self,y_pred,y):
        """cross entropy"""
        return y*np.log(y_pred)
    
    def delta_mll(self,y,y_pred):
        """derivative of cross entropy"""
        #return y*(y_pred-1)
        return y_pred-y
    
    def forward_pass(self,x,y,weights):
        cost=0
        self.outputs=[]
        for i in range(len(weights)):
            samples=len(x)
            ones_array=np.ones(samples).reshape(samples,1)
            self.outputs.append(x) #append without adding ones array
            z=np.dot(np.append(ones_array,x,axis=1),weights[i])
            if i==len(weights)-1:
                x=self.softmax(z)
            else:
                x=self.activation(self.activations[i],z)
        self.outputs.append(x)
        self.y_pred=x
        
        temp=-self.max_log_likelihood(self.y_pred,y)
        cost=np.mean(np.sum(temp,axis=1))
        return cost
    
    
    def backward_pass(self,y,step,momentum=False,beta=0.9):
        for i in range(len(self.weights)-1,-1,-1):
            ones_array=np.ones(len(n.outputs[i])).reshape(len(n.outputs[i]),1)
            if i==len(self.weights)-1:
                prev_term=self.delta_mll(y,self.y_pred)  
                # derivatives follow specific order,last three terms added new,rest from previous term  
                self.derivatives[i]=np.dot(prev_term.T,np.append(ones_array,self.outputs[i],axis=1))   
            else:
                prev_term=np.dot(prev_term,self.weights[i+1][1:].T)*self.activation(self.activations[i],self.outputs[i+1],derivative=True)
                self.derivatives[i]=np.dot(prev_term.T,np.append(ones_array,self.outputs[i],axis=1))
            if momentum:
                self.delta_weights[i]=beta*self.delta_weights[i]-step*((self.derivatives[i].T)/len(y))
                self.weights[i]=self.weights[i]+self.delta_weights[i]
            else:
                self.weights[i]=self.weights[i]-step*((self.derivatives[i].T)/len(y))
                
    
    def train(self,batches,step=1e-3,epoch=10):
        """number of batches to split data in,step size and epochs"""
        for epochs in range(epoch):
            samples=len(self.x)
            c=0
            for i in range(batches):
                x_batch=self.x[int((samples/batches)*i):int((samples/batches)*(i+1))]
                y_batch=self.y.loc[int((samples/batches)*i):int((samples/batches)*(i+1))-1]
                
                c=self.forward_pass(x_batch,y_batch,self.weights)
                self.backward_pass(y_batch,step,momentum=True)
            print(epochs,c/batches)
    
    def predict(self,x):
        """input: x_test values"""
        x=x/255
        for i in range(len(self.weights)):
            samples=len(x)
            ones_array=np.ones(samples).reshape(samples,1)
            z=np.dot(np.append(ones_array,x,axis=1),self.weights[i])
            if i==len(self.weights)-1:
                x=self.softmax(z)
            else:
                x=self.activation(self.activations[i],z)
        #self.y_pred=x
        # return index(digit) with highest probability 
        return np.argmax(x,axis=1)
        #return x
        

# Load data

In [102]:
#load mnist data images as X ,labels as Y (Train data)
X, Y = loadlocal_mnist(images_path='train-images.idx3-ubyte', labels_path='train-labels.idx1-ubyte')

#load mnist data images as x ,labels as y (Test data)
x,y=loadlocal_mnist(images_path='t10k-images.idx3-ubyte', labels_path='t10k-labels.idx1-ubyte')

#One hot encoding of training labels 
Labels=pd.get_dummies(Y)


In [103]:
n=NeuralNetwork(X,Labels)
l1=Layer(100)
#l2=Layer(100)
n.connect(X,l1)
#n.connect(l1,l2)
n.connect(l1,Labels)
n.train(batches=500,step=0.1,epoch=20)

0 0.0006290452105615551
1 0.0004855376653063454
2 0.00042538433851476565
3 0.00038778345264871755
4 0.00036200951415491547
5 0.00034338361992765977
6 0.00032803182694229703
7 0.00031402587149243876
8 0.0003012513615820466
9 0.00028987312758038826
10 0.0002798709193301331
11 0.00027111904912354215
12 0.0002635075917120462
13 0.000256921607475452
14 0.0002511622199604051
15 0.00024596912330632925
16 0.0002411120417814635
17 0.0002364304592246316
18 0.00023180362526945038
19 0.00022712999497762805


In [104]:
pred=n.predict(x)
np.bincount(n.predict(x)),np.bincount(y)

(array([1012, 1139, 1038, 1005,  981,  920,  932, 1019,  937, 1017],
       dtype=int64),
 array([ 980, 1135, 1032, 1010,  982,  892,  958, 1028,  974, 1009],
       dtype=int64))

In [105]:
print(f"accuracy is {np.bincount(np.abs(y-pred))[0]*100/len(y)} %")

accuracy is 96.33 %
