## VAISHNAVI JAMDADE(TM39453)

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import idx2numpy
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.metrics import accuracy_score

### Converting the IDX files into NumPy arrays (training images have shape (60000, 28, 28))

In [2]:
def convertfunc(file):
    """Read the IDX file at `file` and hand back its contents as a NumPy array."""
    return idx2numpy.convert_from_file(file)

In [3]:
# Load images and labels for the train and test splits, in that order.
X_train, y_train, X_test, y_test = (
    convertfunc(idx_path)
    for idx_path in (
        'train-images.idx3-ubyte',
        'train-labels.idx1-ubyte',
        't10k-images.idx3-ubyte',
        't10k-labels.idx1-ubyte',
    )
)

In [4]:
# Sanity check: the raw training images are a 3-D array (samples, rows, cols).
X_train.shape

(60000, 28, 28)

In [5]:
# Test images share the per-image layout; training labels are a flat 1-D vector.
X_test.shape, y_train.shape

((10000, 28, 28), (60000,))

### Reshaping the dataset :

In [6]:
def reshapefunc(array,rows,columns):
    """Return `array` reshaped to the 2-D shape (rows, columns)."""
    target_shape = (rows, columns)
    return array.reshape(target_shape)

In [7]:
# Flatten each image into a single row vector. The row count is read from the
# array itself and -1 lets NumPy infer the row length, so nothing here depends
# on the split sizes being exactly 60000 / 10000 or the images being 28x28.
X_train = reshapefunc(X_train, X_train.shape[0], -1)
X_test = reshapefunc(X_test, X_test.shape[0], -1)
# The label vectors are intentionally left 1-D.
# NOTE(review): pixel values are passed on unscaled (presumably 0-255 integers
# -- confirm); scaling to [0, 1] before training is worth evaluating.


In [8]:
# After flattening: images are (samples, features); labels are still 1-D.
X_train.shape,X_test.shape, y_train.shape,y_test.shape


((60000, 784), (10000, 784), (60000,), (10000,))

### Using One hot label encoding:

In [9]:
# One-hot encode the integer training labels into a (samples, classes) array.
# * dtype is pinned: pandas >= 2.0 returns boolean dummies by default, which
#   would no longer reproduce the uint8 matrix this notebook was built on.
# * The ndim guard makes the cell safe to re-run, because y_train is
#   overwritten with its own encoding.
if y_train.ndim == 1:
    y_train = pd.get_dummies(y_train, dtype=np.uint8).to_numpy()


In [10]:
# Peek at the one-hot label matrix (NumPy abbreviates the display).
y_train

array([[0, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 1, 0]], dtype=uint8)

In [11]:
# Re-check the flattened image shapes before building the network.
X_train.shape, X_test.shape

((60000, 784), (10000, 784))

### Initializing weights:

In [12]:
def initialize_weights(inputs, hNeurons, oNeurons, seed=None):
    """Create the parameters of a network with one hidden layer.

    Weights are drawn from a standard normal distribution and scaled by
    ``sqrt(1 / fan_in)``; biases start at zero.

    Parameters
    ----------
    inputs : int
        Number of input features (rows of ``weights1``).
    hNeurons : int
        Number of hidden units.
    oNeurons : int
        Number of output units.
    seed : int, optional
        When given, the weights are drawn from a private
        ``np.random.RandomState(seed)`` so the initialisation is reproducible.
        When ``None`` (the default) the global NumPy generator is used, which
        is the previous behaviour.

    Returns
    -------
    weights1 : ndarray, shape (inputs, hNeurons)
    bias1 : ndarray, shape (1, hNeurons)
    weights2 : ndarray, shape (hNeurons, oNeurons)
    bias2 : ndarray, shape (1, oNeurons)
    """
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Draw order (weights1, then weights2) is kept so a globally seeded run
    # yields the same numbers as before.
    weights1 = rng.randn(inputs, hNeurons) * np.sqrt(1. / inputs)
    bias1 = np.zeros((1, hNeurons))
    weights2 = rng.randn(hNeurons, oNeurons) * np.sqrt(1. / hNeurons)
    bias2 = np.zeros((1, oNeurons))

    return weights1, bias1, weights2, bias2


### Sigmoid Activation Function:

In [13]:
def sigmoid(z):
    """Element-wise logistic function ``1 / (1 + exp(-z))``.

    Evaluated as ``exp(-log(1 + exp(-z)))`` through ``np.logaddexp`` so that
    very negative inputs do not overflow ``exp`` (the direct formula emits a
    RuntimeWarning there); the result still saturates cleanly to 0 or 1.
    """
    act = np.exp(-np.logaddexp(0, -z))
    return act

### Softmax function :

In [14]:
#Normalising the values:

def softmax(z):
    """Row-wise softmax of a 2-D array of logits.

    The row maximum is subtracted before exponentiating. This leaves the
    result mathematically unchanged but keeps ``exp`` from overflowing, which
    would otherwise turn a row with large logits into ``inf / inf = nan``.

    Parameters
    ----------
    z : ndarray, shape (samples, classes)

    Returns
    -------
    ndarray of the same shape whose rows are non-negative and sum to 1.
    """
    shifted = np.exp(z - np.max(z, axis=1, keepdims=True))
    softact = shifted / np.sum(shifted, axis=1, keepdims=True)
    return softact

### Feed Forward :

In [15]:
def feedforward(X_train,W1,b1,W2,b2):
    """Forward pass: affine -> sigmoid -> affine -> softmax.

    Returns the tuple (hidden pre-activation, hidden activation,
    output pre-activation, softmax output), in that order.
    """
    hidden_pre = np.dot(X_train, W1) + b1
    hidden_act = sigmoid(hidden_pre)

    out_pre = np.dot(hidden_act, W2) + b2
    return hidden_pre, hidden_act, out_pre, softmax(out_pre)
        

### Error Function :

In [16]:
def error(ypred,y_train):
    """Per-sample prediction residual (ypred - y_train), divided by the number of rows in y_train."""
    residual = ypred - y_train
    return residual / y_train.shape[0]
    

### Cross Entropy Loss Function:

In [17]:

def crossentropy(ypred,y_train,eps=1e-12):
    """Mean categorical cross-entropy between predictions and one-hot targets.

    Parameters
    ----------
    ypred : ndarray, shape (samples, classes)
        Predicted class probabilities.
    y_train : ndarray, shape (samples, classes)
        One-hot encoded targets.
    eps : float, optional
        Lower bound applied to ``ypred`` before the logarithm. Without it a
        probability of exactly 0 gives ``log(0) = -inf`` and, multiplied by a
        0 target, ``nan`` -- which would poison the whole sum.

    Returns
    -------
    float
        ``-(1 / samples) * sum(y_train * log(ypred))``.
    """
    safe_pred = np.clip(ypred, eps, 1.0)
    L_sum = np.sum(np.multiply(y_train, np.log(safe_pred)))
    num_samples = y_train.shape[0]
    L = -(1./num_samples) * L_sum

    return L

### Sigmoid Derivative Function:

In [18]:
def sigmoid_derivative(h1):
    """Derivative of the sigmoid written in terms of its output: h1 * (1 - h1).

    `h1` is expected to already be a sigmoid activation, not a pre-activation.
    """
    complement = 1 - h1
    return h1 * complement

### Backpropagation Function to compute Gradient Information:

In [19]:
def backpropagation(h1,W2,ypred,y_train,X=None):
    """Compute the gradients of the loss for both layers.

    Parameters
    ----------
    h1 : ndarray, shape (samples, hidden)
        Hidden-layer sigmoid activations from the forward pass.
    W2 : ndarray, shape (hidden, classes)
        Output-layer weights used in that forward pass.
    ypred : ndarray, shape (samples, classes)
        Softmax output of the forward pass.
    y_train : ndarray, shape (samples, classes)
        One-hot targets for the same samples.
    X : ndarray, shape (samples, features), optional
        The inputs that produced ``h1``. They are needed for ``dL/dW1``.
        When omitted, the notebook-level ``X_train`` is used, which is what
        this function always did before the parameter existed. Pass ``X``
        explicitly whenever the batch is not the full training set.

    Returns
    -------
    delta_W2, delta_b2, delta_W1, delta_b1 : ndarray
        Gradients with the same shapes as W2, b2, W1 and b1 respectively.
    """
    if X is None:
        # Backward-compatible fallback for callers that pass no inputs.
        X = X_train

    # (ypred - y) / n: gradient of the mean cross-entropy with respect to the
    # softmax pre-activations.
    change_output = error(ypred, y_train)

    # dL/dW2 and dL/db2
    delta_W2 = np.dot(h1.T, change_output)
    delta_b2 = np.sum(change_output, axis=0, keepdims=True)

    # dL/dh1, then through the sigmoid to the hidden pre-activations
    delta_h1 = np.dot(change_output, W2.T)
    change_h1 = delta_h1 * sigmoid_derivative(h1)

    # dL/dW1 and dL/db1
    delta_W1 = np.dot(X.T, change_h1)
    delta_b1 = np.sum(change_h1, axis=0, keepdims=True)

    return delta_W2, delta_b2, delta_W1, delta_b1
    

### Function to update weights:

In [20]:
def update_weights(alpha,W1,b1,W2,b2,delta_W2,delta_b2,delta_W1,delta_b1):
    """Take one gradient-descent step of size `alpha` on every parameter.

    New arrays are returned; the inputs are not modified. Note the return
    order: (W2, b2, W1, b1).
    """
    def descend(param, grad):
        return param - alpha * grad

    return (
        descend(W2, delta_W2),
        descend(b2, delta_b2),
        descend(W1, delta_W1),
        descend(b1, delta_b1),
    )

### Training Function:

In [21]:
def training(hNeurons=128, alpha=0.01, epochs=100):
    """Train the one-hidden-layer network with full-batch gradient descent.

    Reads the notebook-level ``X_train`` (flattened images) and ``y_train``
    (one-hot labels). After every epoch the cross-entropy loss and the
    training accuracy, both measured with the freshly updated weights, are
    printed.

    Parameters
    ----------
    hNeurons : int, default 128
        Number of hidden units.
    alpha : float, default 0.01
        Learning rate.
    epochs : int, default 100
        Number of full passes over the training set.

    Returns
    -------
    W2, b2, W1, b1 : ndarray
        The trained parameters, in this (output layer first) order.
    """
    inputs = X_train.shape[1]        # number of input features
    oNeurons = y_train.shape[1]      # number of classes

    # Integer class labels recovered from the one-hot rows, for accuracy.
    labely_train = np.argmax(y_train, axis=1)

    W1, b1, W2, b2 = initialize_weights(inputs, hNeurons, oNeurons)

    # Initial forward pass. Inside the loop, the forward pass made after each
    # update doubles as the next epoch's input to backpropagation, so only one
    # pass per epoch is needed and the printed numbers are unchanged.
    _, h1, _, output = feedforward(X_train, W1, b1, W2, b2)

    for i in range(epochs):

        # back propagate
        delta_W2, delta_b2, delta_W1, delta_b1 = backpropagation(h1, W2, output, y_train)

        # update weights using gradient descent
        W2, b2, W1, b1 = update_weights(alpha, W1, b1, W2, b2,
                                        delta_W2, delta_b2, delta_W1, delta_b1)

        # feed forward with the updated weights
        _, h1, _, output = feedforward(X_train, W1, b1, W2, b2)

        # total loss after this epoch's update
        loss = crossentropy(output, y_train)
        print("Epoch {}: training loss = {}".format(i + 1,loss))

        # predicted class labels and training accuracy
        labelpred = np.argmax(output, axis=1)
        accuracy = round((accuracy_score(labely_train, labelpred)*100),2)
        print("Training Accuracy after Epoch {} : {}%".format(i+1, accuracy))

    return W2,b2,W1,b1
        


In [22]:
# Train the network; training() returns the parameters as (W2, b2, W1, b1).
W2,b2,W1,b1=training()

Epoch 1: training loss = 2.5772442278369647
Training Accuracy after Epoch 1 : 7.94%
Epoch 2: training loss = 2.5063311704192204
Training Accuracy after Epoch 2 : 8.36%
Epoch 3: training loss = 2.4452462100527184
Training Accuracy after Epoch 3 : 9.06%
Epoch 4: training loss = 2.3916610658032345
Training Accuracy after Epoch 4 : 9.89%
Epoch 5: training loss = 2.3447142972356776
Training Accuracy after Epoch 5 : 10.9%
Epoch 6: training loss = 2.3017191934991574
Training Accuracy after Epoch 6 : 12.17%
Epoch 7: training loss = 2.2616384106740215
Training Accuracy after Epoch 7 : 13.6%
Epoch 8: training loss = 2.2235819588499184
Training Accuracy after Epoch 8 : 15.33%
Epoch 9: training loss = 2.1879372284300116
Training Accuracy after Epoch 9 : 17.26%
Epoch 10: training loss = 2.1544648429522493
Training Accuracy after Epoch 10 : 19.37%
Epoch 11: training loss = 2.12386305749903
Training Accuracy after Epoch 11 : 21.57%
Epoch 12: training loss = 2.095459668031324
Training Accuracy after E

Epoch 96: training loss = 1.1873630733149845
Training Accuracy after Epoch 96 : 76.3%
Epoch 97: training loss = 1.1824414023261494
Training Accuracy after Epoch 97 : 76.45%
Epoch 98: training loss = 1.1775791412312644
Training Accuracy after Epoch 98 : 76.58%
Epoch 99: training loss = 1.172768033130031
Training Accuracy after Epoch 99 : 76.7%
Epoch 100: training loss = 1.1680189430376624
Training Accuracy after Epoch 100 : 76.82%


In [26]:
# y_train is the one-hot matrix at this point: (samples, classes).
y_train.shape

(60000, 10)

In [27]:
def testing(W2,b2,W1,b1,X_test,y_test):
    """Evaluate trained parameters on a held-out set.

    Parameters
    ----------
    W2, b2, W1, b1 : ndarray
        Trained parameters, in the order returned by ``training``.
    X_test : ndarray, shape (samples, features)
        Flattened inputs to classify.
    y_test : ndarray, shape (samples,)
        Integer class labels (not one-hot encoded).

    Returns
    -------
    float
        Accuracy on the given set as a percentage rounded to two decimals.
        The same value is printed.
    """
    _, _, _, output = feedforward(X_test, W1, b1, W2, b2)

    # y_test already holds integer labels, so only the predictions need argmax.
    labelpred = np.argmax(output, axis=1)
    accuracytest = round((accuracy_score(y_test, labelpred)*100),2)
    print("Test Accuracy : {}%".format(accuracytest))
    return accuracytest
        

In [28]:
# Score the trained parameters on the held-out test split.
result=testing(W2,b2,W1,b1,X_test,y_test)

Training Accuracy : 76.68%
