In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import idx2numpy
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import preprocessing

### Converting the file into numpy array of dimensions (60000,28,28)

In [2]:
def convertfunc(file):
    arr = idx2numpy.convert_from_file(file)
    return arr

In [3]:
X_train=convertfunc('train-images.idx3-ubyte')
y_train=convertfunc('train-labels.idx1-ubyte')
X_test=convertfunc('t10k-images.idx3-ubyte')
y_test=convertfunc('t10k-labels.idx1-ubyte')

In [4]:
X_train.shape

(60000, 28, 28)

In [5]:
X_test.shape, y_train.shape

((10000, 28, 28), (60000,))

### Reshaping the dataset :

In [6]:
def reshapefunc(array,rows,columns):
    array=array.reshape(rows,columns)
    return array

In [7]:
X_train=reshapefunc(X_train,60000,784)
X_test=reshapefunc(X_test,10000,784)
#y_train=reshapefunc(y_train,1,60000)
#y_test=reshapefunc(y_test,1,10000)


In [8]:
X_train.shape,X_test.shape, y_train.shape,y_test.shape


((60000, 784), (10000, 784), (60000,), (10000,))

### Using One hot label encoding:

In [9]:
y_train = pd.get_dummies(y_train)
y_train=np.array(y_train)


In [10]:
y_train

array([[0, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 1, 0]], dtype=uint8)

In [11]:
X_train.shape, X_test.shape

((60000, 784), (10000, 784))

In [12]:
def initialize_weights(inputs, hNeurons, oNeurons):

    weights1=np.random.randn(inputs,hNeurons)*np.sqrt(1./inputs)    #784 rows 5 columns
    bias1=np.zeros((1, hNeurons))*np.sqrt(1./inputs)
    #hence x.w = 1,5 dimensions + bias [1,5]
    weights2=np.random.randn(hNeurons,oNeurons)*np.sqrt(1./hNeurons)
    #hence x.w= 5,10 dimensions + bias [1,10]
    bias2= np.zeros((1, oNeurons))*np.sqrt(1./hNeurons)

    #ouptut vecotr is going to be of dimensions [1,10] then softmax
    #W1.shape , b1.shape, W2.shape, b2.shape
    
    return weights1, bias1, weights2, bias2


In [13]:
def sigmoid(z):
    act = 1. / (1. + np.exp(-z))
    return act

In [14]:
def softmax(z):
    softact = np.exp(z)/np.sum(np.exp(z), axis=0)                                       #- np.max(z, axis=1, keepdims=True)
    return softact

In [15]:
def feedforward(X_train,W1,b1,W2,b2):
    z1= np.dot(X_train,W1)+b1
    h1=sigmoid(z1)
    
    z2=np.dot(h1,W2)+b2
    output=softmax(z2)
    return z1, h1, z2, output
        

In [16]:
#dotprod1, h1, dotprod2, output=feedforward(X_train[:30000],W1,b1,W2,b2)

In [17]:
def error(ypred,y_train):     
    n_samples = y_train.shape[0]
    L = (ypred-y_train)/n_samples
    return L
    

In [18]:
'''
def crossentropy(ypred, y_train):       #loss function
    n_samples = y_train.shape[0]
    logp = - np.log(ypred[np.arange(n_samples), y_train.argmax(axis=1)])
    loss = np.sum(logp)/n_samples
    return loss
'''

'\ndef crossentropy(ypred, y_train):       #loss function\n    n_samples = y_train.shape[0]\n    logp = - np.log(ypred[np.arange(n_samples), y_train.argmax(axis=1)])\n    loss = np.sum(logp)/n_samples\n    return loss\n'

In [19]:
def crossentropy(ypred,y_train):
    L_sum = np.sum(np.multiply(y_train, np.log(ypred)))
    m = y_train.shape[0]
    L = -(1./m) * L_sum

    return L

In [20]:
def sigmoid_derivative(h1):
    deriv=h1*(1-h1)
    return deriv

In [21]:
def backpropagation(h1,W2,ypred,y_train):
    
    #Calculate error
    L=error(ypred,y_train)
    
    # z1, h1, z2, output
    # z1, and z2 => dot products
    # h1 and output => activations
    change_output=L
    #dL/dW2
    delta_W2= np.dot(h1.T,change_output)
    
    #dL/db2
    delta_b2= np.sum(change_output,axis=0)
    
    #dL/dh1
    delta_h1=np.dot(change_output,W2.T)
    
    change_h1= delta_h1*sigmoid_derivative(h1)
    
    #dL/dW1
    delta_W1= np.dot(X_train[:30000].T, change_h1)
    
    #dL/db1
    delta_b1= np.sum(change_h1,axis=0)
    
    return delta_W2, delta_b2, delta_W1, delta_b1
    

In [22]:
#delta_W2, delta_b2, delta_W1, delta_b1= backpropagation(output,y_train[:30000])

In [23]:
def update_weights(alpha,W1,b1,W2,b2,delta_W2,delta_b2,delta_W1,delta_b1):
    W2= W2- alpha*(delta_W2)
    b2= b2 - alpha*(delta_b2)
    W1= W1- alpha*(delta_W1)
    b1=b1- alpha*(delta_b1)
    
    return W2,b2,W1,b1

In [24]:
#W2,b2,W1,b1=update_weights(0.001,W1,b1,W2,b2,delta_W2,delta_b2,delta_W1,delta_b1)

In [25]:
def training():
    hNeurons=64                               #hidden neuron
    inputs=X_train.shape[1]                   #number of inputs
    oNeurons=y_train.shape[1]
    aplha= 0.005                         #learning rate
    epochs=100
    
    
    #initialize weights
    W1, b1, W2, b2= initialize_weights(inputs,hNeurons,oNeurons)
    
    for i in range(epochs):
        
        #feed forward
        dotprod1, h1, dotprod2, output=feedforward(X_train[:30000],W1,b1,W2,b2)

        #back propogate
        delta_W2, delta_b2, delta_W1, delta_b1= backpropagation(h1,W2,output,y_train[:30000])
        #update weights using gradient descent
        W2,b2,W1,b1=update_weights(aplha,W1,b1,W2,b2,delta_W2,delta_b2,delta_W1,delta_b1)

        #again feed forward using updated weights
        dotprod1, h1, dotprod2, output=feedforward(X_train[:30000],W1,b1,W2,b2)

        #Calculating Total Loss
        loss=crossentropy(output,y_train[:30000])                 # put in training func later
        print("loss on total training set is : ", loss)
        
    

In [26]:
training()

loss on total training set is :  10.396443529951062
loss on total training set is :  10.3808189647438
loss on total training set is :  10.365155749450599
loss on total training set is :  10.349403219409451
loss on total training set is :  10.333766840672817
loss on total training set is :  10.319180312619235
loss on total training set is :  10.305232184664327
loss on total training set is :  10.291283199396991
loss on total training set is :  10.277463560862525
loss on total training set is :  10.26384937418235
loss on total training set is :  10.250521241800557
loss on total training set is :  10.237333436657034
loss on total training set is :  10.22456004803703
loss on total training set is :  10.212406374562214
loss on total training set is :  10.200357802459838
loss on total training set is :  10.188576448666456
loss on total training set is :  10.177393640220728
loss on total training set is :  10.166618795097726
loss on total training set is :  10.15609761738266
loss on total tra