In [14]:
import pandas as pd
import matplotlib.pyplot as plt
import idx2numpy
import numpy as np
from sklearn.model_selection import train_test_split

### Converting the IDX files into NumPy arrays (training images have shape (60000, 28, 28))

In [15]:
def convertfunc(file):
    """Read an IDX-format file and hand back its decoded contents.

    Parameters
    ----------
    file
        Whatever ``idx2numpy.convert_from_file`` accepts (the notebook passes
        file-name strings).

    Returns
    -------
    The object returned by ``idx2numpy.convert_from_file(file)``; the notebook
    treats it as a NumPy array.
    """
    return idx2numpy.convert_from_file(file)

In [16]:
# Load images and labels for the training and test splits from the IDX files
# (evaluated left to right: images first, then labels).
X_train, y_train = convertfunc('train-images.idx3-ubyte'), convertfunc('train-labels.idx1-ubyte')
X_test, y_test = convertfunc('t10k-images.idx3-ubyte'), convertfunc('t10k-labels.idx1-ubyte')

In [17]:
# Sanity check: shape of the training images array.
X_train.shape

(60000, 28, 28)

In [18]:
# Sanity check: shapes of the test images and of the training labels.
X_test.shape, y_train.shape

((10000, 28, 28), (60000,))

### Reshaping the dataset: flattening each 28x28 image into a 784-element row

In [19]:
def reshapefunc(array,rows,columns):
    """Return ``array`` laid out as a 2-D array of ``rows`` by ``columns``.

    Parameters
    ----------
    array : object with a ``reshape`` method (a NumPy array in this notebook)
    rows, columns : int
        Target dimensions, forwarded unchanged to ``array.reshape``.

    Returns
    -------
    The result of ``array.reshape(rows, columns)``; the input object itself is
    not rebound.
    """
    return array.reshape(rows, columns)

In [20]:
# Flatten every image into one row. The sample count is read from the array
# itself and -1 lets NumPy infer the row length, so the cell also works on a
# subset of the data instead of only on exactly 60000 / 10000 samples.
X_train = reshapefunc(X_train, X_train.shape[0], -1)
X_test = reshapefunc(X_test, X_test.shape[0], -1)
# The label vectors are intentionally left 1-D here.

In [21]:
# Confirm the images are now 2-D (samples x pixels) and the labels are still 1-D.
X_train.shape,X_test.shape, y_train.shape,y_test.shape


((60000, 784), (10000, 784), (60000,), (10000,))

### One-hot encoding the training labels

In [22]:
# One-hot encode the training labels: one column per distinct label value.
# The dtype is pinned to uint8 because pandas >= 2.0 returns boolean dummies
# by default; pinning keeps the numeric 0/1 matrix on every pandas version.
y_train = pd.get_dummies(y_train, dtype=np.uint8).to_numpy()


In [23]:
# Inspect the one-hot encoded training labels.
y_train

array([[0, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 1, 0]], dtype=uint8)

In [24]:
# Shapes of the flattened training and test images before building the network.
X_train.shape, X_test.shape

((60000, 784), (10000, 784))

In [25]:
def initialize_weights(inputs, hNeurons, oNeurons, seed=None):
    """Create the parameters of a network with one hidden layer.

    Weights are drawn from a zero-mean normal distribution scaled by
    ``1 / sqrt(fan_in)``. Starting every weight at zero (the previous
    behaviour) gives all hidden units identical activations and identical
    gradients, and with the output weights at zero no gradient reaches the
    first layer at all, so the units can never differentiate. Biases carry no
    such symmetry problem and still start at zero.

    Parameters
    ----------
    inputs : int
        Number of input features (rows of ``weights1``).
    hNeurons : int
        Number of hidden units.
    oNeurons : int
        Number of output units.
    seed : int or None, optional
        Seed for ``np.random.default_rng``. Pass a fixed value for a
        reproducible initialisation; ``None`` draws fresh entropy.

    Returns
    -------
    weights1 : float32 array, shape (inputs, hNeurons)
    bias1 : array of zeros, shape (1, hNeurons)
    weights2 : float32 array, shape (hNeurons, oNeurons)
    bias2 : array of zeros, shape (1, oNeurons)
    """
    rng = np.random.default_rng(seed)

    def scaled_normal(fan_in, fan_out):
        # A plain Python float keeps the product in float32; max() guards
        # against a zero fan-in.
        scale = float(max(fan_in, 1)) ** -0.5
        draw = rng.standard_normal((fan_in, fan_out), dtype=np.float32)
        return (draw * scale).astype(np.float32, copy=False)

    weights1 = scaled_normal(inputs, hNeurons)
    bias1 = np.zeros((1, hNeurons))
    weights2 = scaled_normal(hNeurons, oNeurons)
    bias2 = np.zeros((1, oNeurons))

    return weights1, bias1, weights2, bias2


In [28]:
# Layer sizes: input width comes from the feature matrix, output width from
# the one-hot label matrix; the hidden width is chosen by hand.
inputs, oNeurons = X_train.shape[1], y_train.shape[1]
hNeurons = 60  # hidden neuron
W1, b1, W2, b2 = initialize_weights(inputs, hNeurons, oNeurons)

In [29]:
def sigmoid(z):
    """Element-wise logistic function ``1 / (1 + exp(-z))``, computed stably.

    ``exp`` is only ever applied to ``-|z|``, so large-magnitude inputs cannot
    overflow. Non-floating inputs are converted to float first, because
    negating an unsigned-integer array wraps around instead of changing sign.

    Parameters
    ----------
    z : scalar or array-like

    Returns
    -------
    Array of the same shape as ``z`` (a NumPy scalar for scalar input) with
    values in [0, 1]. Floating-point input keeps its dtype.
    """
    z = np.asarray(z)
    if not np.issubdtype(z.dtype, np.floating):
        z = z.astype(float)

    e = np.exp(-np.abs(z))  # always in (0, 1]
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- same value.
    act = np.where(z >= 0, 1. / (1. + e), e / (1. + e))
    # [()] turns a 0-d result back into a scalar, as the plain formula would.
    return act[()] if act.ndim == 0 else act

In [30]:
def softmax(z):
    """Softmax over the last axis, so each row becomes a probability vector.

    For a ``(n_samples, n_classes)`` input the normalisation runs across the
    classes of each sample. Summing over axis 0 instead would normalise each
    class column across samples, which is not a per-sample distribution. The
    row maximum is subtracted before exponentiating; this leaves the result
    unchanged mathematically but prevents overflow for large scores.

    Parameters
    ----------
    z : array-like, 1-D or 2-D scores

    Returns
    -------
    Array of the same shape as ``z`` whose entries along the last axis are
    non-negative and sum to 1.
    """
    z = np.asarray(z)
    if not np.issubdtype(z.dtype, np.floating):
        # Avoid wrap-around when subtracting the maximum from unsigned ints.
        z = z.astype(float)

    shifted = z - np.max(z, axis=-1, keepdims=True)
    exp_scores = np.exp(shifted)
    softact = exp_scores / np.sum(exp_scores, axis=-1, keepdims=True)
    return softact

In [31]:
def feedforward(X_train,W1,b1,W2,b2):
    """Forward pass: affine -> sigmoid -> affine -> softmax.

    Parameters
    ----------
    X_train : input matrix multiplied by ``W1``
    W1, b1 : weights and bias of the hidden layer
    W2, b2 : weights and bias of the output layer

    Returns
    -------
    tuple ``(z1, h1, z2, output)``: hidden pre-activation, hidden activation,
    output pre-activation, and the softmax of the output pre-activation.
    """
    hidden_pre = np.dot(X_train, W1) + b1
    hidden_act = sigmoid(hidden_pre)

    out_pre = np.dot(hidden_act, W2) + b2
    return hidden_pre, hidden_act, out_pre, softmax(out_pre)
        

In [32]:
# Forward pass over the first 30000 training rows with the current W1, b1, W2, b2.
# h1 is kept at module level because backpropagation reads it from there.
dotprod1, h1, dotprod2, output=feedforward(X_train[:30000],W1,b1,W2,b2)

In [33]:
def error(ypred,y_train):
    """Difference between predictions and targets, divided by the sample count.

    Parameters
    ----------
    ypred : predicted values, same shape as ``y_train``
    y_train : target values; ``y_train.shape[0]`` is used as the sample count

    Returns
    -------
    ``(ypred - y_train) / y_train.shape[0]``
    """
    return (ypred - y_train) / y_train.shape[0]
    

In [34]:
def crossentropy(ypred, y_train):       #loss function
    """Mean negative log-probability assigned to the true class.

    Parameters
    ----------
    ypred : array, shape (n_samples, n_classes)
        Predicted class probabilities.
    y_train : array, shape (n_samples, n_classes)
        One-hot targets; the true class of each row is taken as its argmax.

    Returns
    -------
    float
        ``-sum(log(p_true)) / n_samples``. Probabilities are floored at the
        smallest positive normal float, so an exact zero yields a large finite
        loss instead of ``inf``; all other values are unaffected.
    """
    n_samples = y_train.shape[0]
    true_class = y_train.argmax(axis=1)
    p_true = np.asarray(ypred, dtype=float)[np.arange(n_samples), true_class]
    # log(0) would be -inf and make the whole loss infinite.
    p_true = np.maximum(p_true, np.finfo(float).tiny)
    loss = -np.sum(np.log(p_true)) / n_samples
    return loss

In [35]:
def sigmoid_derivative(h1):
    """Derivative of the sigmoid expressed through its output.

    ``h1`` is expected to already be a sigmoid activation; the function
    returns ``h1 * (1 - h1)`` element-wise.
    """
    return (1 - h1) * h1

In [36]:
def backpropagation(ypred, y_train, hidden=None, weights2=None, X=None):
    """Gradients of the loss with respect to both layers' weights and biases.

    Parameters
    ----------
    ypred : array, shape (n_samples, n_outputs)
        Network output for the batch.
    y_train : array
        One-hot targets; only the first ``n_samples`` rows are used so the
        targets always line up with ``ypred``.
    hidden : array, optional
        Hidden activations for the batch. Defaults to the module-level ``h1``.
    weights2 : array, optional
        Output-layer weights. Defaults to the module-level ``W2``.
    X : array, optional
        Inputs for the batch. Defaults to the first ``n_samples`` rows of the
        module-level ``X_train``.

    Returns
    -------
    tuple ``(delta_W2, delta_b2, delta_W1, delta_b1)``. The bias gradients are
    summed over the batch and have shape ``(1, n_units)``, matching the bias
    arrays. Returning the per-sample matrices instead would broadcast each
    bias into an ``(n_samples, n_units)`` matrix on the next update.
    """
    n_samples = ypred.shape[0]

    # Fall back to notebook-level state when the caller passes only
    # (ypred, y_train), as the existing cells do.
    if hidden is None:
        hidden = h1
    if weights2 is None:
        weights2 = W2
    if X is None:
        X = X_train[:n_samples]

    # Output-layer error term: (ypred - y) / n
    change_output = error(ypred, y_train[:n_samples])

    # dL/dW2
    delta_W2 = np.dot(hidden.T, change_output)

    # dL/db2 -- one bias per output unit, so accumulate over the batch
    delta_b2 = np.sum(change_output, axis=0, keepdims=True)

    # dL/dh1
    delta_h1 = np.dot(change_output, weights2.T)

    # Push the error back through the sigmoid
    change_h1 = delta_h1 * sigmoid_derivative(hidden)

    # dL/dW1
    delta_W1 = np.dot(X.T, change_h1)

    # dL/db1 -- accumulated over the batch like delta_b2
    delta_b1 = np.sum(change_h1, axis=0, keepdims=True)

    return delta_W2, delta_b2, delta_W1, delta_b1
    

In [37]:
# Gradients for the first 30000 training rows, from the forward-pass output above.
delta_W2, delta_b2, delta_W1, delta_b1= backpropagation(output,y_train[:30000])

In [38]:
def update_weights(alpha,W1,b1,W2,b2,delta_W2,delta_b2,delta_W1,delta_b1):
    """One gradient-descent step: subtract ``alpha`` times each gradient.

    Parameters
    ----------
    alpha : learning rate
    W1, b1, W2, b2 : current parameters
    delta_W2, delta_b2, delta_W1, delta_b1 : matching gradients

    Returns
    -------
    tuple ``(W2, b2, W1, b1)`` of updated parameters. The output layer comes
    first, which differs from the order of the arguments.
    """
    new_W2 = W2 - alpha * delta_W2
    new_b2 = b2 - alpha * delta_b2
    new_W1 = W1 - alpha * delta_W1
    new_b1 = b1 - alpha * delta_b1
    return new_W2, new_b2, new_W1, new_b1

In [39]:
# One gradient-descent step with learning rate 0.001.
# update_weights returns the output layer first: (W2, b2, W1, b1).
W2,b2,W1,b1=update_weights(0.001,W1,b1,W2,b2,delta_W2,delta_b2,delta_W1,delta_b1)

In [40]:
def training():
    """Run one gradient-descent step on the first 30000 samples and print the loss.

    Steps: initialise the parameters, forward pass, backpropagation, one
    weight update, a second forward pass with the updated weights, then the
    cross-entropy loss on the same samples.

    Side effects
    ------------
    Rebinds the module-level ``h1`` and ``W2``. ``backpropagation(ypred,
    y_train)`` picks those two names up from module scope, so they must hold
    the values produced here for the gradients to belong to this network
    rather than to whatever an earlier cell left behind.

    Returns
    -------
    None. The loss is printed.
    """
    global h1, W2

    hNeurons = 60                             # hidden neurons
    inputs = X_train.shape[1]
    oNeurons = y_train.shape[1]               # was the undefined name `ytrain`
    alpha = 0.0001                            # learning rate

    X_batch = X_train[:30000]
    y_batch = y_train[:30000]

    # initialize weights
    W1, b1, W2, b2 = initialize_weights(inputs, hNeurons, oNeurons)

    # feed forward
    dotprod1, h1, dotprod2, output = feedforward(X_batch, W1, b1, W2, b2)

    # back propagate
    delta_W2, delta_b2, delta_W1, delta_b1 = backpropagation(output, y_batch)

    # update weights using gradient descent
    W2, b2, W1, b1 = update_weights(alpha, W1, b1, W2, b2,
                                    delta_W2, delta_b2, delta_W1, delta_b1)

    # feed forward again using the updated weights
    dotprod1, h1, dotprod2, output = feedforward(X_batch, W1, b1, W2, b2)

    # total loss; the prediction is `output` (was the undefined name `ypred`)
    loss = crossentropy(output, y_batch)
    print("loss on total training set is : ", loss)
     
    