Dependencies and dataset

In [94]:
import numpy as np
import pandas as pd
import zipfile
import os

# Download the MNIST-in-CSV archive into ./data via an IPython shell escape.
# NOTE(review): presumably requires the Kaggle CLI to be installed and API
# credentials to be configured -- confirm before running on a fresh machine.
!kaggle datasets download -d oddrationale/mnist-in-csv -p ./data
# Unpack the archive into ./data, then delete the zip so only the extracted
# files are left behind.
with zipfile.ZipFile('./data/mnist-in-csv.zip', 'r') as zip_ref:
    zip_ref.extractall('./data')
os.remove('./data/mnist-in-csv.zip')

Downloading mnist-in-csv.zip to ./data




  0%|          | 0.00/15.2M [00:00<?, ?B/s]
  7%|▋         | 1.00M/15.2M [00:00<00:09, 1.59MB/s]
 20%|█▉        | 3.00M/15.2M [00:00<00:02, 4.54MB/s]
 39%|███▉      | 6.00M/15.2M [00:00<00:01, 9.56MB/s]
 66%|██████▌   | 10.0M/15.2M [00:01<00:00, 16.0MB/s]
 92%|█████████▏| 14.0M/15.2M [00:01<00:00, 21.2MB/s]
100%|██████████| 15.2M/15.2M [00:01<00:00, 13.6MB/s]


Define cost and activation functions

In [120]:
def sigmoid(z):
    """Logistic sigmoid 1 / (1 + exp(-z)), applied element-wise.

    Computed as exp(-log(1 + exp(-z))) through ``np.logaddexp`` so that large
    negative inputs no longer overflow inside ``np.exp`` (the naive formula
    emits a RuntimeWarning there); the result simply saturates towards 0 or 1.

    Args:
        z: scalar or NumPy array of pre-activations.

    Returns:
        Value(s) in [0, 1] with the same shape as ``z``.
    """
    # logaddexp(0, -z) == log(exp(0) + exp(-z)) == log(1 + exp(-z)), evaluated
    # without ever forming exp(-z) explicitly.
    return np.exp(-np.logaddexp(0.0, -z))

def sigmoid_prime(z):
    """Derivative of the sigmoid at ``z``: sigmoid(z) * (1 - sigmoid(z)).

    Args:
        z: scalar or NumPy array of pre-activations.

    Returns:
        The element-wise derivative, same shape as ``z``.
    """
    # Evaluate the sigmoid once and reuse it instead of computing it twice.
    s = sigmoid(z)
    return s*(1-s)

def cost(y,x):
    """Quadratic cost: half the sum of squared differences between y and x.

    Args:
        y: first operand (scalar or NumPy array).
        x: second operand, broadcast-compatible with ``y``.

    Returns:
        A single scalar, sum(0.5 * (y - x)**2).
    """
    residual = y - x
    half_squares = 0.5 * residual**2
    return np.sum(half_squares)

def cost_prime(y,x):
    """Derivative of the quadratic cost 0.5 * (y - x)**2 with respect to x.

    Args:
        y: first operand (scalar or NumPy array).
        x: second operand, broadcast-compatible with ``y``.

    Returns:
        The element-wise difference x - y.
    """
    gradient = x - y
    return gradient

Define Network class

In [96]:
class Network:
    """Fully connected feed-forward network with sigmoid activations.

    Training updates the weights and biases after every single sample, using
    the gradient supplied by ``cost_prime`` and ``sigmoid_prime``.

    Attributes:
        L: list of layer sizes, input layer first.
        W: list of weight matrices; W[k] has shape (L[k+1], L[k]).
        B: list of bias column vectors; B[k] has shape (L[k+1], 1).
    """

    def __init__(self, layers, seed=None):
        """Create a network with standard-normal initial weights and biases.

        Args:
            layers: sequence of layer sizes, e.g. [784, 100, 10].
            seed: optional seed for reproducible initialisation. When None
                (the default) values are drawn from NumPy's global random
                state, exactly as before this parameter existed.
        """
        self.L = layers
        if seed is None:
            normal = np.random.randn
        else:
            rng = np.random.default_rng(seed)

            def normal(rows, cols):
                return rng.standard_normal((rows, cols))

        self.W = [normal(x, y) for x, y in zip(self.L[1:], self.L[0:-1])]
        self.B = [normal(x, 1) for x in self.L[1:]]

    def feedforward(self, a):
        """Return the network output for a single input.

        Args:
            a: input array; it is reshaped to a column vector, so any array
                with L[0] elements works.

        Returns:
            Column vector of shape (L[-1], 1) with the output activations.
        """
        a = a.reshape(-1, 1)
        for w, b in zip(self.W, self.B):
            a = sigmoid(np.dot(w, a) + b)
        return a

    def train(self, X_train, Y_train, epochs, eta):
        """Run per-sample gradient descent over the training set.

        Args:
            X_train: 2-D array with one training input per column.
            Y_train: 2-D array with the matching target per column.
            epochs: number of full passes over the columns of ``X_train``.
            eta: learning rate applied to every update.
        """
        n_samples = X_train.shape[1]
        for epoch in range(epochs):
            for i in range(n_samples):
                # Each sample and its target are passed on as column vectors.
                a = X_train[:, i].reshape(-1, 1)
                y = Y_train[:, i].reshape(-1, 1)
                self.learn(a, y, eta)

    def learn(self, a, y, eta):
        """Do one forward pass for a single sample, then update the parameters.

        Args:
            a: input column vector.
            y: target column vector.
            eta: learning rate.
        """
        Z = []    # weighted inputs z = W.a_prev + b, one entry per layer
        A = [a]   # activations, starting with the input itself
        for w, b in zip(self.W, self.B):
            z = np.dot(w, A[-1]) + b
            Z.append(z)
            A.append(sigmoid(z))

        self.__backprob(y, A, Z, eta)

    def __backprob(self, y, A, Z, eta):
        """Backpropagate the error and apply one gradient-descent step.

        Args:
            y: target column vector.
            A: activations per layer, input first (len(Z) + 1 entries).
            Z: weighted inputs per layer.
            eta: learning rate.
        """
        # Error of the output layer.
        delta = cost_prime(y, A[-1]) * sigmoid_prime(Z[-1])
        D = [delta]
        # Propagate the error backwards through the remaining layers. Deltas
        # are collected last-layer-first and reversed afterwards.
        for i in range(1, len(Z)):
            delta = np.dot(self.W[-i].T, delta) * sigmoid_prime(Z[-i-1])
            D.append(delta)
        D.reverse()

        # All deltas were computed with the old weights; only now update.
        for i in range(len(self.W)):
            # Weight gradient is the outer product of the layer's delta with
            # the activation feeding into it; the bias gradient is the delta.
            self.W[i] -= eta * np.dot(D[i], A[i].T)
            self.B[i] -= eta * D[i]

    def evaluate(self, X_test, Y_test):
        """Count how many test samples are classified correctly.

        A sample counts as correct when the index of the largest output
        activation equals the index of the largest entry in its target column.

        Args:
            X_test: 2-D array with one test input per column.
            Y_test: 2-D array with the matching target per column.

        Returns:
            Tuple ``(correct_predictions, accuracy)``. For an empty test set
            this is ``(0, 0.0)`` rather than an error.
        """
        n_samples = X_test.shape[1]
        correct_predictions = 0
        for i in range(n_samples):
            output = self.feedforward(X_test[:, i])
            if np.argmax(output) == np.argmax(Y_test[:, i]):
                correct_predictions += 1
        # Divide by the sample count, not the leaked loop index, so that an
        # empty test set does not fail on an undefined variable.
        accuracy = correct_predictions / n_samples if n_samples else 0.0
        return correct_predictions, accuracy


`feedforward()` - calculates the output of the network.
<br>
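`train()` - iterates over the training set for the given number of epochs, reshaping each training input and its label into a column vector (shape `(n, 1)`) before passing them to `learn()`.
<br>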
`learn()` - calculates the activations of each layer and stores them in the list `A` (and likewise stores each `z` in the list `Z`, where `z` is the dot product of the weights and the previous activation, plus the bias). Then it calls `backprob()`.
<br>
`backprob()` - performs backpropagation to update weights and biases.

Preprocess data

In [None]:
# Load the MNIST CSV files. Forward slashes are used so the paths work on every
# OS and match the './data/...' paths of the download cell; the previous
# 'data\mnist_*.csv' form relied on the invalid escape sequence '\m' and only
# resolved on Windows.
train = pd.read_csv('data/mnist_train.csv').to_numpy()
test = pd.read_csv('data/mnist_test.csv').to_numpy()

# Column 0 is used as the label; the remaining columns are the inputs, scaled
# by 1/255 (presumably 0-255 pixel intensities -- TODO confirm).
X_train, Y_train = train[:,1:] / 255 , train[:,0]
X_test, Y_test = test[:,1:] / 255, test[:,0]

# Network.train / Network.evaluate expect one sample per column.
X_train=X_train.T
X_test=X_test.T

# One-hot encode the labels (row k of the 10x10 identity for label k), then
# transpose so each column is one target vector.
Y_train = np.eye(10)[Y_train].T
Y_test = np.eye(10)[Y_test].T

In [141]:
# Three-layer network: 784 inputs, one hidden layer of 100 units, 10 outputs.
net = Network([784,100,10])
# The `0:` slice selects every column, i.e. the full training set; narrow the
# slice to train on a subset instead.
a=X_train[:,0:]
y=Y_train[:,0:]

In [142]:
# Train for 100 epochs with learning rate eta = 0.5 (one update per sample).
net.train(a,y,100, 0.5)

In [139]:
# Spot check: output activations for the training sample in column 1.
net.feedforward(a[:,1])

array([[9.99999769e-01],
       [8.39796869e-14],
       [1.77719158e-07],
       [1.51154982e-11],
       [1.84063785e-12],
       [2.64546408e-06],
       [3.01327463e-07],
       [1.24442315e-10],
       [5.12831393e-10],
       [5.69167700e-06]])

In [143]:
# Returns (number of correctly classified test samples, accuracy).
net.evaluate(X_test, Y_test)

(9691, 0.9691)