In [1]:
import numpy as np

In [2]:
# NOTE(review): notebook convention is to keep every import in the first cell;
# the import stays here so that this cell keeps working when run on its own.
from sklearn.preprocessing import OneHotEncoder

# Open the archive; the arrays are read from it by key below.
mnist = np.load('mnist.npz')

# ---- Training split --------------------------------------------------------
images = mnist['x_train'][:60000]
labels = mnist['y_train'][:60000]

# Flatten every image, add a trailing axis of length 1 so that each sample is
# a column vector (which is what `weight @ sample` in Network.forward needs),
# and scale by 1/255. One vectorised reshape replaces the per-image loop.
# Presumably the pixels are 0..255 intensities -- confirm against the archive.
images = images.reshape(images.shape[0], -1, 1) / 255

# One-hot encode the labels, then give each label the same column-vector
# layout as the samples: (n_samples, n_classes, 1).
enc = OneHotEncoder(categories='auto')
labels = enc.fit_transform(labels.reshape(-1, 1)).toarray()
labels = labels.reshape(labels.shape[0], labels.shape[1], 1)

# ---- Test split ------------------------------------------------------------
images_test = mnist['x_test'][:]
labels_test = mnist['y_test'][:]

images_test = images_test.reshape(images_test.shape[0], -1, 1) / 255

# Reuse the encoder fitted on the training labels instead of fitting a second
# one: this guarantees that column k means the same class in both splits even
# if the test split happens to lack (or add) a class. With the encoder's
# default settings an unseen test label raises instead of being silently
# mis-encoded.
enc_test = enc  # alias kept so code that refers to `enc_test` still works
labels_test = enc.transform(labels_test.reshape(-1, 1)).toarray()
labels_test = labels_test.reshape(labels_test.shape[0], labels_test.shape[1], 1)


In [3]:
class Network():
    """Fully connected feed-forward network with sigmoid activations.

    Layers are appended with `addLayer`; training is plain gradient descent
    applied after every single sample (see `fit`).

    Every sample and every label is expected to be a column vector, i.e. an
    array of shape (features, 1), because `forward` computes
    ``weight @ activation + bias`` with biases of shape (neurons, 1).

    Attributes:
        inputshape: number of input features expected by the first layer.
        learning_rate: step size used for the parameter updates.
        weight: list of per-layer weight matrices, shape (neurons, inputs).
        bias: list of per-layer bias vectors, shape (neurons, 1).
    """

    def __init__(self, inputshape, learning_rate=0.05):
        """Create a network without layers.

        Args:
            inputshape: number of input features.
            learning_rate: gradient-descent step size.
        """
        self.inputshape = inputshape
        self.learning_rate = learning_rate
        # These lists must be created per instance. As class attributes they
        # would be shared, so layers added to one network would silently
        # appear in every other network as well.
        self.weight = []
        self.bias = []

    def getInputShape(self):
        """Return the input width the next added layer must accept.

        This is `inputshape` while the network has no layers, otherwise the
        neuron count of the most recently added layer.
        """
        if len(self.weight) == 0:
            return self.inputshape
        else:
            return self.weight[-1].shape[0]

    def addLayer(self, neurons):
        """Append a dense layer with `neurons` units.

        Weights and biases are drawn from NumPy's global random state
        (standard normal scaled by 0.1).
        """
        ishape = self.getInputShape()
        self.weight.append(np.random.randn(neurons, ishape) * 0.1)
        self.bias.append(np.random.randn(neurons, 1) * 0.1)

    def sigmoid(self, x):
        """Element-wise logistic function 1 / (1 + e**-x).

        Evaluated through exp(-|x|), which never overflows, so inputs of
        large magnitude do not trigger overflow warnings.
        """
        e = np.exp(-np.abs(x))
        # x >= 0: 1 / (1 + e**-x);  x < 0: e**x / (1 + e**x) (same function).
        return np.where(x >= 0, 1 / (1 + e), e / (1 + e))

    def sigmoid_derivative(self, x):
        """Element-wise derivative of `sigmoid` evaluated at `x`."""
        s = self.sigmoid(x)
        return s * (1 - s)

    def forward(self, ip):
        """Propagate one sample through every layer.

        Args:
            ip: input column vector of shape (inputshape, 1).

        Returns:
            (activations, sigmoid_derivatives): `activations[0]` is `ip` and
            `activations[k]` is the output of layer k (so the list has one
            more entry than there are layers); `sigmoid_derivatives[k]` is the
            sigmoid derivative at the pre-activation of layer k.
        """
        activations = [ip]
        sigmoid_derivatives = []

        for i in range(len(self.weight)):
            z = self.weight[i] @ activations[i] + self.bias[i]
            a = self.sigmoid(z)
            # sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z)); reuse `a` instead
            # of evaluating the sigmoid again.
            sd = a * (1 - a)
            activations.append(a)
            sigmoid_derivatives.append(sd)

        return activations, sigmoid_derivatives

    def backward(self, activations, sigmoid_derivatives, label):
        """Back-propagate one sample and update all parameters in place.

        The output gradient is 2 * (output - label), i.e. the gradient of the
        summed squared error.

        Args:
            activations: first value returned by `forward`.
            sigmoid_derivatives: second value returned by `forward`.
            label: target column vector with the shape of the network output.
        """
        # das[i] is the gradient w.r.t. the activation of the i-th layer
        # counted from the output side.
        das = [2 * (activations[-1] - label)]

        for i in range(len(self.weight)):
            dz = das[i] * sigmoid_derivatives[-i-1]
            dw = dz @ activations[-i-2].transpose()
            db = dz
            # Propagate with the weights as they were during the forward
            # pass, i.e. before this layer is updated below.
            das.append(self.weight[-i-1].transpose() @ dz)

            self.weight[-i-1] -= self.learning_rate * dw
            self.bias[-i-1] -= self.learning_rate * db

    def fit(self, ips, labels, epoch = 1):
        """Train on every sample, in order, for `epoch` passes.

        Args:
            ips: sequence of input column vectors.
            labels: sequence of target column vectors, aligned with `ips`.
            epoch: number of passes over the data.

        Raises:
            ValueError: if `ips` and `labels` differ in length (previously
                this case returned silently without training).
        """
        total = len(ips)
        if total != len(labels):
            raise ValueError(
                "ips and labels must have the same length, got "
                + str(total) + " and " + str(len(labels))
            )

        for e in range(epoch):
            print("Epoch ", e+1)
            shown = -1
            for i in range(total):
                # Forward pass
                activations, sigmoid_derivatives = self.forward(ips[i])
                # Backward pass and weight update
                self.backward(activations, sigmoid_derivatives, labels[i])

                # Rewrite the progress line only when the integer percentage
                # changes instead of once per sample.
                percent = int(((i+1) / total) * 100)
                if percent != shown:
                    print(str(percent)+" % Trained", end="\r")
                    shown = percent
            print()

    def predict(self, ips, labels):
        """Return the classification accuracy on `ips`, in percent.

        A sample counts as correct when the argmax of the network output
        equals the argmax of its label.

        Raises:
            ZeroDivisionError: if `ips` is empty.
        """
        c = 0
        for i in range(len(ips)):
            act, _ = self.forward(ips[i])

            op = np.argmax(act[-1])
            label = np.argmax(labels[i])

            if op == label:
                c += 1

        return c/len(ips) * 100
        

In [4]:
# Create the (still layer-less) network for 784-element input vectors;
# learning_rate keeps its default. 784 presumably equals images.shape[1],
# the flattened image length -- confirm against the data cell.
n = Network(inputshape=784)

In [5]:
# Network.addLayer draws its initial weights from NumPy's global random state,
# so seed it here to make the initialisation repeatable on a fresh
# Restart & Run All. The value 42 is arbitrary.
np.random.seed(42)

# Layer widths 784 -> 128 -> 64 -> 10; the final layer has one unit per
# one-hot label column.
# NOTE: re-running this cell appends three more layers to the same network;
# re-create `n` first if the cell has to be executed again.
n.addLayer(128)
n.addLayer(64)
n.addLayer(10)

In [6]:
# Run 5 passes over the training set; parameters are updated after every
# individual sample.
n.fit(ips=images, labels=labels, epoch=5)

Epoch  1
100 % Trained
Epoch  2
100 % Trained
Epoch  3
100 % Trained
Epoch  4
100 % Trained
Epoch  5
100 % Trained


In [7]:
# Accuracy (in percent) on the data the network was trained on
n.predict(ips=images, labels=labels)

97.86

In [8]:
# Accuracy (in percent) on the held-out test split
n.predict(ips=images_test, labels=labels_test)

96.99