In [47]:
import numpy as np
import idx2numpy as idx
import matplotlib.pyplot as plt

# Module-level scratch index; nothing in the visible cells reads it before a
# later cell rebinds it.
i = 0

# Training labels, read from the IDX file through idx2numpy.
file = "mnist dataset/train-labels.idx1-ubyte"
labels = idx.convert_from_file(file)

# Training images; `file` is reused as the path variable for the second read.
file = "mnist dataset/train-images.idx3-ubyte"
images = idx.convert_from_file(file)

def linearizeOne(image):
    """Flatten a 2-D image into a column of single-pixel rows.

    Args:
        image: 2-D sequence/array of pixel values (rows of pixels).  The
            image does not have to be square.

    Returns:
        A list with one entry per pixel, in row-major order, where each
        entry is a one-element list ``[pixel]`` -- i.e. the nested-list form
        of an (H*W, 1) column vector.  An empty image yields ``[]``.
    """
    # Walking the rows directly sizes the result from the real pixel count;
    # pre-sizing a buffer as len(image)**2 broke on non-square images.
    return [[pixel] for row in image for pixel in row]

def linearizeAll(images):
    """Flatten every image of a batch into a column vector and scale by 1/256.

    Args:
        images: array-like batch of 2-D images with shape (N, H, W).  H and
            W may differ.

    Returns:
        Float np.ndarray of shape (N, H*W, 1) holding each image's pixels in
        row-major order divided by 256 (so 8-bit pixel values end up in
        [0, 1)).  An empty batch yields an empty float array.

    Raises:
        ValueError: if a non-empty input has fewer than three dimensions.
    """
    batch = np.asarray(images)
    if batch.ndim < 3:
        if batch.size == 0:
            # Nothing to flatten: return an empty float array.
            return np.array([]) / 256
        raise ValueError(
            "linearizeAll expects a batch of 2-D images with shape (N, H, W); "
            f"got shape {batch.shape}"
        )
    count, height, width = batch.shape[:3]
    # A single reshape replaces the per-pixel Python loop and works for
    # non-square images; any axes beyond (N, H, W) are carried through as-is.
    flat_shape = (count, height * width, 1) + batch.shape[3:]
    return batch.reshape(flat_shape) / 256

def fix_labels(labels, num_classes=10):
    """One-hot encode integer class labels as column vectors.

    Args:
        labels: 1-D sequence/array of integer class indices.
        num_classes: length of every one-hot vector.  Defaults to 10, the
            value that used to be hard-coded.

    Returns:
        Integer np.ndarray of shape (len(labels), num_classes, 1) in which
        entry ``[n, labels[n], 0]`` is 1 and every other entry is 0.

    Raises:
        IndexError: if a label is not a valid index into ``num_classes``
            slots.
    """
    label_ids = np.asarray(labels)
    count = len(label_ids)
    one_hot = np.zeros((count, num_classes, 1), dtype=int)
    if count:
        # One fancy-index assignment sets the "hot" slot of every row at once.
        one_hot[np.arange(count), label_ids, 0] = 1
    return one_hot

In [48]:
# Turn the raw training arrays into network-ready form: images are flattened
# and scaled by linearizeAll, labels are one-hot encoded by fix_labels.
training_inputs = linearizeAll(images)
training_outputs = fix_labels(labels)

# clearing memory
# (left disabled; `labels` and `images` are rebound anyway when the test set
# is loaded in a later cell)
# labels = []
# images = []



In [68]:
class NeuralNet:
    """Fully connected feed-forward network with sigmoid activations.

    Parameters are updated by mini-batch gradient descent using gradients
    from backpropagation.  The output error term is ``(a - y) * sigmoid'(z)``,
    which corresponds to the quadratic cost ``0.5 * ||a - y||**2``.
    Inputs, activations and targets are column vectors of shape (n, 1).
    """

    def __init__(self, sizes):
        """Create a network with randomly initialised weights and biases.

        Args:
            sizes: number of neurons in each layer, from input to output,
                e.g. ``[784, 30, 10]``.
        """
        self.sizes = sizes
        self.num_layers = len(sizes)
        # One (n, 1) bias column for every layer except the input layer.
        self.biases = [np.random.randn(n, 1) for n in self.sizes[1:]]
        # weights[l] has shape (next_layer, prev_layer) so that
        # np.dot(weights[l], a) maps layer-l activations to layer l+1.
        self.weights = [
            np.random.randn(n_out, n_in)
            for n_in, n_out in zip(self.sizes[:-1], self.sizes[1:])
        ]

    def sigmoid(self, z):
        """Element-wise logistic function ``1 / (1 + exp(-z))``."""
        return 1/(1 + np.exp(-z))

    def sigmoid_derivative(self, z):
        """Element-wise derivative of the logistic function at ``z``."""
        # Evaluate the sigmoid once and reuse it.
        s = self.sigmoid(z)
        return s*(1 - s)

    def feedforward(self, a):
        """Propagate input column ``a`` through every layer; return the output."""
        for w, b in zip(self.weights, self.biases):
            a = self.sigmoid(np.dot(w, a) + b)
        return a

    def cost_derivative(self, a, y):
        """Derivative of the cost with respect to the output activation."""
        return a - y

    def backprop(self, x, y):
        """Compute the per-sample gradient of the cost.

        Args:
            x: input column vector.
            y: target output column vector.

        Returns:
            Tuple ``(delta_w, delta_b)``: two lists of arrays, layer by
            layer, shaped like ``self.weights`` and ``self.biases``.
        """
        delta_w = [np.zeros(w.shape) for w in self.weights]
        delta_b = [np.zeros(b.shape) for b in self.biases]

        # Forward pass: remember every weighted input z and activation.
        activation = x
        activations = [x]
        zs = []
        for w, b in zip(self.weights, self.biases):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = self.sigmoid(z)
            activations.append(activation)

        # Backward pass: error of the output layer first ...
        delta = self.cost_derivative(activations[-1], y) * self.sigmoid_derivative(zs[-1])
        delta_b[-1] = delta
        delta_w[-1] = np.dot(delta, activations[-2].T)

        # ... then push it back one layer at a time; `layer` counts from the
        # end, so layer == 2 is the last hidden layer.
        for layer in range(2, self.num_layers):
            delta = np.dot(self.weights[-layer + 1].T, delta) * self.sigmoid_derivative(zs[-layer])
            delta_b[-layer] = delta
            delta_w[-layer] = np.dot(delta, activations[-layer - 1].T)

        return (delta_w, delta_b)

    def epoch_train(self, training_data, labels, epochs, batch_size, learning_rate, test_data=None, test_labels=None):
        """Train for ``epochs`` passes of mini-batch gradient descent.

        The (sample, label) pairs are reshuffled with ``np.random.shuffle``
        at the start of every epoch and consumed in slices of
        ``batch_size``.

        Args:
            training_data: sequence of input column vectors.
            labels: sequence of target column vectors, aligned with
                ``training_data``.
            epochs: number of passes over the training data.
            batch_size: number of samples per parameter update.
            learning_rate: gradient-descent step size.
            test_data: optional sequence of inputs scored after each epoch.
            test_labels: optional one-hot targets aligned with ``test_data``.

        After each epoch the number of correctly classified test samples is
        printed.  If either ``test_data`` or ``test_labels`` is omitted the
        scoring step is skipped and only a completion line is printed.
        """
        for epoch in range(epochs):
            pairs = list(zip(training_data, labels))
            np.random.shuffle(pairs)
            training_data = [pair[0] for pair in pairs]
            labels = [pair[1] for pair in pairs]

            for start in range(0, len(training_data), batch_size):
                stop = start + batch_size
                self.train(training_data[start:stop], labels[start:stop], learning_rate)

            if test_data is None or test_labels is None:
                print(f"epoch {epoch+1}: complete")
                continue

            # A hit is a sample whose predicted class equals the class that
            # is "hot" in its label vector.
            hits = sum(
                self.evaluate(test_labels[j]) == self.predict(test_data[j])
                for j in range(len(test_data))
            )
            print(f"epoch {epoch+1}: {hits}/{len(test_data)}")

    def train(self, training_data, labels, learning_rate):
        """Apply one gradient-descent step averaged over a mini-batch.

        Args:
            training_data: inputs of the mini-batch.
            labels: targets of the mini-batch.
            learning_rate: step size; the summed gradient is scaled by
                ``learning_rate / len(training_data)``.

        An empty batch is a no-op (it would otherwise divide by zero).
        """
        batch_len = len(training_data)
        if batch_len == 0:
            return

        delta_w = [np.zeros(w.shape) for w in self.weights]
        delta_b = [np.zeros(b.shape) for b in self.biases]

        # Sum the per-sample gradients over the batch.
        for im, lb in zip(training_data, labels):
            sample_w, sample_b = self.backprop(im, lb)
            delta_w = [dw + sw for dw, sw in zip(delta_w, sample_w)]
            delta_b = [db + sb for db, sb in zip(delta_b, sample_b)]

        self.weights = [w - (learning_rate/batch_len*dw) for w, dw in zip(self.weights, delta_w)]
        self.biases = [b - (learning_rate/batch_len)*db for b, db in zip(self.biases, delta_b)]

    def predict(self, inp):
        """Return the index of the most activated output neuron for ``inp``."""
        return self.evaluate(self.feedforward(inp))

    def evaluate(self, a):
        """Return the index of the largest entry of ``a`` as a plain int.

        Ties resolve to the first maximal entry.  An empty vector yields 0.
        Unlike a running maximum seeded with 0, this is also correct when
        no entry of ``a`` is positive.
        """
        values = np.asarray(a)
        if values.size == 0:
            return 0
        return int(np.argmax(values))

In [70]:
# testing
# Load the t10k split used for testing.  NOTE: this rebinds the module-level
# `images` and `labels`, so from this cell on they hold the test data rather
# than the training data.

file = "mnist dataset/t10k-images.idx3-ubyte"
images = idx.convert_from_file(file)

file = "mnist dataset/t10k-labels.idx1-ubyte"
labels = idx.convert_from_file(file)

# Same preprocessing as for the training arrays.
testing_input = linearizeAll(images)
# NOTE(review): `testing_ouput` is a misspelling of "output"; the name is kept
# because the training cell passes it under this spelling.
testing_ouput = fix_labels(labels)


In [71]:
# Layer sizes, input to output.
N = NeuralNet([784, 30, 10])

# One keyword per line so each hyperparameter is easy to spot and tweak.
N.epoch_train(
    training_data=training_inputs,
    labels=training_outputs,
    epochs=30,
    batch_size=10,
    learning_rate=3.0,
    test_data=testing_input,
    test_labels=testing_ouput,
)

epoch 1: 9165/10000
epoch 2: 9331/10000
epoch 3: 9369/10000
epoch 4: 9408/10000
epoch 5: 9408/10000
epoch 6: 9438/10000
epoch 7: 9480/10000
epoch 8: 9448/10000
epoch 9: 9473/10000
epoch 10: 9468/10000
epoch 11: 9470/10000
epoch 12: 9488/10000
epoch 13: 9500/10000
epoch 14: 9485/10000
epoch 15: 9507/10000
epoch 16: 9518/10000
epoch 17: 9478/10000
epoch 18: 9488/10000
epoch 19: 9513/10000
epoch 20: 9523/10000
epoch 21: 9532/10000
epoch 22: 9518/10000
epoch 23: 9506/10000
epoch 24: 9537/10000
epoch 25: 9533/10000
epoch 26: 9527/10000
epoch 27: 9525/10000
epoch 28: 9526/10000
epoch 29: 9542/10000
epoch 30: 9530/10000


In [None]:
# Index of the test sample to inspect.
i = 4

# Class predicted by the trained network for that sample.
a = N.predict(testing_input[i])
# Label of the same sample; `labels` holds the test labels here because the
# test-loading cell rebound it.
b = labels[i]

print(a, b)

# Show the raw (un-flattened) image for a visual check.
plt.imshow(images[i], cmap = plt.cm.binary)