In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt

# Read the Kaggle digit-recognizer training CSV and hand it to NumPy in one step.
# Downstream code treats column 0 of each row as the label and columns 1..784 as pixels.
data = np.array(pd.read_csv('/kaggle/input/digit-recognizer/train.csv'))

def interpret_output(A2):
    """Return the predicted class index for the first column of ``A2``.

    The arg-max is taken along axis 0 (one winner per column) and the
    winner belonging to column 0 is returned.
    """
    winners = np.argmax(A2, axis=0)
    return winners[0]

def encode(y):
    """One-hot encode label ``y`` as a (10, 1) float column vector.

    Every entry is 0 except row ``y``, which is set to 1.
    """
    one_hot = np.zeros((10, 1))
    one_hot[y, 0] = 1
    return one_hot

def sigmoid(x, deriv=False):
    """Numerically stable logistic function, optionally its derivative.

    Parameters
    ----------
    x : scalar or array-like
        Pre-activation value(s).
    deriv : bool, default False
        When true, return ``sigmoid(x) * (1 - sigmoid(x))`` instead of
        ``sigmoid(x)``.

    Returns
    -------
    Same shape as ``x`` (a NumPy scalar for scalar input); the input's
    floating dtype is preserved.

    Notes
    -----
    The textbook form ``1 / (1 + exp(-x))`` overflows in ``exp`` for large
    negative ``x`` and emits a RuntimeWarning. Here ``exp`` is only ever
    evaluated on ``-|x|``, which lies in (0, 1] and cannot overflow:
    ``1 / (1 + e)`` is used for ``x >= 0`` and ``e / (1 + e)`` for ``x < 0``.
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))
    # ``[()]`` unwraps a 0-d result back into a scalar and is a no-op for n-d arrays.
    sig = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))[()]
    if deriv:
        return sig * (1 - sig)
    return sig


def init_params(seed=None):
    """Create initial parameters for the 784-30-10 network.

    Parameters
    ----------
    seed : int or None, default None
        When ``None`` the weights are drawn from NumPy's global random state
        (so ``np.random.seed`` still controls them). When given, a dedicated
        ``np.random.default_rng(seed)`` generator is used so the
        initialisation is reproducible.

    Returns
    -------
    W1 : (30, 784) weights drawn uniformly from [-0.5, 0.5)
    b1 : (30, 1) zeros
    W2 : (10, 30) weights drawn uniformly from [-0.5, 0.5)
    b2 : (10, 1) zeros

    All four arrays use ``np.longdouble``. That is the same type as
    ``float128`` on platforms that provide it, but unlike the ``'float128'``
    dtype string it exists on every NumPy build.
    """
    rng = np.random if seed is None else np.random.default_rng(seed)
    W1 = rng.uniform(-0.5, 0.5, (30, 784)).astype(np.longdouble)
    b1 = np.zeros((30, 1), dtype=np.longdouble)
    W2 = rng.uniform(-0.5, 0.5, (10, 30)).astype(np.longdouble)
    b2 = np.zeros((10, 1), dtype=np.longdouble)
    return W1, b1, W2, b2

def forward_prop(W1, b1, W2, b2, A0):
    """Run one forward pass through the two sigmoid layers.

    Returns the tuple ``(A1, Z1, A2, Z2)``: hidden activation, hidden
    pre-activation, output activation and output pre-activation.
    """
    hidden_pre = np.dot(W1, A0) + b1
    hidden_act = sigmoid(hidden_pre)
    output_pre = np.dot(W2, hidden_act) + b2
    output_act = sigmoid(output_pre)
    return hidden_act, hidden_pre, output_act, output_pre

def back_prop(A1, Z1, A2, Z2, A0, y):
    #cost function = 1/2 * sum((A2_i - Y_i)^2)
    dZ2 = (A2 - encode(y))*sigmoid(Z2, deriv=True)
    dW2 = dZ2.dot(np.transpose(A1))
    db2 = dZ2
    dZ1 = np.transpose(dW2).dot(dZ2) * sigmoid(Z1, deriv=True)
    dW1 = dZ1.dot(np.transpose(A0))
    db1 = dZ1
    return dW1, db1, dW2, db2

def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
    W1 = W1 - alpha*dW1
    W2 = W2 - alpha*dW2
    b1 = b1 - alpha*db1
    b2 = b2 - alpha*db2
    return W1, b1, W2, b2

In [2]:
def train(data, epochs, alpha):
    W1, b1, W2, b2 = init_params()
    for i in range(epochs):
        for j in range(len(data)-1):
            A0 = data[j][1:785]
            A0.shape += (1,)
            y = data[j][0]
            A1, Z1, A2, Z2 = forward_prop(W1, b1, W2, b2, A0)
            dW1, db1, dW2, db2 = back_prop(A1, Z1, A2, Z2, A0, y)
            W1, b1, W2, b2 = update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha)
    return W1, b1, W2, b2

# Fit on the first 300 rows only: 10000 passes over them with learning rate 0.01.
a, b, c, d = train(data[:300], epochs=10000, alpha=0.01)

In [3]:
def test(data, W1, b1, W2, b2):
    """Print the percentage of rows in ``data`` that the network classifies correctly.

    For each row, column 0 is the expected label and columns 1..784 are fed
    through ``forward_prop``; the prediction is ``interpret_output`` of the
    output activation. Nothing is returned.
    """
    def is_hit(row):
        # Append a trailing axis so the inputs form a column vector.
        column = row[1:785][..., np.newaxis]
        output_act = forward_prop(W1, b1, W2, b2, column)[2]
        return interpret_output(output_act) == row[0]

    results = sum(1 for row in data if is_hit(row))
    print('accuracy:' , results/len(data) * 100 , "%")
# Score the trained parameters on rows 1000..41999, which do not overlap the 300 training rows.
test(data[1000:42000], W1=a, b1=b, W2=c, b2=d)

accuracy: 41.06341463414634 %
