In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
data = pd.read_csv('/kaggle/input/digit-recognizer/train.csv')

def init_params():
    W1 = np.random.uniform(-0.5, 0.5, (20, 784)).astype('float128')
    b1 = np.zeros((20,1)).astype('float128')
    W2 = np.random.uniform(-0.5, 0.5, (10, 20)).astype('float128')
    b2 = np.zeros((10,1)).astype('float128')
    return W1, b1, W2, b2

def sigmoid(x, deriv=False):
    sig = 1/(1 + np.exp(-x))
    if(deriv):
        return sig*(1-sig)
    return sig

def forward_prop(W1, b1, W2, b2, x):
    Z1=W1.dot(x) + b1
    A1=sigmoid(Z1)
    Z2=W2.dot(A1) + b2
    A2=sigmoid(Z2)
    return A1, A2

def encode(x):
    encoded = np.zeros((1, 10))
    encoded[0, x] = 1
    return np.transpose(encoded)


def back_prop(A1, A2, x, y):
    #cost function = 1/10 * sum((A2_i - Y_i)^2)
    dA2 = A2 - y
    dW2 = dA2.dot(np.transpose(A1))
    db2 = dA2
    dA1 = np.transpose(dW2).dot(dA2) * sigmoid(A1, deriv=True)
    dW1 = dA1.dot(np.transpose(x))
    db1 = dA1
    return dW1, db1, dW2, db2

def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
    W1 = W1 - alpha*dW1
    W2 = W2 - alpha*dW2
    b1 = b1 - alpha*db1
    b2 = b2 - alpha*db2
    return W1, b1, W2, b2

In [2]:
data = np.array(data)
alpha = 0.1
x = data[0][1:785]
y=encode(data[0][0])
x.shape += (1,)
W1, b1, W2, b2 = init_params()
A1, A2 = forward_prop(W1, b1, W2, b2, x)
dW1, db1, dW2, db2 = back_prop(A1, A2, x, y)
update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha)

def evaluate(x,y):
    idx = np.argmax(x, axis=0)[0]
    return y[idx][0] == 1

def train(data, epochs, alpha):
    W1, b1, W2, b2 = init_params()
    for i in range(epochs):
        for j in range(len(data)-1):
            x = data[j][1:785]
            x.shape += (1,)
            y=encode(data[j][0])
            A1, A2 = forward_prop(W1, b1, W2, b2, x)
            dW1, db1, dW2, db2 = back_prop(A1, A2, x, y)
            W1, b1, W2, b2 = update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha)
    return W1, b1, W2, b2

a, b, c, d = train(data[0:10000], 5, 0.1)

  del sys.path[0]


In [3]:
def test(data, W1, b1, W2, b2):
    results = 0
    for i in range(len(data)-1):
        x = data[i][1:785]
        x.shape += (1,)
        y=encode(data[i][0])
        A1, A2 = forward_prop(W1, b1, W2, b2, x)
        res = evaluate(A2, y)
        if(res):
            results = results + 1
    print('accuracy:' , results/len(data) * 100 , "%")
test(data[0:10000], a, b, c, d)

  del sys.path[0]


accuracy: 10.39 %
