In [None]:
import pandas as pd
import numpy as np
import cv2
from tqdm import tqdm
import os
import csv

In [None]:
# Mount Google Drive inside the Colab VM so the dataset and the saved
# parameter CSVs under '/content/drive' can be read and written below.
# force_remount=True re-mounts even if the drive is already mounted.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


In [None]:
# GET BEST PARAMS
# Warm-start: read the weight/bias matrices written by the saving cell.
# pd.read_csv treats the first row of each file as a header, so only the
# remaining rows end up in the arrays.

os.chdir('/content/drive/My Drive/Digit Recognizer/Parameters')
params = {}
for l in range(1,4):
    # Weights first, then biases, so the dict keeps the W1, b1, W2, b2, ... order.
    for kind in ('W', 'b'):
        key = kind + str(l)
        frame = pd.read_csv('params ' + key + '.csv')
        params[key] = np.array(frame.values)

In [None]:
def create_op_vec(i):
    """Return a (10, 1) float column vector with a 1 at row ``i`` and 0 elsewhere.

    Used to one-hot encode a digit label for the 10-unit output layer.
    """
    one_hot = np.zeros((10, 1))
    one_hot[i] = 1
    return one_hot

In [None]:
# loading data
# Switch to the project folder on the mounted Drive and read train.csv into
# a plain NumPy array (one array row per CSV data row).
os.chdir('/content/drive/My Drive/Digit Recognizer')
train_csv = pd.read_csv('train.csv')
data = np.array(train_csv.values)

In [None]:
# gathering data
# Column 0 holds the digit label; columns 1..784 hold the pixel intensities.
Y = data[:, 0]
X = np.uint8(data[:, 1:785])

# Binarise all pixels in one vectorised step: intensities strictly above 80
# become 1, everything else 0. This yields the same uint8 {0, 1} matrix as
# thresholding each row with cv2.THRESH_BINARY (maxval 255) and dividing by
# 255, without 42,000 Python-level calls. It is done BEFORE the split so the
# train/test slices no longer depend on being views that get mutated later.
X = (X > 80).astype(np.uint8)

# generating test and train sets
x_train = X[0:40000]
y_train = Y[0:40000]
x_test = X[40000:42000]
y_test = Y[40000:42000]

# One-hot encode the labels as (10, m) float matrices, one column per example.
# Indexing the identity matrix by label avoids the per-sample Python loop and
# the hard-coded 40000 / 2000 reshapes.
Y_train = np.eye(10)[y_train.astype(int)].T
Y_test = np.eye(10)[y_test.astype(int)].T

In [None]:
# cost function

In [None]:
def cost_function(parameters, A, Y, lambd):
    """Mean binary cross-entropy over all output units plus an L2 weight penalty.

    Parameters
    ----------
    parameters : dict
        Holds 'W1', 'b1', ..., 'WL', 'bL'; only the 'W*' entries enter the penalty.
    A : array-like, same shape as ``Y``
        Predicted activations, expected to lie in [0, 1].
    Y : ndarray of shape (n_outputs, m)
        Target matrix; ``m`` (the number of columns) is the normalising count.
    lambd : float
        L2 regularisation strength; 0 disables the penalty.

    Returns
    -------
    float
        ``-sum(Y*log(A) + (1-Y)*log(1-A)) / m + lambd * sum(W**2) / (2*m)``.
    """
    m = Y.shape[1]

    # Keep activations strictly inside (0, 1): a saturated sigmoid can return
    # exactly 0.0 or 1.0, which would make log() produce -inf and 0 * -inf = NaN.
    eps = 1e-15
    A_safe = np.clip(np.asarray(A, dtype=float), eps, 1 - eps)

    cross_entropy = -np.sum(Y * np.log(A_safe) + (1 - Y) * np.log(1 - A_safe)) / m

    n_layers = len(parameters) // 2
    l2_sum = sum(
        np.sum(np.square(parameters['W' + str(l)])) for l in range(1, n_layers + 1)
    )
    return cross_entropy + l2_sum * lambd / (2 * m)

In [None]:
# FORWARD PROPAGATION

In [None]:
def initialise_parameters(layer_dims):
    """Create small random weights and biases for a fully connected network.

    ``layer_dims`` lists the layer widths from input to output. For each
    consecutive pair ``(n_in, n_out)`` the result holds ``W<k>`` of shape
    ``(n_out, n_in)`` and ``b<k>`` of shape ``(n_out, 1)``, both drawn from a
    standard normal distribution scaled by 0.01. Layers are numbered from 1.
    """
    parameters = {}
    layer_sizes = zip(layer_dims[:-1], layer_dims[1:])

    for index, (n_in, n_out) in enumerate(layer_sizes, start=1):
        # Draw the weights before the biases so the random stream is consumed
        # in a fixed W, b, W, b, ... order.
        weights = np.random.randn(n_out, n_in) * 0.01
        biases = np.random.randn(n_out, 1) * 0.01

        assert weights.shape == (n_out, n_in)
        assert biases.shape == (n_out, 1)

        parameters["W" + str(index)] = weights
        parameters["b" + str(index)] = biases

    return parameters

In [None]:
def relu(Z):
    """Element-wise rectified linear unit: max(0, z) for every entry of ``Z``."""
    rectified = np.maximum(0, Z)
    return rectified

In [None]:
def sigmoid(Z):
    """Element-wise logistic function 1 / (1 + exp(-z))."""
    denominator = 1 + np.exp(-Z)
    return 1 / denominator

In [None]:
def linear_jump(A_prev, W, b):
    """Affine step of one layer: ``W . A_prev + b``.

    ``b`` is added with NumPy broadcasting, so a column vector is applied to
    every column of the product.
    """
    weighted = np.dot(W, A_prev)
    return weighted + b

In [None]:
def forward_prop(parameters, X, Y, i, lambd):
    """Run the network forward: ReLU hidden layers followed by a sigmoid output.

    Parameters
    ----------
    parameters : dict with 'W1', 'b1', ..., 'WL', 'bL'.
    X : input matrix fed to layer 1.
    Y : targets, used only when the cost is printed.
    i : iteration counter; the cost is printed whenever ``i % 5 == 0``.
    lambd : L2 strength forwarded to ``cost_function`` for the printed cost.

    Returns
    -------
    (cacheA, cacheZ, AL)
        ``cacheA`` is ``[X, A1, ..., A_{L-1}]`` (the output activation is NOT
        appended), ``cacheZ`` is ``[Z1, ..., ZL]`` and ``AL`` is the sigmoid output.
    """
    n_layers = int(len(parameters)/2)
    activations = [X]
    pre_activations = []

    # Hidden layers: affine map followed by ReLU.
    for layer in range(1, n_layers):
        weights = parameters["W" + str(layer)]
        bias = parameters["b" + str(layer)]
        pre_activations.append(linear_jump(activations[-1], weights, bias))
        activations.append(relu(pre_activations[-1]))

    # Output layer: affine map followed by sigmoid.
    out_weights = parameters["W" + str(n_layers)]
    out_bias = parameters["b" + str(n_layers)]
    pre_activations.append(linear_jump(activations[-1], out_weights, out_bias))
    AL = sigmoid(pre_activations[-1])

    # Progress report every fifth iteration.
    if i % 5 == 0:
        cost = cost_function(parameters, AL, Y, lambd)
        print("Cost at iteration " + str(i) + " = " + str(cost))

    return activations, pre_activations, AL

In [None]:
# BACKWARD PROPAGATION

In [None]:
def relu_backward(Z):
    """Derivative of ReLU evaluated at ``Z``: 1 where ``Z > 0``, else 0.

    Computed with a single vectorised comparison instead of visiting every
    element in a Python double loop, which was very slow on the large
    hidden-layer matrices processed every training iteration.

    Parameters
    ----------
    Z : array-like of any shape
        Pre-activation values. The input is not modified.

    Returns
    -------
    ndarray
        Array with the shape and dtype of ``Z`` containing only 0s and 1s.
        NaN entries compare as not-positive and therefore map to 0.
    """
    Z = np.asarray(Z)
    return (Z > 0).astype(Z.dtype)

In [None]:
def sigmoid_backward(Z):
    """Derivative of the logistic function at ``Z``: sigmoid(Z) * (1 - sigmoid(Z))."""
    activation = sigmoid(Z)
    complement = 1 - activation
    return activation * complement

In [None]:
def back_prop(parameters, X, Y, iteration, lambd):
    """Compute L2-regularised gradients for every layer with one forward/backward pass.

    Parameters
    ----------
    parameters : dict with 'W1', 'b1', ..., 'WL', 'bL'.
    X : input matrix; ``X.shape[1]`` is used as the example count ``m``.
    Y : targets with the same shape as the network output.
    iteration : forwarded to ``forward_prop`` (controls cost printing).
    lambd : L2 regularisation strength.

    Returns
    -------
    dict
        'dW<l>' and 'db<l>' for l = L, ..., 1 (inserted in that order).
    """
    m = X.shape[1]
    L = int(len(parameters)/2)
    cacheA, cacheZ, AL = forward_prop(parameters, X, Y, iteration, lambd)
    gradients = {}

    # Output layer: with a sigmoid output and cross-entropy cost the error
    # term reduces to (AL - Y).
    dZ = AL - Y
    dA = None

    for l in range(L, 0, -1):
        if l < L:
            # Hidden layer: gate the propagated error by the ReLU derivative.
            dZ = dA * relu_backward(cacheZ[l-1])

        W = parameters['W' + str(l)]
        gradients['dW' + str(l)] = np.dot(dZ, cacheA[l-1].T)/m + lambd * W / m
        gradients['db' + str(l)] = np.sum(dZ, axis=1, keepdims=True)/m

        # Only propagate further back when another layer will consume it; the
        # error with respect to the raw input is never used, so skipping it
        # for l == 1 saves one matrix product per call.
        if l > 1:
            dA = np.dot(W.T, dZ)

    return gradients

In [None]:
# UPDATING PARAMETERS

def jump(parameters, gradients, alpha):
    """Apply one gradient-descent step to every weight and bias.

    Each entry is updated with ``-=``, so the arrays held in ``parameters``
    are modified in place; the same dict is returned for convenience.
    ``alpha`` is the learning rate and ``gradients`` holds 'dW<l>' / 'db<l>'.
    """
    n_layers = int(len(parameters)/2)
    for layer in range(1, n_layers + 1):
        # Weights first, then biases, for each layer.
        for prefix in ("W", "b"):
            key = prefix + str(layer)
            parameters[key] -= alpha*gradients["d" + key]

    return parameters

In [None]:
# TRAINING OUR MODEL

def train(X, Y, alpha, iterations, layer_dims, parameters, lambd):
    """Run ``iterations`` gradient-descent steps on the whole of ``X`` / ``Y``.

    Every step computes gradients with ``back_prop`` and applies them with
    ``jump`` using learning rate ``alpha`` and L2 strength ``lambd``. A tqdm
    progress bar tracks the loop. ``layer_dims`` is accepted but not used in
    the body. Returns the updated parameter dict.
    """
    for step in tqdm(range(iterations)):
        step_gradients = back_prop(parameters, X, Y, step, lambd)
        parameters = jump(parameters, step_gradients, alpha)

    return parameters

In [None]:
# SPECIFYING NEURAL NETWORK

# Layer widths: 784 inputs (one per pixel column), two hidden layers of 500
# units each, and 10 outputs (one per digit class).
n_x = 784
n_y = 10
n_h1 = 500
n_h2 = 500
# NOTE(review): n_h3 is defined but not included in layer_dims below.
n_h3 = 500
layer_dims = np.array([n_x, n_h1, n_h2, n_y])
# NOTE(review): r_parameters is a fresh random initialisation, but the visible
# training cell passes the CSV-loaded `params` to train() instead - confirm
# whether this is only kept for training from scratch.
r_parameters = initialise_parameters(layer_dims)

In [None]:
# GETTING OPTIMAL PARAMETERS
# Continue gradient descent from the loaded `params` on the full training
# set: learning rate 0.102, 1000 iterations, lambd = 0 (no L2 penalty).
# x_train is transposed so that each column is one example.

params = train(x_train.T, Y_train, 0.102, 1000, layer_dims, params, 0)

  0%|          | 0/1000 [00:00<?, ?it/s]

Cost at iteration 0 = 0.18760401256972692


  0%|          | 5/1000 [02:50<9:24:43, 34.05s/it]

Cost at iteration 5 = 0.1870445813893506


  1%|          | 10/1000 [05:40<9:22:01, 34.06s/it]

Cost at iteration 10 = 0.18648743285472622


  2%|▏         | 15/1000 [08:29<9:15:29, 33.84s/it]

Cost at iteration 15 = 0.18593247139502986


  2%|▏         | 20/1000 [11:22<9:25:03, 34.60s/it]

Cost at iteration 20 = 0.18537952340248498


  2%|▎         | 25/1000 [14:11<9:13:02, 34.03s/it]

Cost at iteration 25 = 0.18482906443387517


  3%|▎         | 30/1000 [17:00<9:06:11, 33.78s/it]

Cost at iteration 30 = 0.18428127903874164


  4%|▎         | 35/1000 [19:49<9:04:52, 33.88s/it]

Cost at iteration 35 = 0.18373565663061722


  4%|▍         | 40/1000 [22:42<9:08:32, 34.28s/it]

Cost at iteration 40 = 0.18319217892771045


  4%|▍         | 45/1000 [25:31<8:58:14, 33.82s/it]

Cost at iteration 45 = 0.1826507537828093


  5%|▌         | 50/1000 [28:21<8:57:22, 33.94s/it]

Cost at iteration 50 = 0.18211148198898863


  6%|▌         | 55/1000 [31:13<9:07:39, 34.77s/it]

Cost at iteration 55 = 0.1815742219398126


  6%|▌         | 60/1000 [34:03<8:54:25, 34.11s/it]

Cost at iteration 60 = 0.18103917202379788


  6%|▋         | 65/1000 [36:51<8:47:09, 33.83s/it]

Cost at iteration 65 = 0.1805066782727994


  7%|▋         | 70/1000 [39:41<8:44:11, 33.82s/it]

Cost at iteration 70 = 0.17997650693375217


  8%|▊         | 75/1000 [42:34<8:50:23, 34.40s/it]

Cost at iteration 75 = 0.17944870346301273


  8%|▊         | 80/1000 [45:24<8:41:15, 34.00s/it]

Cost at iteration 80 = 0.17892305200411587


  8%|▊         | 85/1000 [48:13<8:37:40, 33.95s/it]

Cost at iteration 85 = 0.17839957102352794


  9%|▉         | 90/1000 [51:04<8:36:50, 34.08s/it]

Cost at iteration 90 = 0.1778784814376164


 10%|▉         | 95/1000 [53:56<8:34:13, 34.09s/it]

Cost at iteration 95 = 0.17735960248337088


 10%|█         | 100/1000 [56:45<8:28:26, 33.90s/it]

Cost at iteration 100 = 0.1768427983562805


 10%|█         | 105/1000 [59:35<8:25:51, 33.91s/it]

Cost at iteration 105 = 0.1763280935192949


 11%|█         | 110/1000 [1:02:27<8:32:41, 34.56s/it]

Cost at iteration 110 = 0.1758153224553582


 12%|█▏        | 115/1000 [1:05:16<8:20:44, 33.95s/it]

Cost at iteration 115 = 0.17530472730551816


 12%|█▏        | 120/1000 [1:08:06<8:19:20, 34.05s/it]

Cost at iteration 120 = 0.17479644872656286


 12%|█▎        | 125/1000 [1:10:55<8:12:23, 33.76s/it]

Cost at iteration 125 = 0.1742901242034473


 13%|█▎        | 130/1000 [1:13:48<8:15:26, 34.17s/it]

Cost at iteration 130 = 0.17378582823456526


 14%|█▎        | 135/1000 [1:16:38<8:10:10, 34.00s/it]

Cost at iteration 135 = 0.17328344596883824


 14%|█▍        | 140/1000 [1:19:27<8:05:36, 33.88s/it]

Cost at iteration 140 = 0.17278279901130916


 14%|█▍        | 145/1000 [1:22:20<8:18:22, 34.97s/it]

Cost at iteration 145 = 0.17228378976385175


 15%|█▌        | 150/1000 [1:25:11<8:05:20, 34.26s/it]

Cost at iteration 150 = 0.17178645661225986


 16%|█▌        | 155/1000 [1:28:00<7:57:15, 33.89s/it]

Cost at iteration 155 = 0.17129127485692894


 16%|█▌        | 160/1000 [1:30:50<7:56:55, 34.07s/it]

Cost at iteration 160 = 0.1707979532898607


 16%|█▋        | 165/1000 [1:33:43<8:00:18, 34.51s/it]

Cost at iteration 165 = 0.1703063627004541


 17%|█▋        | 170/1000 [1:36:33<7:50:40, 34.02s/it]

Cost at iteration 170 = 0.16981658706238126


 18%|█▊        | 175/1000 [1:39:22<7:45:56, 33.89s/it]

Cost at iteration 175 = 0.16932880193272037


 18%|█▊        | 180/1000 [1:42:17<8:00:54, 35.19s/it]

Cost at iteration 180 = 0.16884297533863168


 18%|█▊        | 185/1000 [1:45:06<7:43:04, 34.09s/it]

Cost at iteration 185 = 0.16835902031910385


 19%|█▉        | 190/1000 [1:47:57<7:41:57, 34.22s/it]

Cost at iteration 190 = 0.16787678485143046


 20%|█▉        | 195/1000 [1:50:47<7:37:21, 34.09s/it]

Cost at iteration 195 = 0.16739658504741675


 20%|██        | 200/1000 [1:53:42<7:41:52, 34.64s/it]

Cost at iteration 200 = 0.16691842123328465


 20%|██        | 205/1000 [1:56:32<7:31:06, 34.05s/it]

Cost at iteration 205 = 0.1664420663525086


 21%|██        | 210/1000 [1:59:23<7:30:10, 34.19s/it]

Cost at iteration 210 = 0.16596767839823495


 22%|██▏       | 215/1000 [2:02:12<7:24:22, 33.96s/it]

Cost at iteration 215 = 0.16549500723823143


 22%|██▏       | 220/1000 [2:05:06<7:25:22, 34.26s/it]

Cost at iteration 220 = 0.1650242629003091


 22%|██▎       | 225/1000 [2:07:57<7:21:38, 34.19s/it]

Cost at iteration 225 = 0.16455523371037592


 23%|██▎       | 230/1000 [2:10:46<7:16:19, 34.00s/it]

Cost at iteration 230 = 0.16408800515382396


 24%|██▎       | 235/1000 [2:13:40<7:22:49, 34.73s/it]

Cost at iteration 235 = 0.16362272898679533


 24%|██▍       | 240/1000 [2:16:31<7:13:59, 34.26s/it]

Cost at iteration 240 = 0.16315904116943103


 24%|██▍       | 245/1000 [2:19:21<7:08:16, 34.03s/it]

Cost at iteration 245 = 0.162697221434105


 25%|██▌       | 250/1000 [2:22:11<7:05:40, 34.05s/it]

Cost at iteration 250 = 0.162237183105419


 26%|██▌       | 255/1000 [2:25:05<7:06:40, 34.36s/it]

Cost at iteration 255 = 0.1617788172970254


 26%|██▌       | 260/1000 [2:27:54<6:59:43, 34.03s/it]

Cost at iteration 260 = 0.1613221936161827


 26%|██▋       | 265/1000 [2:30:45<6:56:11, 33.97s/it]

Cost at iteration 265 = 0.16086736442926855


 27%|██▋       | 270/1000 [2:33:39<7:05:29, 34.97s/it]

Cost at iteration 270 = 0.1604143392443696


 28%|██▊       | 275/1000 [2:36:29<6:52:06, 34.11s/it]

Cost at iteration 275 = 0.15996296440195992


 28%|██▊       | 280/1000 [2:39:19<6:48:08, 34.01s/it]

Cost at iteration 280 = 0.1595130734835718


 28%|██▊       | 285/1000 [2:42:09<6:44:26, 33.94s/it]

Cost at iteration 285 = 0.15906492441494224


 29%|██▉       | 290/1000 [2:45:02<6:46:39, 34.37s/it]

Cost at iteration 290 = 0.1586182863412712


 30%|██▉       | 295/1000 [2:47:52<6:39:10, 33.97s/it]

Cost at iteration 295 = 0.15817323266360606


 30%|███       | 300/1000 [2:50:43<6:40:03, 34.29s/it]

Cost at iteration 300 = 0.15772968452181982


 30%|███       | 305/1000 [2:53:36<6:45:20, 34.99s/it]

Cost at iteration 305 = 0.15728752341137728


 31%|███       | 310/1000 [2:56:26<6:32:25, 34.12s/it]

Cost at iteration 310 = 0.15684685029776158


 32%|███▏      | 315/1000 [2:59:16<6:27:44, 33.96s/it]

Cost at iteration 315 = 0.15640799988287102


 32%|███▏      | 320/1000 [3:02:06<6:24:53, 33.96s/it]

Cost at iteration 320 = 0.15597074324607343


 32%|███▎      | 325/1000 [3:04:58<6:26:10, 34.33s/it]

Cost at iteration 325 = 0.15553486718061343


 33%|███▎      | 330/1000 [3:07:48<6:21:02, 34.12s/it]

Cost at iteration 330 = 0.1551003263848861


 34%|███▎      | 335/1000 [3:10:37<6:14:44, 33.81s/it]

Cost at iteration 335 = 0.15466720298400427


 34%|███▍      | 340/1000 [3:13:27<6:13:21, 33.94s/it]

Cost at iteration 340 = 0.1542356643927712


 34%|███▍      | 345/1000 [3:16:21<6:13:42, 34.23s/it]

Cost at iteration 345 = 0.15380593065275033


 35%|███▌      | 350/1000 [3:19:10<6:08:23, 34.00s/it]

Cost at iteration 350 = 0.15337793387878554


 36%|███▌      | 355/1000 [3:22:00<6:04:45, 33.93s/it]

Cost at iteration 355 = 0.15295143154997665


 36%|███▌      | 360/1000 [3:24:53<6:10:50, 34.77s/it]

Cost at iteration 360 = 0.15252643788159156


 36%|███▋      | 365/1000 [3:27:42<5:59:20, 33.95s/it]

Cost at iteration 365 = 0.15210298936153677


 37%|███▋      | 370/1000 [3:30:32<5:56:33, 33.96s/it]

Cost at iteration 370 = 0.15168118397012337


 38%|███▊      | 375/1000 [3:33:22<5:53:17, 33.92s/it]

Cost at iteration 375 = 0.1512609334647073


 38%|███▊      | 380/1000 [3:36:15<5:54:41, 34.33s/it]

Cost at iteration 380 = 0.1508421076737774


 38%|███▊      | 385/1000 [3:39:04<5:47:25, 33.90s/it]

Cost at iteration 385 = 0.15042484687159907


 39%|███▉      | 390/1000 [3:41:55<5:46:29, 34.08s/it]

Cost at iteration 390 = 0.15000893417689132


 40%|███▉      | 395/1000 [3:44:47<5:49:32, 34.66s/it]

Cost at iteration 395 = 0.1495946263024851


 40%|████      | 400/1000 [3:47:37<5:41:07, 34.11s/it]

Cost at iteration 400 = 0.14918170953094237


 40%|████      | 405/1000 [3:50:27<5:37:10, 34.00s/it]

Cost at iteration 405 = 0.14877034856480553


 41%|████      | 410/1000 [3:53:17<5:34:22, 34.00s/it]

Cost at iteration 410 = 0.14836054551791697


 42%|████▏     | 415/1000 [3:56:10<5:33:32, 34.21s/it]

Cost at iteration 415 = 0.147952387304344


 42%|████▏     | 420/1000 [3:59:01<5:30:21, 34.17s/it]

Cost at iteration 420 = 0.1475459160410029


 42%|████▎     | 425/1000 [4:01:50<5:24:30, 33.86s/it]

Cost at iteration 425 = 0.14714110824278542


 43%|████▎     | 430/1000 [4:04:43<5:33:31, 35.11s/it]

Cost at iteration 430 = 0.14673781042653566


 44%|████▎     | 435/1000 [4:07:33<5:20:55, 34.08s/it]

Cost at iteration 435 = 0.14633589168125918


 44%|████▍     | 440/1000 [4:10:22<5:16:54, 33.95s/it]

Cost at iteration 440 = 0.14593520947663868


 44%|████▍     | 445/1000 [4:13:12<5:13:05, 33.85s/it]

Cost at iteration 445 = 0.14553589922720697


 45%|████▌     | 450/1000 [4:16:06<5:17:50, 34.67s/it]

Cost at iteration 450 = 0.14513798542396952


 46%|████▌     | 455/1000 [4:18:56<5:08:57, 34.01s/it]

Cost at iteration 455 = 0.1447413856755264


 46%|████▌     | 460/1000 [4:21:46<5:06:47, 34.09s/it]

Cost at iteration 460 = 0.14434607216665798


 46%|████▋     | 465/1000 [4:24:36<5:02:04, 33.88s/it]

Cost at iteration 465 = 0.1439521807953498


 47%|████▋     | 470/1000 [4:27:29<5:01:47, 34.17s/it]

Cost at iteration 470 = 0.14355991363131052


 48%|████▊     | 475/1000 [4:30:18<4:56:09, 33.85s/it]

Cost at iteration 475 = 0.14316907178980542


 48%|████▊     | 480/1000 [4:33:08<4:55:25, 34.09s/it]

Cost at iteration 480 = 0.14277981707515636


 48%|████▊     | 485/1000 [4:36:01<4:56:58, 34.60s/it]

Cost at iteration 485 = 0.14239193932742683


 49%|████▉     | 490/1000 [4:38:51<4:49:33, 34.07s/it]

Cost at iteration 490 = 0.1420055537244467


 50%|████▉     | 495/1000 [4:41:40<4:45:04, 33.87s/it]

Cost at iteration 495 = 0.1416202711187926


 50%|█████     | 500/1000 [4:44:30<4:42:21, 33.88s/it]

Cost at iteration 500 = 0.14123617803223662


 50%|█████     | 505/1000 [4:47:23<4:42:30, 34.24s/it]

Cost at iteration 505 = 0.14085340205866298


 51%|█████     | 510/1000 [4:50:13<4:38:46, 34.14s/it]

Cost at iteration 510 = 0.14047164468019854


 52%|█████▏    | 515/1000 [4:53:02<4:34:08, 33.91s/it]

Cost at iteration 515 = 0.140091328506184


 52%|█████▏    | 520/1000 [4:55:56<4:39:11, 34.90s/it]

Cost at iteration 520 = 0.13971240374982863


 52%|█████▎    | 525/1000 [4:58:46<4:29:38, 34.06s/it]

Cost at iteration 525 = 0.13933488576725073


 53%|█████▎    | 530/1000 [5:01:35<4:25:57, 33.95s/it]

Cost at iteration 530 = 0.13895852496340844


 54%|█████▎    | 535/1000 [5:04:25<4:22:24, 33.86s/it]

Cost at iteration 535 = 0.13858345789618465


 54%|█████▍    | 540/1000 [5:07:19<4:24:46, 34.54s/it]

Cost at iteration 540 = 0.1382098064401305


 55%|█████▍    | 545/1000 [5:10:09<4:18:06, 34.04s/it]

Cost at iteration 545 = 0.1378374923112261


 55%|█████▌    | 550/1000 [5:13:00<4:16:13, 34.16s/it]

Cost at iteration 550 = 0.1374667019633014


 56%|█████▌    | 555/1000 [5:15:53<4:19:44, 35.02s/it]

Cost at iteration 555 = 0.13709715812748183


 56%|█████▌    | 560/1000 [5:18:43<4:11:29, 34.29s/it]

Cost at iteration 560 = 0.13672878079765557


 56%|█████▋    | 565/1000 [5:21:33<4:06:01, 33.93s/it]

Cost at iteration 565 = 0.1363614738485096


 57%|█████▋    | 570/1000 [5:24:24<4:04:57, 34.18s/it]

Cost at iteration 570 = 0.13599563994544175


 57%|█████▊    | 575/1000 [5:27:17<4:03:35, 34.39s/it]

Cost at iteration 575 = 0.13563107441889838


 58%|█████▊    | 580/1000 [5:30:07<3:58:57, 34.14s/it]

Cost at iteration 580 = 0.13526771000914745


 58%|█████▊    | 585/1000 [5:32:57<3:54:27, 33.90s/it]

Cost at iteration 585 = 0.13490567520772392


 59%|█████▉    | 590/1000 [5:35:47<3:52:09, 33.97s/it]

Cost at iteration 590 = 0.1345448126306832


 60%|█████▉    | 595/1000 [5:38:40<3:50:12, 34.11s/it]

Cost at iteration 595 = 0.13418508369443072


 60%|██████    | 600/1000 [5:41:30<3:47:34, 34.14s/it]

Cost at iteration 600 = 0.13382662728726294


 60%|██████    | 605/1000 [5:44:20<3:43:33, 33.96s/it]

Cost at iteration 605 = 0.13346933738937986


 61%|██████    | 610/1000 [5:47:13<3:45:40, 34.72s/it]

Cost at iteration 610 = 0.13311325026865406


 62%|██████▏   | 615/1000 [5:50:03<3:38:38, 34.07s/it]

Cost at iteration 615 = 0.13275835425990354


 62%|██████▏   | 620/1000 [5:52:53<3:35:14, 33.99s/it]

Cost at iteration 620 = 0.13240476601066256


 62%|██████▎   | 625/1000 [5:55:42<3:31:41, 33.87s/it]

Cost at iteration 625 = 0.1320524375191638


 63%|██████▎   | 630/1000 [5:58:36<3:32:09, 34.40s/it]

Cost at iteration 630 = 0.1317012854410776


 64%|██████▎   | 635/1000 [6:01:26<3:26:42, 33.98s/it]

Cost at iteration 635 = 0.1313514892826258


 64%|██████▍   | 640/1000 [6:04:17<3:24:33, 34.09s/it]

Cost at iteration 640 = 0.13100310438005655


 64%|██████▍   | 645/1000 [6:07:11<3:25:49, 34.79s/it]

Cost at iteration 645 = 0.1306557806184927


 65%|██████▌   | 650/1000 [6:10:01<3:19:43, 34.24s/it]

Cost at iteration 650 = 0.1303098075597255


 66%|██████▌   | 655/1000 [6:12:51<3:15:43, 34.04s/it]

Cost at iteration 655 = 0.1299651211060669


 66%|██████▌   | 660/1000 [6:15:42<3:13:34, 34.16s/it]

Cost at iteration 660 = 0.12962149483435692


 66%|██████▋   | 665/1000 [6:18:36<3:12:11, 34.42s/it]

Cost at iteration 665 = 0.1292793014960198


 67%|██████▋   | 670/1000 [6:21:28<3:08:47, 34.33s/it]

Cost at iteration 670 = 0.12893846550434535


 68%|██████▊   | 675/1000 [6:24:18<3:04:11, 34.00s/it]

Cost at iteration 675 = 0.12859886085147565


 68%|██████▊   | 680/1000 [6:27:12<3:07:19, 35.12s/it]

Cost at iteration 680 = 0.12826024366319924


 68%|██████▊   | 685/1000 [6:30:02<2:59:13, 34.14s/it]

Cost at iteration 685 = 0.12792263104993915


 69%|██████▉   | 690/1000 [6:32:52<2:56:23, 34.14s/it]

Cost at iteration 690 = 0.12758606010079343


 70%|██████▉   | 695/1000 [6:35:42<2:52:56, 34.02s/it]

Cost at iteration 695 = 0.12725066530528742


 70%|███████   | 700/1000 [6:38:36<2:52:42, 34.54s/it]

Cost at iteration 700 = 0.1269163716976973


 70%|███████   | 705/1000 [6:41:26<2:47:27, 34.06s/it]

Cost at iteration 705 = 0.12658333704049907


 71%|███████   | 710/1000 [6:44:17<2:44:46, 34.09s/it]

Cost at iteration 710 = 0.12625149345076428


 72%|███████▏  | 715/1000 [6:47:07<2:41:30, 34.00s/it]

Cost at iteration 715 = 0.12592069583850599


 72%|███████▏  | 720/1000 [6:50:02<2:41:22, 34.58s/it]

Cost at iteration 720 = 0.12559103039390815


 72%|███████▎  | 725/1000 [6:52:53<2:36:45, 34.20s/it]

Cost at iteration 725 = 0.12526249346479043


 73%|███████▎  | 730/1000 [6:55:44<2:33:35, 34.13s/it]

Cost at iteration 730 = 0.12493493629233068


 74%|███████▎  | 735/1000 [6:58:37<2:32:36, 34.55s/it]

Cost at iteration 735 = 0.12460843679372641


 74%|███████▍  | 740/1000 [7:01:28<2:27:52, 34.13s/it]

Cost at iteration 740 = 0.12428281802750218


 74%|███████▍  | 745/1000 [7:04:18<2:25:07, 34.15s/it]

Cost at iteration 745 = 0.12395831445631333


 75%|███████▌  | 750/1000 [7:07:09<2:22:52, 34.29s/it]

Cost at iteration 750 = 0.1236349177161092


 76%|███████▌  | 755/1000 [7:10:04<2:20:47, 34.48s/it]

Cost at iteration 755 = 0.12331271546180667


 76%|███████▌  | 760/1000 [7:12:56<2:17:42, 34.43s/it]

Cost at iteration 760 = 0.12299150178793482


 76%|███████▋  | 765/1000 [7:15:48<2:14:25, 34.32s/it]

Cost at iteration 765 = 0.122671159643215


 77%|███████▋  | 770/1000 [7:18:44<2:14:38, 35.12s/it]

Cost at iteration 770 = 0.1223517552498667


 78%|███████▊  | 775/1000 [7:21:36<2:09:39, 34.58s/it]

Cost at iteration 775 = 0.1220333213978347


 78%|███████▊  | 780/1000 [7:24:28<2:06:14, 34.43s/it]

Cost at iteration 780 = 0.12171620222477358


 78%|███████▊  | 785/1000 [7:27:20<2:02:41, 34.24s/it]

Cost at iteration 785 = 0.12140013929557163


 79%|███████▉  | 790/1000 [7:30:15<2:00:52, 34.54s/it]

Cost at iteration 790 = 0.1210853488500177


 80%|███████▉  | 795/1000 [7:33:06<1:56:41, 34.15s/it]

Cost at iteration 795 = 0.12077156889546188


 80%|████████  | 800/1000 [7:35:57<1:54:14, 34.27s/it]

Cost at iteration 800 = 0.12045865261534418


 80%|████████  | 805/1000 [7:38:51<1:53:11, 34.83s/it]

Cost at iteration 805 = 0.1201467302836869


 81%|████████  | 810/1000 [7:41:42<1:49:08, 34.47s/it]

Cost at iteration 810 = 0.1198355968054342


 82%|████████▏ | 815/1000 [7:44:33<1:45:28, 34.21s/it]

Cost at iteration 815 = 0.11952539127886694


 82%|████████▏ | 820/1000 [7:47:25<1:43:02, 34.35s/it]

Cost at iteration 820 = 0.11921594028300844


 82%|████████▎ | 825/1000 [7:50:19<1:40:31, 34.47s/it]

Cost at iteration 825 = 0.11890739799893965


 83%|████████▎ | 830/1000 [7:53:11<1:37:09, 34.29s/it]

Cost at iteration 830 = 0.11859974347862483


 84%|████████▎ | 835/1000 [7:56:02<1:33:54, 34.15s/it]

Cost at iteration 835 = 0.11829299676433025


 84%|████████▍ | 840/1000 [7:58:56<1:33:38, 35.12s/it]

Cost at iteration 840 = 0.11798734306939432


 84%|████████▍ | 845/1000 [8:01:47<1:28:48, 34.38s/it]

Cost at iteration 845 = 0.11768277628930866


 85%|████████▌ | 850/1000 [8:04:38<1:25:27, 34.19s/it]

Cost at iteration 850 = 0.11737924047601891


 86%|████████▌ | 855/1000 [8:07:30<1:23:13, 34.43s/it]

Cost at iteration 855 = 0.11707660457263055


 86%|████████▌ | 860/1000 [8:10:24<1:20:32, 34.52s/it]

Cost at iteration 860 = 0.11677508353050838


 86%|████████▋ | 865/1000 [8:13:14<1:17:02, 34.24s/it]

Cost at iteration 865 = 0.11647457147315797


 87%|████████▋ | 870/1000 [8:16:05<1:14:02, 34.17s/it]

Cost at iteration 870 = 0.1161747949638945


 88%|████████▊ | 875/1000 [8:19:00<1:13:27, 35.26s/it]

Cost at iteration 875 = 0.11587598890472468


 88%|████████▊ | 880/1000 [8:21:50<1:08:39, 34.33s/it]

Cost at iteration 880 = 0.11557827296631276


 88%|████████▊ | 885/1000 [8:24:42<1:05:35, 34.22s/it]

Cost at iteration 885 = 0.11528145760763889


 89%|████████▉ | 890/1000 [8:27:32<1:02:26, 34.06s/it]

Cost at iteration 890 = 0.1149856321551655


 90%|████████▉ | 895/1000 [8:30:26<1:00:39, 34.66s/it]

Cost at iteration 895 = 0.11469065820607922


 90%|█████████ | 900/1000 [8:33:17<56:55, 34.15s/it]

Cost at iteration 900 = 0.1143965397698991


 90%|█████████ | 905/1000 [8:36:07<53:55, 34.06s/it]

Cost at iteration 905 = 0.11410333807782318


 91%|█████████ | 910/1000 [8:38:58<51:19, 34.21s/it]

Cost at iteration 910 = 0.1138107881024212


 92%|█████████▏| 915/1000 [8:41:51<48:21, 34.13s/it]

Cost at iteration 915 = 0.11351925402386975


 92%|█████████▏| 920/1000 [8:44:41<45:28, 34.10s/it]

Cost at iteration 920 = 0.11322877097265673


 92%|█████████▎| 925/1000 [8:47:31<42:24, 33.93s/it]

Cost at iteration 925 = 0.11293919979852346


 93%|█████████▎| 930/1000 [8:50:25<40:20, 34.57s/it]

Cost at iteration 930 = 0.11265043329478276


 94%|█████████▎| 935/1000 [8:53:15<36:56, 34.10s/it]

Cost at iteration 935 = 0.11236238428050478


 94%|█████████▍| 940/1000 [8:56:05<34:08, 34.15s/it]

Cost at iteration 940 = 0.11207527295951049


 94%|█████████▍| 945/1000 [8:58:55<31:08, 33.98s/it]

Cost at iteration 945 = 0.11178887240546662


 95%|█████████▌| 950/1000 [9:01:49<28:33, 34.26s/it]

Cost at iteration 950 = 0.11150334553910081


 96%|█████████▌| 955/1000 [9:04:39<25:32, 34.07s/it]

Cost at iteration 955 = 0.11121853242497576


 96%|█████████▌| 960/1000 [9:07:30<22:42, 34.05s/it]

Cost at iteration 960 = 0.1109344422767997


 96%|█████████▋| 965/1000 [9:10:23<20:15, 34.72s/it]

Cost at iteration 965 = 0.11065100624664158


 97%|█████████▋| 970/1000 [9:13:14<17:07, 34.25s/it]

Cost at iteration 970 = 0.110368572109956


 98%|█████████▊| 975/1000 [9:16:04<14:11, 34.05s/it]

Cost at iteration 975 = 0.11008690726208246


 98%|█████████▊| 980/1000 [9:18:55<11:23, 34.18s/it]

Cost at iteration 980 = 0.10980608617694596


 98%|█████████▊| 985/1000 [9:21:49<08:38, 34.55s/it]

Cost at iteration 985 = 0.10952607671481464


 99%|█████████▉| 990/1000 [9:24:40<05:41, 34.13s/it]

Cost at iteration 990 = 0.10924703538087349


100%|█████████▉| 995/1000 [9:27:30<02:50, 34.11s/it]

Cost at iteration 995 = 0.10896878752048342


100%|██████████| 1000/1000 [9:30:25<00:00, 34.23s/it]


In [None]:
 # Display the trained parameter dict (NumPy abbreviates the large arrays).
 params

{'W1': array([[-4.78033428e-05,  7.65396409e-04,  1.22445836e-02, ...,
          1.75577268e-02,  2.33512010e-03, -4.10735975e-03],
        [ 5.19937869e-03, -1.77309342e-02, -1.39296903e-02, ...,
         -4.96210588e-03,  6.39777035e-04, -1.59029335e-02],
        [-6.98164303e-03, -6.69264721e-03,  7.62955594e-03, ...,
         -1.17092553e-02, -2.36001078e-02,  6.37170048e-03],
        ...,
        [ 4.60102911e-03, -1.19884865e-02,  1.20841917e-03, ...,
         -1.64973824e-03, -3.20075812e-03, -1.49853592e-02],
        [-1.34262077e-02,  1.61367275e-03,  8.47106640e-03, ...,
          4.09493553e-03,  2.13859507e-02,  1.51418694e-02],
        [ 2.90331970e-03, -1.36101609e-02, -2.18674431e-02, ...,
         -1.53202390e-02, -2.69308206e-03, -1.13398838e-03]]),
 'W2': array([[ 0.06345484,  0.02470317,  0.01672148, ...,  0.00957294,
          0.02548475,  0.01547999],
        [ 0.02291387, -0.01022143, -0.00461837, ..., -0.00115049,
          0.00112585,  0.02704834],
        [-0.0

In [None]:
# One forward pass over the held-out set and one over the training set.
# Passing i=0 makes forward_prop print the cost; lambd=0 leaves out the L2 term.
_, _, A = forward_prop(params, x_test.T, Y_test, 0, 0)
_, _, A_t = forward_prop(params, x_train.T, Y_train, 0, 0)

Cost at iteration 0 = 0.22458282824601333
Cost at iteration 0 = 0.10869141585860348


In [None]:
# Predicted digit = index of the largest output unit in each column
# (pred: held-out set, pred1: training set).
pred = np.argmax(A, axis=0)
pred1 = np.argmax(A_t, axis=0)

In [None]:
# Held-out accuracy: fraction of predictions equal to the true label.
np.mean(y_test == pred)

0.967

In [None]:
# Training accuracy: fraction of predictions equal to the true label.
np.mean(y_train == pred1)

0.988275

In [None]:
# Sanity check: rows are examples, columns are pixel features.
x_train.shape

(40000, 784)

In [None]:
# Print every held-out prediction next to its true label, one pair per line.
for i in range(len(pred)):
    print(str(pred[i]) + " " + str(y_test[i]))

In [None]:
# Show the held-out labels as a plain Python list.
y_test.tolist()

In [None]:
# SAVING THE PARAMETERS IN A CSV FILE
# One CSV per array, named 'params W<l>.csv' / 'params b<l>.csv'.

os.chdir('/content/drive/My Drive/Digit Recognizer/Parameters/')
L = int(len(params)/2)
for l in tqdm(range(1, L+1)):
    # Weights first, then biases, for each layer.
    for kind in ('W', 'b'):
        key = kind + str(l)
        with open('params ' + key + '.csv', 'w', newline='') as file:
            writer = csv.writer(file)
            # Placeholder header row (all 2.0): pd.read_csv in the loading
            # cell consumes the first row as column names, so without it the
            # first row of real values would be lost on reload.
            writer.writerow(np.ones(params[key].shape[1]) * 2)
            writer.writerows(params[key])

100%|██████████| 3/3 [00:01<00:00,  2.76it/s]
