In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import torch

In [2]:
data1 = pd.read_csv("mnist_train.csv")
data = data1[:5000]
data.head()

Unnamed: 0,label,1x1,1x2,1x3,1x4,1x5,1x6,1x7,1x8,1x9,...,28x19,28x20,28x21,28x22,28x23,28x24,28x25,28x26,28x27,28x28
0,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [3]:
num_classes = len(data.iloc[:, 0].value_counts().index)
num_classes

10

In [4]:
def OneHotEncode(Y, num_classes):
    t_Y = np.zeros((len(Y), num_classes), dtype=np.int64)
    for i in range(len(Y)):
        t_Y[i][Y[i]] = 1.0

    return t_Y

In [5]:
Y = data.iloc[:, 0]
X = data.iloc[:, 1:]
Y = np.array(Y)
X = np.array(X, dtype=np.float64)

In [6]:
# Normalize X from range 0-255 to 0-1
X = X / 255

In [7]:
Y

array([5, 0, 4, ..., 2, 1, 2], dtype=int64)

In [8]:
# Y = OneHotEncode(Y, num_classes)
# Y

In [9]:
input_nodes = 784
output_nodes = 10
hidden_nodes = 50

In [10]:
wh = torch.tensor(
    np.random.uniform(-1, 1.1, (input_nodes, hidden_nodes)), requires_grad=True
)
bh = torch.tensor(np.random.uniform(-1, 1.1, hidden_nodes), requires_grad=True)

wo = torch.tensor(
    np.random.uniform(-1, 1.1, (hidden_nodes, output_nodes)), requires_grad=True
)
bo = torch.tensor(np.random.uniform(-1, 1.1, output_nodes), requires_grad=True)

In [11]:
x = torch.tensor(X)
y = torch.tensor(Y, dtype=torch.long)

In [12]:
def MSE(predicted_y, target_y):
    return torch.mean(torch.pow(predicted_y - target_y, 2))

In [13]:
sigmoid = torch.nn.Sigmoid()
softmax = torch.nn.Softmax()

In [14]:
def forward_pass():
    zh = torch.matmul(x, wh) + bh
    ah = sigmoid(zh)

    zo = torch.matmul(ah, wo) + bo
    ao = softmax(zo)
    return ao

In [23]:
epochs = 20000
lr = 0.4

In [24]:
CrossEntropyLoss = torch.nn.CrossEntropyLoss()
for epoch in tqdm(range(epochs)):
    predicted_y = forward_pass()
    loss = CrossEntropyLoss(predicted_y, y)

    loss.backward()

    with torch.no_grad():
        wh -= lr * wh.grad
        bh -= lr * bh.grad
        wo -= lr * wo.grad
        bo -= lr * bo.grad

        wh.grad.zero_()
        bh.grad.zero_()
        wo.grad.zero_()
        bo.grad.zero_()

  
100%|████████████████████████████████████████████████████████████████████████████| 20000/20000 [21:02<00:00, 16.00it/s]


In [25]:
predicted = forward_pass()
predicted
# CrossEntropyLoss

  


tensor([[3.0238e-05, 3.0173e-08, 1.2207e-05,  ..., 6.9400e-08, 9.6538e-07,
         1.5170e-06],
        [1.0000e+00, 9.9205e-10, 3.6515e-06,  ..., 1.6997e-09, 9.1649e-12,
         2.6203e-08],
        [2.5580e-07, 7.5654e-10, 7.4787e-06,  ..., 1.1091e-09, 2.2595e-10,
         9.7092e-06],
        ...,
        [4.0590e-05, 9.7442e-04, 9.9481e-01,  ..., 1.1651e-07, 1.4959e-04,
         1.9522e-06],
        [8.3814e-09, 9.9998e-01, 9.8874e-06,  ..., 2.1453e-08, 3.2637e-06,
         1.3001e-06],
        [1.1499e-04, 5.5959e-04, 9.6598e-01,  ..., 3.9456e-06, 3.1916e-02,
         1.0084e-03]], dtype=torch.float64, grad_fn=<SoftmaxBackward>)

In [26]:
np.count_nonzero(
    y.detach().numpy() == np.argmax(predicted.detach().numpy(), axis=1)
) / len(y)

0.8718

In [39]:
X_test = data1[5000:10000].iloc[:, 1:]
Y_test = data1[5000:10000].iloc[:, 0]

In [40]:
def test(X):
    zh = torch.matmul(X, wh) + bh
    ah = sigmoid(zh)

    zo = torch.matmul(ah, wo) + bo
    ao = softmax(zo)
    return ao

In [45]:
predicted = test(torch.tensor(np.array(X_test), dtype=torch.float64))

  


In [46]:
np.count_nonzero(
    np.array(Y_test) == np.argmax(predicted.detach().numpy(), axis=1)
) / len(np.array(Y_test))

0.7986