In [110]:
import pickle,gzip,math,os,time,shutil,torch,matplotlib as mpl, numpy as np
from pathlib import Path
from torch import tensor
from fastcore.test import test_close
import random

def seed(n=40):
    torch.manual_seed(n)
    random.seed(n)

mpl.rcParams['image.cmap'] = 'gray'
torch.set_printoptions(precision=2, linewidth=125, sci_mode=False)
np.set_printoptions(precision=2, linewidth=125)

path_gz = './mnist.pkl.gz'
with gzip.open(path_gz, 'rb') as f: ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding='latin-1')
x_train, y_train, x_valid, y_valid = map(tensor, [x_train, y_train, x_valid, y_valid])

In [111]:
x_train.shape, y_train.shape

(torch.Size([50000, 784]), torch.Size([50000]))

In [112]:
y_train[0:5]

tensor([5, 0, 4, 1, 9])

In [113]:
#x_train = x_train[0:5,]; y_train = y_train[0:5]

In [114]:
# weigts & biases
nh = 50
m = x_train.shape[1]

seed()
w1 = torch.randn(m,nh)
b1 = torch.randn(nh)
w2 = torch.randn(nh,1)
b2 = torch.zeros(1)

In [115]:
alpha = 0.0001
class Module:
    def __call__(self, *args):
        self.args = args
        self.out = self.forward()
        return self.out
    
    def backward(self):
        """Calculate gradients"""
        raise NotImplemented
    
    def forward(self):
        raise NotImplemented
        
class Lin(Module):
    def __init__(self, w, b):
        self.w, self.b = w, b
    
    def forward(self):
        self.inp = self.args[0]
        return self.inp @ self.w + self.b
    
    def backward(self):
        # inp[ij] : i=no. sample, j=no. input
        # w[jk] : j = no. input, k = no. output
        # out[k] = sum(wjk*inpij)
        # dy/dinp = dy/dout * dout/dinp = out.g*sum_k(wjk)
        #print(self.inp.T.shape)
        #print(self.out.g.shape)
        self.w.g = self.inp.T@self.out.g
        self.b.g = self.out.g.sum(0)
        self.inp.g = self.out.g @ self.w.T
    
class Relu(Module):
    def forward(self):
        self.inp = self.args[0]
        self.out = np.maximum(0, self.inp)
        return self.out
    
    def backward(self):
        self.inp.g = (self.inp>0).float() * self.out.g

class Mse(Module):
    def forward(self):
        self.inp, self.y = self.args[0], self.args[1]
        self.out = ((self.inp.T-self.y)**2).mean()
        return self.out
    
    def backward(self):
        # dy/d(inp[i])
        self.inp.g = (2.*(self.inp.squeeze(1)-self.y)/len(self.y)).unsqueeze(1)
        
class Model:
    def __init__(self, w1, b1, w2, b2):
        self.layers = [Lin(w1, b1), Relu(), Lin(w2, b2)]
        self.loss_func = Mse()
        
    def __call__(self, x, y):
        out = x
        for l in self.layers:
            out = l(out)
            
        loss = self.loss_func(out, y)
            
        return out.squeeze(1), float(loss)
    
    def backward(self):
        self.loss_func.backward()
        for l in self.layers[::-1]:
            l.backward()
        
    def step(self):
        with torch.no_grad():
            for l in self.layers[::-1]:
                if hasattr(l, 'w'):
                    l.w -= l.w.g * alpha
                    l.b -= l.b.g * alpha
                    l.w.g *= 0
                    l.b.g *= 0

seed()
model = Model(w1, b1, w2, b2)
new_loss = None
loss = None

while True:
    if loss and new_loss and (new_loss/loss > 0.99 or new_loss > loss):
        break
    
    loss = new_loss
    preds, new_loss = model(x_train, y_train)
    print(f"Loss = {str(loss)}")
    
    # calc gradients
    model.backward()
    
    # update gradients
    model.step()

Loss = None
Loss = 601.9739379882812
Loss = 583.3956298828125
Loss = 567.975341796875
Loss = 554.0646362304688
Loss = 541.06640625
Loss = 528.7515258789062
Loss = 517.0145874023438
Loss = 505.7985534667969
Loss = 495.0658264160156
Loss = 484.78485107421875
Loss = 474.92633056640625
Loss = 465.4649963378906
Loss = 456.3765869140625
Loss = 447.64263916015625
Loss = 439.24481201171875
Loss = 431.1626281738281
Loss = 423.3763732910156
Loss = 415.8726806640625
Loss = 408.6355285644531
Loss = 401.6523742675781
Loss = 394.909912109375
Loss = 388.3963317871094
Loss = 382.099365234375
Loss = 376.00787353515625
Loss = 370.1133728027344
Loss = 364.4066467285156
Loss = 358.8783874511719
Loss = 353.5189208984375
Loss = 348.32025146484375
Loss = 343.2760314941406
Loss = 338.3805236816406
Loss = 333.6271667480469
Loss = 329.0087585449219
Loss = 324.51904296875
Loss = 320.1536865234375
Loss = 315.90740966796875
Loss = 311.7757873535156
Loss = 307.7546691894531
Loss = 303.8395080566406
Loss = 300.02432

In [116]:
preds=preds.type(torch.int64)

In [117]:
with torch.no_grad():
    acc = (preds == y_train).sum()/len(preds)
    print(f"Accuracy = {acc}")

Accuracy = 0.028139999136328697


In [118]:
preds

tensor([ -7,  34,  12,  ..., -17,  -4, -16])