In [2]:
#| default_exp model

# model

> A simple API for creating and using playing cards

In [3]:
#| export
import torch
from fastcore.utils import *
from tinypytorch.core import *

In [4]:
#| hide
from nbdev.showdoc import *
from fastcore.test import *

### Module

In [5]:
#| export
class Module():
    def __call__(self, *args):
        self.args = args # it will call self.inp, and self.targ...
        self.out = self.forward(*args)
        return self.out
    
    def forward(self):
        raise Exception("Not implemented")
    
    def backward(self):
        self.bwd(self.out, *self.args)

### Activation Functions

#### ReLU Function

In [6]:
#| export
class ReLU(Module):
    # def __call__(self, inp: 'input'):
    #     self.inp = inp
    #     self.out = inp.clamp_min(0.) - 0.5
    #     return self.out
    
    def forward(self, inp: 'input'):
        return inp.clamp_min(0.) - 0.5
    
    def bwd(self, out, inp):
        inp.g = (inp > 0).float() * out.g

### Loss Functions

#### Mean Squared Error

In [7]:
#| export
class MSE(Module):
    
#     def __call__(self, inp: 'input', targ: 'target'):
#         self.inp = inp
#         self.targ = targ
        
#         print("MSE.forward")
#         print(f"inp.shape={inp.shape}")
#         print(f"inp.squeeze().shape={inp.squeeze(-1).shape}")
#         print(f"targ.shape={targ.shape}")
        
#         temp = (inp.squeeze() - targ)
#         print(f"temp={temp}")
        
#         return torch.pow(temp, 2).mean()
    
    def forward(self, inp, targ):
        
        # print("MSE.forward")
        # print(f"inp.shape={inp.shape}")
        # print(f"inp.squeeze().shape={inp.squeeze(-1).shape}")
        # print(f"targ.shape={targ.shape}")
        
        temp = (inp.squeeze() - targ)
        # print(f"temp={temp}")
        
        return torch.pow(temp, 2).mean()

    def bwd(self, out, inp, targ):
        inp.g = 2*(inp.squeeze() - targ).unsqueeze(-1) / targ.shape[0]
        
    # def backward(self):
    #     self.inp.g = 2. * (self.inp.squeeze() - self.targ).unsqueeze(-1) / self.targ.shape[0]

In [8]:
mse = MSE()

In [9]:
tensor_a = torch.randn(size=[4, 1])

In [10]:
tensor_a

tensor([[-0.6239],
        [ 1.0623],
        [-1.1395],
        [-0.2901]])

In [11]:
tensor_b = torch.rand_like(tensor_a)

In [12]:
tensor_b

tensor([[0.7852],
        [0.1587],
        [0.6888],
        [0.3324]])

In [13]:
mse.forward(tensor_a, tensor_b)

tensor(1.2752)

#### Cross-entropy Loss

##### Log Softmax

$$\hbox{softmax(x)}_{i} = \frac{e^{x_{i}}}{e^{x_{0}} + e^{x_{1}} + \cdots + e^{x_{n-1}}}$$

In [28]:
#| export
def log_softmax(x):
    return (x.exp()/(x.exp().sum(-1, keepdim=True))).log()

Cross-entropy is a derivation from Kull-back divergence

##### Negative Log Likelihood

In [15]:
targ = torch.tensor([5, 0, 4])

In [16]:
sm_pred = torch.tensor([[0, 1, 2], [5, 0, 4]])

In [20]:
sm_pred

tensor([[0, 1, 2],
        [5, 0, 4]])

In [33]:
sm_pred.shape

torch.Size([2, 3])

In [32]:
sm_pred[[0, 1], [2, 1]]

tensor([2, 0])

In [26]:
#| export
def nll(inp: 'input', targ: 'target'):
    return -inp[range(targ.shape[0]), targ].mean()

##### Cross-entropy loss

In [38]:
#| export
def cross_entropy(pred: 'prediction', targ: 'target'):
    sm_pred = log_softmax(pred)
    return nll(sm_pred, targ)

In [42]:
pred = torch.tensor([[0, 1, 2], [5, 0, 4]])

In [40]:
targ = torch.tensor([2, 1])

In [41]:
cross_entropy(pred, targ)

tensor(2.8629)

### Linear layer

In [27]:
# #| export
# class Lin():
#     def __init__(self, w, b):
#         self.w = w
#         self.b = b
    
#     def __call__(self, inp):
#         self.inp = inp
#         self.out = inp @ self.w + self.b
#         return self.out
    
#     def backward(self):
#         self.inp.g = self.out.g @ self.w.t()
#         self.w.g = (self.inp.unsqueeze(-1) * self.out.g.unsqueeze(1)).sum(0)
#         self.b.g = self.out.g.sum(0)

#### The gradient of Linear Layer

In [43]:
#| export
class Lin(Module):
    def __init__(self, w: 'weight', b: 'bias'):
        self.w, self.b = w, b
    
    # def __call__(self, inp):
    #     self.inp = inp
    #     self.out = inp @ self.w + self.b
    #     return self.out
    
    def forward(self, inp: 'input'):
        print("Lin.forward")
        print(f"inp={inp.shape}")
        print(f"w={self.w.shape}")
        print(f"b={self.b.shape}")
        
        output = inp @ self.w + self.b
        print(f"output.shape={output.shape}")
        return output
    
    def bwd(self, out, inp):
        inp.g = out.g @ self.w.t()
        # self.w.g = torch.einsum("bi,bj->ij", inp, out.g)
        self.w.g = inp.t() @ out.g
        # self.w.g = (self.inp.unsqueeze(-1) * self.out.g.unsqueeze(1)).sum(0)
        self.b.g = out.g.sum(0)
        #self.w.g = torch.einsum("bi,bj->ij", self.inp, self.out.g)
        #self.b.g = out.g.sum(0)

### Model

In [59]:
# #| export
# class Model():
#     def __init__(self, m, nh: "number of hidden"):
#         self.m, self.nh = m, nh
    
#     def forward(self, xb: 'training batch'):
#         w1, b1, w2, b2 = self.initialize_parameters()
#         l1 = Lin(xb, w1, b1)
#         l2 = ReLU(l1)
#         l3 = Lin(l2, w2, b2)
        
#         return l3
    
#     def initialize_parameters(self):
#         # kaiming init / he init for relu
#         w1 = torch.randn(self.m, self.nh)*math.sqrt(2./self.m)
#         b1 = torch.zeros(self.nh)
#         w2 = torch.randn(self.nh, 1)/math.sqrt(self.nh)
#         b2 = torch.zeros
#         return w1, b1, w2, b2

In [60]:
# #| export
# class Model():
#     def __init__(self, )

In [61]:
# #| export
# class Model():
#     def __init__(self, w1, b1, w2, b2):
#         self.layers = []

In [62]:
# #| export
# def get_model():
#     model = nn.Sequential(nn.Linear(m, nh), nn.ReLU(), nn.Linear(nh, 10))
#     return model

In [31]:
#| export
def initialize_parameters(m, nh: "number of hidden layers"):
    # kaiming init / he init for relu
    w1 = torch.randn(m, nh)*math.sqrt(2./m)
    b1 = torch.zeros(nh)
    w2 = torch.randn(nh, 1)/math.sqrt(nh)
    b2 = torch.zeros(1)
    return w1, b1, w2, b2

In [17]:
#| export
class Model():
    def __init__(self, w1, b1, w2, b2):
        self.layers = [Lin(w1, b1), ReLU(), Lin(w2, b2)]
        self.loss = MSE()
    
    def __call__(self, x, targ):
        for l in self.layers:
            print("Model.__call__")
            print(f"l={l}")
            x = l(x)
            print(f"x.shape={x.shape}")
        
        #assert x.shape==torch.Size([targ.shape[0],1])
        
        return self.loss(x, targ)
    
    def backward(self):
        self.loss.backward()
        for l in reversed(self.layers):
            l.backward()

### Training Loop

In [66]:
#| export
def train(epochs):
    pass

### Calculate the accuracy

In [9]:
#| export
def accuracy(out, yb):
    return (torch.argmax(out, dim=1) == yb).float().mean()