In [71]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
#export
from exp.nb_02 import *
import torch.nn.functional as F

##  Initial setup

### Data

[Jump_to lesson 9 video](https://course.fast.ai/videos/?lesson=9&t=1786)

In [4]:
mpl.rcParams['image.cmap'] = 'gray'

In [5]:
#export
def get_data():
    path = Config().data_path()/'mnist'
    with gzip.open(path/'mnist.pkl.gz', 'rb') as f:
        ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding='latin-1')
    return map(tensor, (x_train,y_train,x_valid,y_valid))

In [6]:
x_train,y_train,x_valid,y_valid = get_data()

In [7]:
n,m = x_train.shape
c = y_train.max()+1
nh = 50

In [8]:
class Model(nn.Module):
    def __init__(self, n_in, nh, n_out):
        super().__init__()
        self.layers = [nn.Linear(n_in,nh), nn.ReLU(), nn.Linear(nh,n_out)]
        
    def __call__(self, x):
        for l in self.layers: x = l(x)
        return x

In [9]:
model = Model(m, nh, 10)

In [10]:
pred = model(x_train)

### Cross entropy loss

###### softmax

In [11]:
def softmax(x):
    return torch.exp(x) / torch.exp(x).sum(dim=-1, keepdim=True)

In [12]:
x = torch.rand(2,5)

In [13]:
test_near(softmax(x), x.softmax(dim=-1))

###### log softmax

In [14]:
def log_softmax(x):
    return (torch.exp(x) / torch.exp(x).sum(dim=-1, keepdim=True)).log()

In [15]:
test_near(log_softmax(x), x.log_softmax(-1))

###### log softmax with log(a/b) rule

In [16]:
def log_softmax(x):
    return x - torch.exp(x).sum(dim=-1, keepdim=True).log()

In [17]:
test_near(log_softmax(x), x.log_softmax(-1))

###### log sum exp

In [18]:
def logsumexp(x):
    a = x.max(-1, keepdim=True).values
    return a + torch.exp(x-a).sum(dim=-1, keepdim=True).log()

In [19]:
x.shape, logsumexp(x).shape

(torch.Size([2, 5]), torch.Size([2, 1]))

In [20]:
test_near(logsumexp(x), torch.logsumexp(x, -1, keepdim=True))

###### log softmax with logsumexp

In [21]:
def log_softmax(x):
    return x - logsumexp(x)

In [22]:
x.shape, log_softmax(x).shape

(torch.Size([2, 5]), torch.Size([2, 5]))

In [23]:
test_near(log_softmax(x), torch.log_softmax(x, -1))

###### cross entropy with log softmax

negative log likelihood: sum( -x * log(p))

x: one hot encoded binaries, signaling true class

nll: -log(p_true)

In [24]:
def nll_l(inp, targ):
    return -inp[range(len(targ)),targ].mean()

In [25]:
def crossentropy(inp, targ):
    return nll_l(log_softmax(inp), targ)

In [26]:
x = torch.rand(5,10)
y = y_train[:5]
print(x.shape)
print(y.shape)
print(min(y_train), max(y_train))

torch.Size([5, 10])
torch.Size([5])
tensor(0) tensor(9)


In [27]:
test_near(log_softmax(x), F.log_softmax(x, -1))

In [28]:
test_near(nll_l(x, y), F.nll_loss(x, y))

In [29]:
test_near(crossentropy(x,y), F.cross_entropy(x, y))

## Basic training loop

In [30]:
loss_func = F.cross_entropy

In [31]:
bs = 64

xb = x_train[:bs, :]
yb = y_train[0:bs]

In [32]:
class Model(nn.Module):
    def __init__(self, n_in, nh, n_out):
        super().__init__()
        self.layers = [nn.Linear(n_in,nh), nn.ReLU(), nn.Linear(nh,n_out)]
        
    def __call__(self, x):
        for l in self.layers: x = l(x)
        return x

In [33]:
def accuracy_own(out, yb):
    pred_cat = out.argmax(dim=-1)
    return (pred_cat == yb).float().mean()

In [34]:
def accuracy(out, yb): return (torch.argmax(out, dim=1)==yb).float().mean()

In [35]:
model = Model(x_train.shape[-1], 64, 10)

In [432]:
preds = model(xb)
loss = F.cross_entropy(preds, yb)
loss

tensor(2.3164, grad_fn=<NllLossBackward>)

In [433]:
out = model(x_train)
loss = loss_func(out, y_train)
acc = accuracy(out, y_train)
loss, acc

(tensor(2.3087, grad_fn=<NllLossBackward>), tensor(0.0901))

In [434]:
def fit(epoch, lr):
    for e in range(epoch):
        for i in range(0, x_train.shape[0], bs):
            xb, yb = x_train[i:i+bs, :], y_train[i:i+bs]
#             print('x.shape: ', x.shape)
#             print('y.shape: ', y.shape)
            preds = model(xb)
#             print('pred.shape: ', preds.shape)
            loss = F.cross_entropy(preds, yb)
            if i % 100 == 0: print(loss)
            
            loss.backward()
            with torch.no_grad():
                for l in model.layers:
                    if hasattr(l, 'weight'):
                        l.weight.sub_(lr*l.weight.grad)
                        l.bias.sub_(lr*l.bias.grad)
                        l.weight.grad.zero_()
                        l.bias.grad.zero_()

In [435]:
fit(1, 0.05)

tensor(2.3164, grad_fn=<NllLossBackward>)
tensor(2.0731, grad_fn=<NllLossBackward>)
tensor(1.6482, grad_fn=<NllLossBackward>)
tensor(1.3556, grad_fn=<NllLossBackward>)
tensor(0.9325, grad_fn=<NllLossBackward>)
tensor(0.8723, grad_fn=<NllLossBackward>)
tensor(0.7249, grad_fn=<NllLossBackward>)
tensor(0.7064, grad_fn=<NllLossBackward>)
tensor(0.5961, grad_fn=<NllLossBackward>)
tensor(0.5683, grad_fn=<NllLossBackward>)
tensor(0.7520, grad_fn=<NllLossBackward>)
tensor(0.5580, grad_fn=<NllLossBackward>)
tensor(0.4889, grad_fn=<NllLossBackward>)
tensor(0.5233, grad_fn=<NllLossBackward>)
tensor(0.3837, grad_fn=<NllLossBackward>)
tensor(0.4549, grad_fn=<NllLossBackward>)
tensor(0.3626, grad_fn=<NllLossBackward>)
tensor(0.4485, grad_fn=<NllLossBackward>)
tensor(0.3191, grad_fn=<NllLossBackward>)
tensor(0.4123, grad_fn=<NllLossBackward>)
tensor(0.3885, grad_fn=<NllLossBackward>)
tensor(0.2425, grad_fn=<NllLossBackward>)
tensor(0.4430, grad_fn=<NllLossBackward>)
tensor(0.3382, grad_fn=<NllLossBac

In [436]:
loss = loss_func(model(x_train), y_train)
acc = accuracy_own(model(x_train), y_train)
loss, acc

(tensor(0.3718, grad_fn=<NllLossBackward>), tensor(0.8927))

## Parameters and optim

### Parameters

In [35]:
class Model(nn.Module):
    def __init__(self, n_in, nh, n_out):
        super().__init__()
        self.lin1 = nn.Linear(n_in,nh)
        self.relu = nn.ReLU()
        self.lin2 = nn.Linear(nh,n_out)
        
    def __call__(self, x):
        return self.lin2(self.relu(self.lin1(x)))

In [36]:
model = Model(x_train.shape[1], 50, 10)

In [627]:
for name, l in model.named_children():
    print(f'({name}): {l}')

(lin1): Linear(in_features=784, out_features=50, bias=True)
(relu): ReLU()
(lin2): Linear(in_features=50, out_features=10, bias=True)


In [628]:
for name, l in model.named_modules():
    print(f'({name}): {l}')

(): Model(
  (lin1): Linear(in_features=784, out_features=50, bias=True)
  (relu): ReLU()
  (lin2): Linear(in_features=50, out_features=10, bias=True)
)
(lin1): Linear(in_features=784, out_features=50, bias=True)
(relu): ReLU()
(lin2): Linear(in_features=50, out_features=10, bias=True)


In [629]:
for name, l in model.named_parameters():
    print(f'({name}): \t {l.shape}')

(lin1.weight): 	 torch.Size([50, 784])
(lin1.bias): 	 torch.Size([50])
(lin2.weight): 	 torch.Size([10, 50])
(lin2.bias): 	 torch.Size([10])


In [38]:
def fit(epoch, lr):
    for e in range(epoch):
        for i in range(0, x_train.shape[0], bs):
            xb, yb = x_train[i:i+bs, :], y_train[i:i+bs]
#             print('x.shape: ', x.shape)
#             print('y.shape: ', y.shape)
            preds = model(xb)
#             print('pred.shape: ', preds.shape)
            loss = F.cross_entropy(preds, yb)
            if i % 100 == 0: print(loss)
            
            loss.backward()
            with torch.no_grad():
                for p in model.parameters():
                    p.sub_(lr*p.grad)
                    p.grad.zero_()
#                 model.zero_grad()

In [654]:
fit(1, 0.05)

tensor(2.3072, grad_fn=<NllLossBackward>)
tensor(2.0856, grad_fn=<NllLossBackward>)
tensor(1.7235, grad_fn=<NllLossBackward>)
tensor(1.4159, grad_fn=<NllLossBackward>)
tensor(0.9805, grad_fn=<NllLossBackward>)
tensor(0.8861, grad_fn=<NllLossBackward>)
tensor(0.7389, grad_fn=<NllLossBackward>)
tensor(0.7167, grad_fn=<NllLossBackward>)
tensor(0.5723, grad_fn=<NllLossBackward>)
tensor(0.5652, grad_fn=<NllLossBackward>)
tensor(0.7450, grad_fn=<NllLossBackward>)
tensor(0.5177, grad_fn=<NllLossBackward>)
tensor(0.4725, grad_fn=<NllLossBackward>)
tensor(0.5316, grad_fn=<NllLossBackward>)
tensor(0.3542, grad_fn=<NllLossBackward>)
tensor(0.4452, grad_fn=<NllLossBackward>)
tensor(0.3571, grad_fn=<NllLossBackward>)
tensor(0.4336, grad_fn=<NllLossBackward>)
tensor(0.3145, grad_fn=<NllLossBackward>)
tensor(0.4323, grad_fn=<NllLossBackward>)
tensor(0.3809, grad_fn=<NllLossBackward>)
tensor(0.2362, grad_fn=<NllLossBackward>)
tensor(0.4326, grad_fn=<NllLossBackward>)
tensor(0.3413, grad_fn=<NllLossBac

In [475]:
loss = loss_func(model(x_train), y_train)
acc = accuracy_own(model(x_train), y_train)
loss, acc

(tensor(0.3760, grad_fn=<NllLossBackward>), tensor(0.8915))

In [37]:
class DummyModule():
    def __init__(self, n_in, nh, n_out):
        self._modules = {}
        self.lin1 = nn.Linear(n_in,nh)
        self.relu = nn.ReLU()
        self.lin2 = nn.Linear(nh,n_out)
    
    def __setattr__(self, k, v):
        if not k.startswith("_"): self._modules[k] = v
        super().__setattr__(k, v)
    
    def __repr__(self):
        return f'{self._modules}'
    
    def parameters(self):
        for l in self._modules.values():
            for p in l.parameters(): yield p
    
    def __call__(self, x):
        for m in self._modules.values():
            x = m(x)
        return x

In [515]:
model = DummyModule(x_train.shape[1], 50, 10)

In [516]:
model._modules

{'lin1': Linear(in_features=784, out_features=50, bias=True),
 'relu': ReLU(),
 'lin2': Linear(in_features=50, out_features=10, bias=True)}

In [517]:
model

{'lin1': Linear(in_features=784, out_features=50, bias=True), 'relu': ReLU(), 'lin2': Linear(in_features=50, out_features=10, bias=True)}

In [518]:
fit(1, 0.05)

tensor(2.3127, grad_fn=<NllLossBackward>)
tensor(2.0662, grad_fn=<NllLossBackward>)
tensor(1.7052, grad_fn=<NllLossBackward>)
tensor(1.4187, grad_fn=<NllLossBackward>)
tensor(0.9750, grad_fn=<NllLossBackward>)
tensor(0.8586, grad_fn=<NllLossBackward>)
tensor(0.6953, grad_fn=<NllLossBackward>)
tensor(0.7399, grad_fn=<NllLossBackward>)
tensor(0.5799, grad_fn=<NllLossBackward>)
tensor(0.5585, grad_fn=<NllLossBackward>)
tensor(0.7173, grad_fn=<NllLossBackward>)
tensor(0.5368, grad_fn=<NllLossBackward>)
tensor(0.4873, grad_fn=<NllLossBackward>)
tensor(0.5328, grad_fn=<NllLossBackward>)
tensor(0.3624, grad_fn=<NllLossBackward>)
tensor(0.4320, grad_fn=<NllLossBackward>)
tensor(0.3399, grad_fn=<NllLossBackward>)
tensor(0.4404, grad_fn=<NllLossBackward>)
tensor(0.3157, grad_fn=<NllLossBackward>)
tensor(0.4112, grad_fn=<NllLossBackward>)
tensor(0.3843, grad_fn=<NllLossBackward>)
tensor(0.2421, grad_fn=<NllLossBackward>)
tensor(0.4389, grad_fn=<NllLossBackward>)
tensor(0.3502, grad_fn=<NllLossBac

In [519]:
[w.shape for w in model.parameters()]

[torch.Size([50, 784]),
 torch.Size([50]),
 torch.Size([10, 50]),
 torch.Size([10])]

### Registering modules

In [40]:
layers = [nn.Linear(m, nh), nn.ReLU(), nn.Linear(nh, 10)]

In [38]:
class Model(nn.Module):
    def __init__(self, layers):
        super().__init__()
        self.layers = layers
        for i, l in enumerate(layers):
            self.add_module(f'layer_{i}', l)
    def __call__(self, x):
        for l in self.layers:
            x = l(x)
        return x

In [39]:
model = Model(layers)

NameError: name 'layers' is not defined

In [544]:
model(xb).shape

torch.Size([64, 10])

In [547]:
[m for m in model.modules()]

[Model(
   (layer_0): Linear(in_features=784, out_features=50, bias=True)
   (layer_1): ReLU()
   (layer_2): Linear(in_features=50, out_features=10, bias=True)
 ),
 Linear(in_features=784, out_features=50, bias=True),
 ReLU(),
 Linear(in_features=50, out_features=10, bias=True)]

### nn.ModuleList

In [40]:
class Model(nn.Module):
    def __init__(self, layers):
        super().__init__()
        self.layers = nn.ModuleList(layers)

    def __call__(self, x):
        for l in self.layers:
            x = l(x)
        return x

In [41]:
model = Model(layers)

NameError: name 'layers' is not defined

In [603]:
model

Model(
  (layers): ModuleList(
    (0): Linear(in_features=784, out_features=50, bias=True)
    (1): ReLU()
    (2): Linear(in_features=50, out_features=10, bias=True)
  )
)

In [604]:
[m for m in model.modules()]

[Model(
   (layers): ModuleList(
     (0): Linear(in_features=784, out_features=50, bias=True)
     (1): ReLU()
     (2): Linear(in_features=50, out_features=10, bias=True)
   )
 ),
 ModuleList(
   (0): Linear(in_features=784, out_features=50, bias=True)
   (1): ReLU()
   (2): Linear(in_features=50, out_features=10, bias=True)
 ),
 Linear(in_features=784, out_features=50, bias=True),
 ReLU(),
 Linear(in_features=50, out_features=10, bias=True)]

### nn.Sequential

### optim

In [42]:
class MyOptimizer():
    def __init__(self, parameters, lr=0.05):
        self.parameters = list(parameters)
        self.lr = lr
    
    def step(self):
        with torch.no_grad():
            for p in self.parameters:
                p.sub_(self.lr * p.grad)
            
    def zero_grad(self):
        for p in self.parameters:
            p.grad.zero_()

In [43]:
class Optimizer():
    def __init__(self, params, lr=0.5): self.params,self.lr=list(params),lr
        
    def step(self):
        with torch.no_grad():
            for p in self.params: p -= p.grad * lr

    def zero_grad(self):
        for p in self.params: p.grad.data.zero_()

In [44]:
opt = MyOptimizer(model.parameters(), 0.5)

In [45]:
def fit(epoch, lr, opt):
    for e in range(epoch):
        for i in range(0, x_train.shape[0], bs):
            xb, yb = x_train[i:i+bs, :], y_train[i:i+bs]
#             print('x.shape: ', x.shape)
#             print('y.shape: ', y.shape)
            preds = model(xb)
#             print('pred.shape: ', preds.shape)
            loss = F.cross_entropy(preds, yb)
            if i % 100 == 0: print(loss)
            
            loss.backward()
            opt.step()
            opt.zero_grad()

In [839]:
fit(5, 0.5, opt)

tensor(2.2874, grad_fn=<NllLossBackward>)
tensor(5.6318, grad_fn=<NllLossBackward>)
tensor(6.5605, grad_fn=<NllLossBackward>)
tensor(2.1561, grad_fn=<NllLossBackward>)
tensor(1.3476, grad_fn=<NllLossBackward>)
tensor(1.4224, grad_fn=<NllLossBackward>)
tensor(1.3367, grad_fn=<NllLossBackward>)
tensor(1.1250, grad_fn=<NllLossBackward>)
tensor(1.2640, grad_fn=<NllLossBackward>)
tensor(1.0863, grad_fn=<NllLossBackward>)
tensor(1.4891, grad_fn=<NllLossBackward>)
tensor(1.0267, grad_fn=<NllLossBackward>)
tensor(1.5184, grad_fn=<NllLossBackward>)
tensor(1.1963, grad_fn=<NllLossBackward>)
tensor(1.1625, grad_fn=<NllLossBackward>)
tensor(1.1903, grad_fn=<NllLossBackward>)
tensor(1.0302, grad_fn=<NllLossBackward>)
tensor(1.0549, grad_fn=<NllLossBackward>)
tensor(1.1198, grad_fn=<NllLossBackward>)
tensor(1.0234, grad_fn=<NllLossBackward>)
tensor(1.1417, grad_fn=<NllLossBackward>)
tensor(0.7502, grad_fn=<NllLossBackward>)
tensor(1.0754, grad_fn=<NllLossBackward>)
tensor(0.8963, grad_fn=<NllLossBac

In [840]:
m, nh

(784, 50)

In [46]:
def get_model():
    model = nn.Sequential(nn.Linear(m,nh), nn.ReLU(), nn.Linear(nh,10))
#     opt = optim.SGD(model.parameters(), lr=lr)
    opt = MyOptimizer(model.parameters(), lr=lr)
    return model, opt

In [47]:
lr=0.05

## Dataset and DataLoader

### Dataset

In [48]:
class Dataset():
    def __init__(self, x, y):
        self.x = x
        self.y = y
    
    def __getitem__(self, i): return (self.x[i], self.y[i])
    
    def __len__(self): return self.y.shape[0]

In [49]:
train_ds, valid_ds = Dataset(x_train, y_train), Dataset(x_valid, y_valid)

In [50]:
print(x_train.shape)
print(y_train.shape)
print(x_valid.shape)
print(y_valid.shape)
print(len(train_ds))
print(len(valid_ds))

torch.Size([50000, 784])
torch.Size([50000])
torch.Size([10000, 784])
torch.Size([10000])
50000
10000


In [51]:
assert(len(train_ds) == len(y_train))
assert(len(valid_ds) == len(y_valid))

In [52]:
xb, yb = train_ds[:5]
print(xb.shape)
print(yb.shape)

torch.Size([5, 784])
torch.Size([5])


In [53]:
model, opt = get_model()

In [844]:
def fit(epoch, model, lr, opt):
    for e in range(epoch):
        for i in range(0, len(train_ds), bs):
            xb, yb = train_ds[i:i+bs]
#             print('x.shape: ', x.shape)
#             print('y.shape: ', y.shape)
            preds = model(xb)
#             print('pred.shape: ', preds.shape)
            loss = F.cross_entropy(preds, yb)
            if i % 100 == 0: print(loss)
            
            loss.backward()
            opt.step()
            opt.zero_grad()

In [845]:
fit(1, model, 0.05, opt)

tensor(2.2989, grad_fn=<NllLossBackward>)
tensor(2.0851, grad_fn=<NllLossBackward>)
tensor(1.6298, grad_fn=<NllLossBackward>)
tensor(1.4115, grad_fn=<NllLossBackward>)
tensor(1.0037, grad_fn=<NllLossBackward>)
tensor(0.9240, grad_fn=<NllLossBackward>)
tensor(0.6987, grad_fn=<NllLossBackward>)
tensor(0.7166, grad_fn=<NllLossBackward>)
tensor(0.5644, grad_fn=<NllLossBackward>)
tensor(0.5761, grad_fn=<NllLossBackward>)
tensor(0.7638, grad_fn=<NllLossBackward>)
tensor(0.5492, grad_fn=<NllLossBackward>)
tensor(0.4997, grad_fn=<NllLossBackward>)
tensor(0.5406, grad_fn=<NllLossBackward>)
tensor(0.3854, grad_fn=<NllLossBackward>)
tensor(0.4487, grad_fn=<NllLossBackward>)
tensor(0.3333, grad_fn=<NllLossBackward>)
tensor(0.4377, grad_fn=<NllLossBackward>)
tensor(0.3153, grad_fn=<NllLossBackward>)
tensor(0.4209, grad_fn=<NllLossBackward>)
tensor(0.3879, grad_fn=<NllLossBackward>)
tensor(0.2434, grad_fn=<NllLossBackward>)
tensor(0.4340, grad_fn=<NllLossBackward>)
tensor(0.3442, grad_fn=<NllLossBac

### DataLoader

In [54]:
class DataLoader():
    def __init__(self, ds, bs):
        self.ds = ds
        self.bs = bs
    
    def __iter__(self):
        for i in range(0,len(self.ds), self.bs):
            yield self.ds[i:i+self.bs]

In [55]:
train_dl = DataLoader(train_ds, 64)

In [56]:
it_dl = iter(train_dl)

In [57]:
xb, yb = next(it_dl)
yb[:5]

tensor([5, 0, 4, 1, 9])

In [885]:
xb, yb = next(it_dl)
yb[:5]

tensor([7, 2, 8, 7, 6])

In [894]:
model, opt = get_model()

In [895]:
def fit(epoch, model, lr, opt):
    for e in range(epoch):
        for xb, yb in train_dl:
#             print('x.shape: ', x.shape)
#             print('y.shape: ', y.shape)
            preds = model(xb)
#             print('pred.shape: ', preds.shape)
            loss = F.cross_entropy(preds, yb)
            print(loss)
            
            loss.backward()
            opt.step()
            opt.zero_grad()

In [896]:
fit(1, model, 0.05, opt)

tensor(2.2956, grad_fn=<NllLossBackward>)
tensor(2.2872, grad_fn=<NllLossBackward>)
tensor(2.2919, grad_fn=<NllLossBackward>)
tensor(2.2709, grad_fn=<NllLossBackward>)
tensor(2.2779, grad_fn=<NllLossBackward>)
tensor(2.2696, grad_fn=<NllLossBackward>)
tensor(2.2470, grad_fn=<NllLossBackward>)
tensor(2.2407, grad_fn=<NllLossBackward>)
tensor(2.2515, grad_fn=<NllLossBackward>)
tensor(2.2464, grad_fn=<NllLossBackward>)
tensor(2.2220, grad_fn=<NllLossBackward>)
tensor(2.2221, grad_fn=<NllLossBackward>)
tensor(2.1906, grad_fn=<NllLossBackward>)
tensor(2.2005, grad_fn=<NllLossBackward>)
tensor(2.2230, grad_fn=<NllLossBackward>)
tensor(2.1912, grad_fn=<NllLossBackward>)
tensor(2.1992, grad_fn=<NllLossBackward>)
tensor(2.1887, grad_fn=<NllLossBackward>)
tensor(2.1643, grad_fn=<NllLossBackward>)
tensor(2.1784, grad_fn=<NllLossBackward>)
tensor(2.1335, grad_fn=<NllLossBackward>)
tensor(2.0837, grad_fn=<NllLossBackward>)
tensor(2.1187, grad_fn=<NllLossBackward>)
tensor(2.0927, grad_fn=<NllLossBac

tensor(0.3388, grad_fn=<NllLossBackward>)
tensor(0.3192, grad_fn=<NllLossBackward>)
tensor(0.4410, grad_fn=<NllLossBackward>)
tensor(0.5489, grad_fn=<NllLossBackward>)
tensor(0.2823, grad_fn=<NllLossBackward>)
tensor(0.4005, grad_fn=<NllLossBackward>)
tensor(0.6022, grad_fn=<NllLossBackward>)
tensor(0.3923, grad_fn=<NllLossBackward>)
tensor(0.4049, grad_fn=<NllLossBackward>)
tensor(0.1664, grad_fn=<NllLossBackward>)
tensor(0.2379, grad_fn=<NllLossBackward>)
tensor(0.3910, grad_fn=<NllLossBackward>)
tensor(0.3383, grad_fn=<NllLossBackward>)
tensor(0.3535, grad_fn=<NllLossBackward>)
tensor(0.6378, grad_fn=<NllLossBackward>)
tensor(0.5449, grad_fn=<NllLossBackward>)
tensor(0.4634, grad_fn=<NllLossBackward>)
tensor(0.4458, grad_fn=<NllLossBackward>)
tensor(0.2497, grad_fn=<NllLossBackward>)
tensor(0.3890, grad_fn=<NllLossBackward>)
tensor(0.7784, grad_fn=<NllLossBackward>)
tensor(0.6829, grad_fn=<NllLossBackward>)
tensor(0.5561, grad_fn=<NllLossBackward>)
tensor(0.3973, grad_fn=<NllLossBac

tensor(0.3891, grad_fn=<NllLossBackward>)
tensor(0.4728, grad_fn=<NllLossBackward>)
tensor(0.3259, grad_fn=<NllLossBackward>)
tensor(0.4062, grad_fn=<NllLossBackward>)
tensor(0.4769, grad_fn=<NllLossBackward>)
tensor(0.2831, grad_fn=<NllLossBackward>)
tensor(0.7063, grad_fn=<NllLossBackward>)
tensor(0.3280, grad_fn=<NllLossBackward>)
tensor(0.6378, grad_fn=<NllLossBackward>)
tensor(0.5895, grad_fn=<NllLossBackward>)
tensor(0.1970, grad_fn=<NllLossBackward>)
tensor(0.3086, grad_fn=<NllLossBackward>)
tensor(0.3666, grad_fn=<NllLossBackward>)
tensor(0.3638, grad_fn=<NllLossBackward>)
tensor(0.4296, grad_fn=<NllLossBackward>)
tensor(0.4831, grad_fn=<NllLossBackward>)
tensor(0.4991, grad_fn=<NllLossBackward>)
tensor(0.3833, grad_fn=<NllLossBackward>)
tensor(0.4682, grad_fn=<NllLossBackward>)
tensor(0.2481, grad_fn=<NllLossBackward>)
tensor(0.5085, grad_fn=<NllLossBackward>)
tensor(0.1526, grad_fn=<NllLossBackward>)
tensor(0.2356, grad_fn=<NllLossBackward>)
tensor(0.2172, grad_fn=<NllLossBac

### Random sampling

In [58]:
class Sampler():
    def __init__(self, ds, bs, shuffle=False):
        self.n = len(ds)
        self.ds = ds
        self.bs = bs
        self.shuffle = shuffle
    
    def __iter__(self):
        self.idxs = torch.randperm(self.n) if self.shuffle else torch.arange(self.n)
        for i in range(0, self.n, self.bs): yield self.idxs[i:i+self.bs]

In [59]:
small_ds = Dataset(*train_ds[:10])

In [60]:
s = Sampler(small_ds,3,True)
[o for o in s]

[tensor([3, 9, 6]), tensor([4, 0, 5]), tensor([7, 2, 8]), tensor([1])]

In [61]:
[o for o in s]

[tensor([6, 5, 1]), tensor([0, 9, 2]), tensor([7, 4, 3]), tensor([8])]

### Pytorch DataLoader

In [102]:
#export
from torch.utils.data import DataLoader, SequentialSampler, RandomSampler

In [103]:
train_dl = DataLoader(train_ds, bs, sampler=RandomSampler(train_ds))

In [104]:
valid_dl = DataLoader(valid_ds, bs)

In [105]:
model, opt = get_model()

In [78]:
def fit(epoch, model, opt):
    for e in range(epoch):
        for xb, yb in train_dl:
#             print('x.shape: ', x.shape)
#             print('y.shape: ', y.shape)
            preds = model(xb)
#             print('pred.shape: ', preds.shape)
            loss = F.cross_entropy(preds, yb)
            print(loss)
            
            loss.backward()
            opt.step()
            opt.zero_grad()

In [79]:
fit(1, model, opt )

tensor(2.3174, grad_fn=<NllLossBackward>)
tensor(2.3126, grad_fn=<NllLossBackward>)
tensor(2.2992, grad_fn=<NllLossBackward>)
tensor(2.2997, grad_fn=<NllLossBackward>)
tensor(2.2896, grad_fn=<NllLossBackward>)
tensor(2.2730, grad_fn=<NllLossBackward>)
tensor(2.2721, grad_fn=<NllLossBackward>)
tensor(2.2759, grad_fn=<NllLossBackward>)
tensor(2.2603, grad_fn=<NllLossBackward>)
tensor(2.2597, grad_fn=<NllLossBackward>)
tensor(2.2526, grad_fn=<NllLossBackward>)
tensor(2.2540, grad_fn=<NllLossBackward>)
tensor(2.2409, grad_fn=<NllLossBackward>)
tensor(2.2218, grad_fn=<NllLossBackward>)
tensor(2.2459, grad_fn=<NllLossBackward>)
tensor(2.2159, grad_fn=<NllLossBackward>)
tensor(2.2123, grad_fn=<NllLossBackward>)
tensor(2.1994, grad_fn=<NllLossBackward>)
tensor(2.1950, grad_fn=<NllLossBackward>)
tensor(2.1704, grad_fn=<NllLossBackward>)
tensor(2.1914, grad_fn=<NllLossBackward>)
tensor(2.1790, grad_fn=<NllLossBackward>)
tensor(2.1425, grad_fn=<NllLossBackward>)
tensor(2.1791, grad_fn=<NllLossBac

tensor(0.4625, grad_fn=<NllLossBackward>)
tensor(0.5185, grad_fn=<NllLossBackward>)
tensor(0.5326, grad_fn=<NllLossBackward>)
tensor(0.5404, grad_fn=<NllLossBackward>)
tensor(0.3770, grad_fn=<NllLossBackward>)
tensor(0.2610, grad_fn=<NllLossBackward>)
tensor(0.4551, grad_fn=<NllLossBackward>)
tensor(0.3789, grad_fn=<NllLossBackward>)
tensor(0.5246, grad_fn=<NllLossBackward>)
tensor(0.4676, grad_fn=<NllLossBackward>)
tensor(0.5197, grad_fn=<NllLossBackward>)
tensor(0.5322, grad_fn=<NllLossBackward>)
tensor(0.4868, grad_fn=<NllLossBackward>)
tensor(0.5175, grad_fn=<NllLossBackward>)
tensor(0.4476, grad_fn=<NllLossBackward>)
tensor(0.5099, grad_fn=<NllLossBackward>)
tensor(0.4675, grad_fn=<NllLossBackward>)
tensor(0.3772, grad_fn=<NllLossBackward>)
tensor(0.4964, grad_fn=<NllLossBackward>)
tensor(0.4195, grad_fn=<NllLossBackward>)
tensor(0.4715, grad_fn=<NllLossBackward>)
tensor(0.5668, grad_fn=<NllLossBackward>)
tensor(0.3575, grad_fn=<NllLossBackward>)
tensor(0.4481, grad_fn=<NllLossBac

tensor(0.3649, grad_fn=<NllLossBackward>)
tensor(0.4127, grad_fn=<NllLossBackward>)
tensor(0.4611, grad_fn=<NllLossBackward>)
tensor(0.5052, grad_fn=<NllLossBackward>)
tensor(0.3877, grad_fn=<NllLossBackward>)
tensor(0.3386, grad_fn=<NllLossBackward>)
tensor(0.4556, grad_fn=<NllLossBackward>)
tensor(0.4794, grad_fn=<NllLossBackward>)
tensor(0.2986, grad_fn=<NllLossBackward>)
tensor(0.5800, grad_fn=<NllLossBackward>)
tensor(0.3331, grad_fn=<NllLossBackward>)
tensor(0.4543, grad_fn=<NllLossBackward>)
tensor(0.2787, grad_fn=<NllLossBackward>)
tensor(0.3257, grad_fn=<NllLossBackward>)
tensor(0.5904, grad_fn=<NllLossBackward>)
tensor(0.3538, grad_fn=<NllLossBackward>)
tensor(0.4048, grad_fn=<NllLossBackward>)
tensor(0.2749, grad_fn=<NllLossBackward>)
tensor(0.6019, grad_fn=<NllLossBackward>)
tensor(0.4475, grad_fn=<NllLossBackward>)
tensor(0.3420, grad_fn=<NllLossBackward>)
tensor(0.3017, grad_fn=<NllLossBackward>)
tensor(0.2774, grad_fn=<NllLossBackward>)
tensor(0.3385, grad_fn=<NllLossBac

## Validation

In [109]:
def fit(epoch, model, opt):
    nv = len(valid_dl)
    for e in range(epoch):
        model.train()
        for xb, yb in train_dl:
            preds = model(xb)
            loss = F.cross_entropy(preds, yb)            
            loss.backward()
            opt.step()
            opt.zero_grad()
        
        #val
        model.eval()        
        val_acc = 0
        val_loss = 0
        with torch.no_grad():
            for xb, yb in valid_dl:
                val_preds = model(xb)
                val_loss += F.cross_entropy(val_preds, yb)
                val_acc += accuracy(val_preds, yb)
            val_loss = val_loss / nv
            val_acc = val_acc / nv
            print(f'val_loss: {val_loss}, val_acc: {val_acc}')

In [115]:
fit(10, model, opt)

val_loss: 0.09680737555027008, val_acc: 0.9727308750152588
val_loss: 0.0958971306681633, val_acc: 0.9730294346809387
val_loss: 0.0973888486623764, val_acc: 0.9735270738601685
val_loss: 0.09926646202802658, val_acc: 0.9722332954406738
val_loss: 0.0973941907286644, val_acc: 0.9728304147720337
val_loss: 0.1012895479798317, val_acc: 0.9719347357749939
val_loss: 0.09750808030366898, val_acc: 0.9733280539512634
val_loss: 0.0978778749704361, val_acc: 0.9735270738601685
val_loss: 0.09866394102573395, val_acc: 0.9733280539512634
val_loss: 0.09939221292734146, val_acc: 0.9737260937690735
