In [105]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

from pathlib import Path
from IPython.core.debugger import set_trace
from fastai import datasets
import pickle, gzip, math, torch, matplotlib as mpl
import matplotlib.pyplot as plt
from torch import tensor, from_numpy, flatten, nn
import operator
from functools import partial

import pandas as pd
import numpy as np

import torch.nn.functional as F
from utils import get_data

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [106]:
X_train, X_test, y_train, y_test = get_data()

In [97]:
n,m = X_train.shape
c = int(y_train.max()+1)
nh = 50

### Training loop:
1. run batch through model to get predictions
2. calculate the loss with loss(pred, actual)
3. calculate grad of each param with backpropgation
4. update the params

For multiclass classification, the loss function we use is the cross entropy (in pytorch this is deinfed in `F.cross_entropy`

We use batches of data because in a normal setting the whole dataset is unlikely to fit into memory, and we would also not want to run images one by one through the network because that is rather inefficient (and also there are other tweaks, e.g. batchnorm, that expects more than 1 trainign sample to pass through at a time)

In [84]:
# if we use accuracy as the metric:
def accuracy(pred, actual):
    return (torch.argmax(preds, dim=1).int()== y_train[:bs]).float().mean()

Redine our simple model. This time, we are not adding the loss function to the model itself

In [88]:
class Model(nn.Module):
    def __init__(self, n_in, nh, n_out):
        super().__init__()
        self.layers = [nn.Linear(n_in, nh), nn.ReLU(), nn.Linear(nh, c)]
    
    def __call__(self, x):
        for l in self.layers:
            x = l(x)
        return x


Instead, we define our loss function as the pytorch cross entropy loss:

In [86]:
loss_func = F.nll_loss

run one batch through the model:

In [98]:
bs = 64
model = Model(m, nh, c)
batch = X_train[:bs, :]
preds = model(batch)

In [99]:
# since the model isn't trained, we would expect the model to get around 1/10 correct by pure chance
accuracy(preds, y_train[:bs])

tensor(0.0625)

In [107]:
# the loss function is what we're calculating the gradients against to optimise the params
loss_func(preds, y_train[:bs])

tensor(-0.0107, grad_fn=<NllLossBackward>)

In [67]:
preds.size()

torch.Size([64, 10])