In [1]:
import torch
from torch import nn
from torch import optim
from torchvision import datasets, transforms
from torch.utils.data import random_split, DataLoader

In [2]:
# Smoke test: draw five standard-normal values on the CPU and copy them to the
# current CUDA device. This raises if no usable GPU is available.
torch.randn(5).to("cuda")

tensor([ 0.3130,  0.9697,  1.3606, -0.3271,  0.1759], device='cuda:0')

In [9]:
#train, val split
# Seed the global RNG so the random split (and everything drawn after it) is
# the same on every Restart & Run All.
SEED = 0
torch.manual_seed(SEED)

train_data = datasets.MNIST('data', train=True, download=True, transform=transforms.ToTensor())

# Hold out a fixed number of samples for validation and derive the training
# size from the dataset itself instead of hard-coding it.
val_size = 5000
train_size = len(train_data) - val_size
train, val = random_split(train_data, [train_size, val_size])

batch_size = 32
# Reshuffle the training samples every epoch so SGD does not see the batches
# in the same order each time; validation order does not matter.
train_loader = DataLoader(train, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val, batch_size=batch_size)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw
Processing...
Done!


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [8]:
#define my simple model
# Baseline multilayer perceptron: 784 inputs -> 64 -> 64 -> 10 outputs, with a
# ReLU after each of the first two linear layers.
# NOTE: this instance is never moved to the GPU, and the name `model` is
# rebound by the `model = Resnet().cuda()` assignment further down.
model = nn.Sequential(*(
    nn.Linear(28 * 28, 64),
    nn.ReLU(),
    nn.Linear(64, 64),
    nn.ReLU(),
    nn.Linear(64, 10),
))

In [6]:
#More flexible model
class Resnet(nn.Module):
    """Small fully connected classifier with one residual (skip) connection.

    The network is ``l1 -> ReLU -> l2 -> ReLU``, after which the output of the
    first hidden layer is added back to the output of the second one. Dropout
    is applied to that sum and ``l3`` maps it to the class logits.

    Args:
        in_features: size of each flattened input sample (default 28*28).
        hidden_features: width of both hidden layers; they must match so the
            residual sum is well defined (default 64).
        n_classes: number of output logits (default 10).
        dropout: dropout probability applied before the last layer
            (default 0.1).
    """

    def __init__(self, in_features=28*28, hidden_features=64, n_classes=10, dropout=0.1):
        super().__init__()
        # Attribute names are kept as-is so existing state_dict keys still load.
        self.l1 = nn.Linear(in_features, hidden_features)
        self.l2 = nn.Linear(hidden_features, hidden_features)
        self.l3 = nn.Linear(hidden_features, n_classes)
        self.do = nn.Dropout(dropout)

    def forward(self, x):
        """Return the unnormalised class scores (logits) for ``x``.

        ``x`` must already be flattened so that its last dimension equals
        ``in_features``.
        """
        h1 = nn.functional.relu(self.l1(x))
        h2 = nn.functional.relu(self.l2(h1))
        # Residual connection: add the first hidden activation back in
        # before dropout.
        do = self.do(h2 + h1)
        logits = self.l3(do)
        return logits

# Build the residual network with its default sizes and move its parameters
# to the current CUDA device.
model = Resnet().to("cuda")


In [7]:
#define my optimiser
learning_rate = 1e-2
# model.parameters() hands back a one-shot iterator that the optimiser would
# exhaust; keeping a list means `params` can still be iterated afterwards
# (e.g. to inspect or clip gradients) instead of silently yielding nothing.
params = list(model.parameters())
optimiser = optim.SGD(params, lr=learning_rate)

In [8]:
#define my loss function
# Cross-entropy on raw logits vs. integer class targets, averaged over the
# batch ("mean" is the default reduction, spelled out here for clarity).
loss = nn.CrossEntropyLoss(reduction="mean")

In [12]:
#my training and validation loop
nb_epochs = 5


def _run_epoch(model, loader, criterion, optimiser=None):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy)``.

    Args:
        model: network to run; batches are moved to whatever device its
            parameters live on.
        loader: iterable yielding ``(x, y)`` batches.
        criterion: callable mapping ``(logits, targets)`` to a scalar loss.
        optimiser: if given, the model is put in training mode and updated
            after every batch; if None, the model is put in eval mode and no
            gradients are tracked.

    Returns:
        Two Python floats, both averaged per sample rather than per batch, so
        a shorter final batch is not over-weighted. An empty loader gives
        ``(nan, nan)``.
    """
    training = optimiser is not None
    model.train(training) #because of dropout
    # Follow the model's device instead of hard-coding .cuda() on every batch.
    device = next(model.parameters()).device

    loss_sum, n_correct, n_seen = 0.0, 0, 0
    for x, y in loader:
        #x: b * 1 * 28 * 28
        b = x.size(0)
        x = x.view(b, -1).to(device) #-1 would mean everything
        y = y.to(device)

        # Track gradients only when the result will be back-propagated.
        with torch.set_grad_enabled(training):
            #First step: Forward
            logits = model(x)

            #Second step: Compute objective function
            J = criterion(logits, y)

        if training:
            #Third step: Cleaning the gradients
            optimiser.zero_grad()

            #Fourth step: Accumulate the partial derivatives of J wrt params
            J.backward()

            #Fifth step: Opposite direction of the gradient
            optimiser.step()

        # Assumes the criterion returns the batch mean (the default for
        # nn.CrossEntropyLoss); scale by b to recover the per-sample sum.
        loss_sum += J.item() * b
        n_correct += (logits.argmax(dim=1) == y).sum().item()
        n_seen += b

    if n_seen == 0:
        return float('nan'), float('nan')
    return loss_sum / n_seen, n_correct / n_seen


for epoch in range(nb_epochs):
    train_loss, train_acc = _run_epoch(model, train_loader, loss, optimiser)
    print(f'Epoch {epoch+1}, train loss: {train_loss:.2f},training accuracy: {train_acc:.2f}')

    val_loss, val_acc = _run_epoch(model, val_loader, loss)
    print(f'Epoch {epoch+1}, validation loss: {val_loss:.2f},validation accuracy: {val_acc:.2f}')



Epoch 1, train loss: 0.86,training accuracy: 0.77
Epoch 1, validation loss: 0.37,validation accuracy: 0.90
Epoch 2, train loss: 0.38,training accuracy: 0.89
Epoch 2, validation loss: 0.29,validation accuracy: 0.91
Epoch 3, train loss: 0.31,training accuracy: 0.91
Epoch 3, validation loss: 0.25,validation accuracy: 0.93
Epoch 4, train loss: 0.27,training accuracy: 0.92
Epoch 4, validation loss: 0.22,validation accuracy: 0.94
Epoch 5, train loss: 0.23,training accuracy: 0.93
Epoch 5, validation loss: 0.20,validation accuracy: 0.94
