# Training a ConvNet with PyTorch

In this notebook, you'll learn how to use the powerful PyTorch framework to specify a conv net architecture and train it on the CIFAR-10 dataset.

In [1]:
import os
# Pin the GPU enumeration order and restrict which devices are visible.
# NOTE(review): the device list "1,2,3" is specific to the machine this was
# run on, and presumably has to be set before CUDA is first initialised.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "1,2,3",
})

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torch.utils.data import sampler

import torchvision.datasets as dset
import torchvision.transforms as T

import numpy as np

import timeit

In [3]:
from cs231n.resnet import resnet18, resnet34

In [4]:
class ChunkSampler(sampler.Sampler):
    """Sampler that yields one contiguous run of dataset indices, in order.

    Arguments:
        num_samples: how many consecutive indices to produce
        start: first index of the run (defaults to 0)
    """
    def __init__(self, num_samples, start = 0):
        self.start = start
        self.num_samples = num_samples

    def __len__(self):
        # The length is simply the requested number of indices.
        return self.num_samples

    def __iter__(self):
        # Half-open interval [start, start + num_samples).
        first = self.start
        stop = first + self.num_samples
        return iter(range(first, stop))

def check_accuracy(model, loader):
    """Print the classification accuracy of `model` over every batch in `loader`.

    Arguments:
        model: module mapping an input batch to a (batch, num_classes) score
            tensor; the predicted class is the arg-max over dimension 1.
        loader: iterable of (inputs, labels) batches. Its `dataset.train` flag
            only selects which banner is printed (validation vs. test).

    Side effects:
        Leaves `model` in eval mode and prints two lines; nothing is returned.
    """
    if loader.dataset.train:
        print('Checking accuracy on validation set')
    else:
        print('Checking accuracy on test set')
    num_correct = 0
    num_samples = 0
    model.eval() # Put the model in test mode (the opposite of model.train(), essentially)

    # Send inputs to wherever the model's weights live instead of relying on a
    # notebook-level dtype global that is defined in a later cell.
    param = next(model.parameters(), None)

    # `volatile=True` has had no effect since PyTorch 0.4; no_grad() is what
    # actually stops autograd from recording the forward passes.
    with torch.no_grad():
        for x, y in loader:
            if param is not None:
                x = x.to(device=param.device, dtype=param.dtype)

            scores = model(x)
            _, preds = scores.cpu().max(1)
            # int(...) keeps the running total a plain Python integer.
            num_correct += int((preds == y.cpu()).sum())
            num_samples += preds.size(0)

    # An empty loader reports 0 / 0 instead of raising ZeroDivisionError.
    acc = float(num_correct) / num_samples if num_samples else 0.0
    print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))
# The CIFAR-10 training file is split by index: the first NUM_TRAIN images are
# used for training and the following NUM_VAL images for validation.
NUM_TRAIN = 49000
NUM_VAL = 1000

# T.Scale was renamed to T.Resize and later removed from torchvision; Resize
# takes the same size argument.
trans = T.Compose([
        T.Resize(224),
        T.ToTensor(),
    ])
cifar10_train = dset.CIFAR10('./cs231n/datasets', train=True, download=True,
                           transform=trans)
# ChunkSampler walks indices 0 .. NUM_TRAIN-1 in order (no shuffling).
loader_train = DataLoader(cifar10_train, batch_size=64, sampler=ChunkSampler(NUM_TRAIN, 0),
                         num_workers=0)

# Validation reads the same training file, starting right after the training chunk.
cifar10_val = dset.CIFAR10('./cs231n/datasets', train=True, download=True,
                           transform=trans)
loader_val = DataLoader(cifar10_val, batch_size=64, sampler=ChunkSampler(NUM_VAL, NUM_TRAIN), num_workers=0)

# The test loader iterates over the whole train=False split.
cifar10_test = dset.CIFAR10('./cs231n/datasets', train=False, download=True,
                          transform=trans)
loader_test = DataLoader(cifar10_test, batch_size=64, num_workers=0)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [5]:
# Run-wide settings shared by the cells in this notebook.
print_every = 50
gpu_dtype = torch.cuda.FloatTensor

# Build the network, cast it to the GPU tensor type, then wrap it for multi-GPU use.
model = resnet18(num_classes=10)
model = model.type(gpu_dtype)
net = nn.DataParallel(model).cuda()

# The loss module gets the same GPU tensor type as the network.
loss_fn = nn.CrossEntropyLoss()
loss_fn = loss_fn.type(gpu_dtype)

# RMSprop over the wrapped model's parameters; lr is the learning rate.
optimizer = optim.RMSprop(net.parameters(), lr=1e-3)


In [6]:
def mytrain(model, loss_fn, optimizer, num_epochs = 1, loader = None, log_every = None):
    """Train `model` for `num_epochs` passes over a batch loader.

    Arguments:
        model: module to optimise; it is switched to train mode at the start
            of every epoch.
        loss_fn: callable taking (scores, labels) and returning a scalar loss.
        optimizer: optimizer already bound to the model's parameters.
        num_epochs: number of full passes over the loader.
        loader: iterable of (inputs, labels) batches. Defaults to the
            notebook-level `loader_train`.
        log_every: print the loss and elapsed time every this many batches.
            Defaults to the notebook-level `print_every`.

    Returns:
        None. Progress is reported with print().
    """
    import time
    # Fall back to the notebook globals only when the caller did not pass
    # explicit values, so existing four-argument calls behave as before.
    if loader is None:
        loader = loader_train
    if log_every is None:
        log_every = print_every

    # Batches are moved to the device/dtype of the model's own weights rather
    # than a separately maintained dtype global.
    param = next(model.parameters(), None)

    start_time = time.time()
    for epoch in range(num_epochs):
        print('Starting epoch %d / %d' % (epoch + 1, num_epochs))
        model.train()
        for t, (x, y) in enumerate(loader):
            if param is not None:
                x = x.to(device=param.device, dtype=param.dtype)
                # Labels go straight to the device; no float round trip.
                y = y.to(device=param.device)
            y = y.long()
            scores = model(x)

            loss = loss_fn(scores, y)
            if (t + 1) % log_every == 0:
                # loss.item() replaces loss.data[0], which raises on the 0-dim
                # loss tensors produced by PyTorch >= 0.4.
                print('t = %d, loss = %.4f' % (t + 1, loss.item()))
                # The value printed is the time elapsed since training began,
                # not the duration of a single batch.
                check_time = time.time()
                print('batch time: %f' % (check_time - start_time))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

# Train the wrapped network for one epoch, then score it on the validation loader.
mytrain(model=net, loss_fn=loss_fn, optimizer=optimizer, num_epochs=1)
check_accuracy(model=net, loader=loader_val)

Starting epoch 1 / 1
t = 50, loss = 2.1845
batch time: 26.981170
t = 100, loss = 1.8839
batch time: 43.515702
t = 150, loss = 1.6566
batch time: 59.917748
t = 200, loss = 2.0922
batch time: 76.829442
t = 250, loss = 1.7644
batch time: 93.563796
t = 300, loss = 1.5536
batch time: 110.286716
t = 350, loss = 1.4521
batch time: 127.360156
t = 400, loss = 1.3485
batch time: 144.131982
t = 450, loss = 1.5557
batch time: 161.096243
t = 500, loss = 1.3746
batch time: 177.734682
t = 550, loss = 1.5786
batch time: 194.487471
t = 600, loss = 1.3363
batch time: 211.020145
t = 650, loss = 1.6014
batch time: 228.031575
t = 700, loss = 1.7012
batch time: 245.273895
t = 750, loss = 1.2304
batch time: 262.228501
Checking accuracy on validation set
Got 364 / 1000 correct (36.40)


In [7]:
# The network trained above is the only candidate, so it is taken as the best
# model and scored on the test loader.
best_model = net
check_accuracy(model=best_model, loader=loader_test)

Checking accuracy on test set
Got 3441 / 10000 correct (34.41)


In [None]:
# NOTE(review): unexecuted scratch calculation; these numbers do not appear
# anywhere else in this notebook — presumably a leftover, confirm and remove.
314 / 480.0