In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler

import torchvision.datasets as dset
import torchvision.transforms as T
import torch.nn.functional as F

import numpy as np

In [2]:
NUM_TRAIN = 49000

# The torchvision.transforms package provides tools for preprocessing data
# and for performing data augmentation; here we set up a transform to
# preprocess the data by subtracting the mean RGB value and dividing by the
# standard deviation of each RGB value; we've hardcoded the mean and std.
transform = T.Compose([
                T.ToTensor(),
                T.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
            ])

# We set up a Dataset object for each split (train / val / test); Datasets load
# training examples one at a time, so we wrap each Dataset in a DataLoader which
# iterates through the Dataset and forms minibatches. We divide the CIFAR-10
# training set into train and val sets by passing a Sampler object to the
# DataLoader telling how it should sample from the underlying Dataset.
cifar10_train = dset.CIFAR10('./cs231n/datasets', train=True, download=True,
                             transform=transform)
loader_train = DataLoader(cifar10_train, batch_size=64, 
                          sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN)))

cifar10_val = dset.CIFAR10('./cs231n/datasets', train=True, download=True,
                           transform=transform)
loader_val = DataLoader(cifar10_val, batch_size=64, 
                        sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN, 50000)))

cifar10_test = dset.CIFAR10('./cs231n/datasets', train=False, download=True, 
                            transform=transform)
loader_test = DataLoader(cifar10_test, batch_size=64)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


ResNet Implementation 

In [3]:
class ResNetBlock(nn.Module):
    def __init__(self,in_channels, out_channels,stride=1,kernel_size=3,padding=1):
        super().__init__()
        self.model=nn.Sequential(
            nn.BatchNorm2d(in_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels,out_channels,kernel_size=kernel_size,stride=stride,padding=padding),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels,out_channels,kernel_size=kernel_size,stride=stride,padding=padding),
            nn.Dropout2d(),
        )
        
        self.shortcut=nn.Sequential()
        if in_channels!=out_channels or stride!=1:
            self.shortcut=nn.Sequential(
                nn.Conv2d(in_channels,out_channels, kernel_size=1,stride=stride,bias=False),
#                 nn.BatchNorm2d(out_channels),
            )
            
    def forward(self,x):
        shortcut=self.shortcut(x)
        out=self.model(x)
        out=out+shortcut
        return out    
    

In [5]:
class Flatten(nn.Module):
    def forward(self, x):
        x = x.view(x.size()[0], -1)
        return x

In [76]:
USE_GPU = True

dtype = torch.float32 # we will be using float throughout this tutorial

if USE_GPU and torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Constant to control how frequently we print train loss
print_every = 100

print('using device:', device)

using device: cuda


In [77]:
def check_accuracy_part34(loader, model):
    if loader.dataset.train:
        print('Checking accuracy on validation set')
    else:
        print('Checking accuracy on test set')   
    num_correct = 0
    num_samples = 0
    model.eval()  # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)
            scores = model(x)
            _, preds = scores.max(1)
            num_correct += (preds == y).sum()
            num_samples += preds.size(0)
        acc = float(num_correct) / num_samples
        print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))

In [78]:
def train_part34(model, optimizer, epochs=1):
    """
    Train a model on CIFAR-10 using the PyTorch Module API.
    
    Inputs:
    - model: A PyTorch Module giving the model to train.
    - optimizer: An Optimizer object we will use to train the model
    - epochs: (Optional) A Python integer giving the number of epochs to train for
    
    Returns: Nothing, but prints model accuracies during training.
    """
    model = model.to(device=device)  # move the model parameters to CPU/GPU
    for e in range(epochs):
        for t, (x, y) in enumerate(loader_train):
            model.train()  # put model to training mode
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)

            scores = model(x)
            loss = F.cross_entropy(scores,y)

            # Zero out all of the gradients for the variables which the optimizer
            # will update.
            optimizer.zero_grad()

            # This is the backwards pass: compute the gradient of the loss with
            # respect to each  parameter of the model.
            loss.backward()

            # Actually update the parameters of the model using the gradients
            # computed by the backwards pass.
            optimizer.step()

            if t % print_every == 0:
                print('Iteration %d, loss = %.4f' % (t, loss.item()))
                check_accuracy_part34(loader_val, model)
                print()

In [116]:
model=nn.Sequential(ResNetBlock(3,256,kernel_size=5,padding=2),
                   ResNetBlock(256,256,kernel_size=5,padding=2),
                   ResNetBlock(256,64,kernel_size=5,padding=2),
#                    ResNetBlock(64,64,kernel_size=5,padding=2),
#                    ResNetBlock(64,64),
#                    ResNetBlock(64,128),
                   ResNetBlock(64,32),
                   ResNetBlock(32,32),
                   Flatten(),
                   nn.Linear(32*32*32,50),
                   nn.BatchNorm1d(50),
                   nn.ReLU(),
                   nn.Linear(50,30),
                   nn.BatchNorm1d(30),
                   nn.ReLU(),
                   nn.Linear(30,10)                   
                   )

optimizer = optim.RMSprop(model.parameters())

In [117]:
train_part34(model, optimizer, epochs=10)

Iteration 0, loss = 2.2991
Checking accuracy on validation set
Got 79 / 1000 correct (7.90)

Iteration 100, loss = 1.7183
Checking accuracy on validation set
Got 387 / 1000 correct (38.70)

Iteration 200, loss = 1.5597
Checking accuracy on validation set
Got 440 / 1000 correct (44.00)

Iteration 300, loss = 1.6887
Checking accuracy on validation set
Got 442 / 1000 correct (44.20)

Iteration 400, loss = 1.3306
Checking accuracy on validation set
Got 545 / 1000 correct (54.50)

Iteration 500, loss = 1.2242
Checking accuracy on validation set
Got 483 / 1000 correct (48.30)

Iteration 600, loss = 1.1815
Checking accuracy on validation set
Got 516 / 1000 correct (51.60)

Iteration 700, loss = 1.3390
Checking accuracy on validation set
Got 567 / 1000 correct (56.70)

Iteration 0, loss = 0.9691
Checking accuracy on validation set
Got 624 / 1000 correct (62.40)

Iteration 100, loss = 0.9848
Checking accuracy on validation set
Got 615 / 1000 correct (61.50)

Iteration 200, loss = 0.9690
Checkin

KeyboardInterrupt: 

In [32]:
PM=model.parameters()
L=list(PM)

In [58]:

x = torch.randint(0,10,(2, 2,2) ,requires_grad=True)
y=x+2
z = y * y * 3
out = z.mean()
out.backward()

In [65]:
print(x.grad)
x.grad.max(dim=2)
x.grad.numpy()

tensor([[[6.7500, 3.7500],
         [4.5000, 5.2500]],

        [[5.2500, 5.2500],
         [8.2500, 6.0000]]])


array([[[6.75, 3.75],
        [4.5 , 5.25]],

       [[5.25, 5.25],
        [8.25, 6.  ]]], dtype=float32)

In [75]:
N, C = 4, 5
s = torch.randn(N, C)
y = torch.LongTensor([1, 2, 1, 3])
s.requires_grad_()
print(s)
print(y)
print(s.gather(1, y.view(-1, 1)).squeeze())
out=s.gather(1, y.view(-1, 1)).squeeze()

print(out)
out.backward()

tensor([[-0.2539,  1.6013, -0.1921, -1.0006,  1.1410],
        [ 0.3710,  1.8149,  0.2135,  1.4004,  0.3115],
        [-0.5849,  1.7745, -1.1358, -1.0849,  0.1275],
        [ 1.3938,  0.4705, -1.0626,  0.9832, -1.0800]], requires_grad=True)
tensor([1, 2, 1, 3])
tensor([1.6013, 0.2135, 1.7745, 0.9832], grad_fn=<SqueezeBackward0>)
tensor([1.6013, 0.2135, 1.7745, 0.9832], grad_fn=<SqueezeBackward0>)


RuntimeError: grad can be implicitly created only for scalar outputs

In [121]:
nn.Dropout2d()