In [2]:
from utils import count_params
import preprocessing as preprocess
import torch
import numpy as np
import torchvision.datasets as datasets
from torch.utils import data
from torch.utils.tensorboard import SummaryWriter
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
from torchvision import transforms

import pickle
import tensorflow as tf
import time

import matplotlib.pyplot as plt
%matplotlib inline

if torch.cuda.is_available():  
    DEVICE = "cuda:0" 
else:  
    DEVICE = "cpu"
print(DEVICE)

cuda:0


In [2]:
writer = SummaryWriter(comment = '_resnet20')

In [3]:
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
trainloader = torch.utils.data.DataLoader(
        datasets.CIFAR100(root='./data', train=True, transform=transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomCrop(32, 4),
            transforms.ToTensor(),
            normalize,
        ]), download=True),
        batch_size=500, shuffle=True,
        num_workers=4, pin_memory=True)

testloader = torch.utils.data.DataLoader(
        datasets.CIFAR100(root='./data', train=False, transform=transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=500, shuffle=False,
        num_workers=4, pin_memory=True)

Files already downloaded and verified


### Model

In [5]:
def _weights_init(m):
    """
        Initialization of CNN weights
    """
    classname = m.__class__.__name__
    if isinstance(m, nn.Linear) or isinstance(m, nn.Conv2d):
        init.kaiming_normal_(m.weight)


class LambdaLayer(nn.Module):
    """
      Identity mapping between ResNet blocks with diffrenet size feature map
    """
    def __init__(self, lambd):
        super(LambdaLayer, self).__init__()
        self.lambd = lambd

    def forward(self, x):
        return self.lambd(x)

# A basic block as shown in Fig.3 (right) in the paper consists of two convolutional blocks, each followed by a Bach-Norm layer. 
# Every basic block is shortcuted in ResNet architecture to construct f(x)+x module. 
# Expansion for option 'A' in the paper is equal to identity with extra zero entries padded
# for increasing dimensions between layers with different feature map size. This option introduces no extra parameter. 
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1, option='A'):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            if option == 'A':
                """
                For CIFAR10 experiment, ResNet paper uses option A.
                """
                self.shortcut = LambdaLayer(lambda x:
                                            F.pad(x[:, :, ::2, ::2], (0, 0, 0, 0, planes//4, planes//4), "constant", 0))
            elif option == 'B':
                self.shortcut = nn.Sequential(
                     nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                     nn.BatchNorm2d(self.expansion * planes)
                )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

# Stack of 3 times 2*n (n is the number of basic blocks) layers are used for making the ResNet model, 
# where each 2n layers have feature maps of size {16,32,64}, respectively. 
# The subsampling is performed by convolutions with a stride of 2.
class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=100):
        super(ResNet, self).__init__()
        self.in_planes = 16
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2)
        self.linear = nn.Linear(64, num_classes)
        self.apply(_weights_init)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion

        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = F.avg_pool2d(out, out.size()[3])
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def resnet20():
    return ResNet(BasicBlock, [3, 3, 3])

In [5]:
## Training Routine
def training_routine(model, train_generator, test_generator, n_epochs, writer = writer,  
                     eval_every=5):
    
    model.to(DEVICE)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr =0.1, 
                                momentum = 0.9, weight_decay = 0.001)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor =0.5, patience = 1)
    accuracies = []
    
    for i in range(n_epochs):
        # Iterate over batches
        batch_losses = []
        
        for X_batch, y_batch in train_generator:
            optimizer.zero_grad()
            # forward pass
            X_batch, y_batch = X_batch.to(DEVICE), y_batch.to(DEVICE)
            batch_output = model(X_batch)
            batch_loss = criterion(batch_output, y_batch)
            # backward pass and optimization
            batch_loss.backward()
            optimizer.step()
            batch_losses.append(batch_loss.cpu().detach())
        print("Epoch {} | training loss: {}".format(i, np.mean(batch_losses)))
        writer.add_scalar('Loss/train', np.mean(batch_losses), i)
       
        
        # Once every 100 iterations, print statistics
        if i%eval_every==0:
            train_accuracy = []
            test_accuracy = []
            for X_batch, y_batch in train_generator:
                X_batch, y_batch = X_batch.to(DEVICE), y_batch.to(DEVICE)
                batch_output = model(X_batch)
                batch_prediction = batch_output.cpu().detach().argmax(dim=1)
                train_accuracy.append((batch_prediction.numpy()==y_batch.cpu().numpy()).mean())
                
            for X_batch, y_batch in test_generator:
                X_batch, y_batch = X_batch.to(DEVICE), y_batch.to(DEVICE)
                batch_output = model(X_batch)
                batch_prediction = batch_output.cpu().detach().argmax(dim=1)
                test_accuracy.append((batch_prediction.numpy()==y_batch.cpu().numpy()).mean())
            print("Epoch {} | train acc: {}, test acc: {}".format(i, np.mean(train_accuracy), np.mean(test_accuracy)))
            writer.add_scalar('Accuracy/train', np.mean(train_accuracy), i)
            writer.add_scalar('Accuracy/test', np.mean(test_accuracy), i)
            scheduler.step(np.mean(test_accuracy))
            accuracies.append((i, np.mean(train_accuracy), np.mean(test_accuracy)))
            
    return model.cpu(), accuracies

In [6]:
model = resnet20()
start = time.time()
trained_net, accuracies = training_routine(model, trainloader, testloader, 51)
end = time.time()
print((end - start)/60) 

Epoch 0 | training loss: 4.499466419219971
Epoch 0 | train acc: 0.0666, test acc: 0.06330000000000001
Epoch 1 | training loss: 3.979645013809204
Epoch 2 | training loss: 3.6204090118408203
Epoch 3 | training loss: 3.391629219055176
Epoch 4 | training loss: 3.129716396331787
Epoch 5 | training loss: 2.857428789138794
Epoch 5 | train acc: 0.29943999999999993, test acc: 0.2884
Epoch 6 | training loss: 2.674858331680298
Epoch 7 | training loss: 2.5371713638305664
Epoch 8 | training loss: 2.3982579708099365
Epoch 9 | training loss: 2.2790615558624268
Epoch 10 | training loss: 2.172884225845337
Epoch 10 | train acc: 0.43467999999999996, test acc: 0.40630000000000005
Epoch 11 | training loss: 1.9350450038909912
Epoch 12 | training loss: 1.8679378032684326
Epoch 13 | training loss: 1.821645975112915
Epoch 14 | training loss: 1.7743632793426514
Epoch 15 | training loss: 1.7472634315490723
Epoch 15 | train acc: 0.5280199999999999, test acc: 0.48279999999999995
Epoch 16 | training loss: 1.7212222

In [8]:
model_dict = trained_net.state_dict()
torch.save(model_dict, "resnet20")

In [1]:
num_params = count_params(model)
print(num_params)

NameError: name 'count_params' is not defined

### Evaluation

In [10]:
y_pred = []
model = model.cuda()
model.eval()

testloader = torch.utils.data.DataLoader(
        datasets.CIFAR100(root='./data', train=False, transform=transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=100, shuffle=False,
        num_workers=1, pin_memory=True)

start = time.time()
for local_batch, local_labels in testloader:
    local_batch, local_labels = local_batch.cuda(), local_labels.cuda()
    batch_output = model(local_batch)
    batch_prediction = batch_output.cpu().detach().argmax(dim=1)
    y_pred.append(batch_prediction)
end = time.time()  

y_pred = torch.cat(y_pred).numpy()

In [12]:
(end-start)/100

0.021204571723937988

In [None]:
from sklearn.metrics import f1_score, precision_score, recall_score, classification_report, confusion_matrix
n_classes = 100

import matplotlib.pyplot as plt
%matplotlib inline
plt.imshow(confusion_matrix(y_test, y_pred, labels=range(n_classes)))
plt.colorbar()

In [None]:
accuracies = np.array(accuracies)
fig, ax = plt.subplots(1, 1, figsize = (6, 4))
ax.plot(accuracies[:, 0], accuracies[:, 1], label = "Train")
ax.plot(accuracies[:, 0], accuracies[:, 2], label = 'Test')
ax.legend(fontsize = 10)
ax.set_xlim((0,50))
ax.set_ylabel('Accuarcy', fontsize = 12)
ax.set_xlabel("# of Epochs", fontsize = 12)