## Convolutional Networks

We'll check out how to build a **convolutional network** to classify CIFAR10 images. By using weight sharing - multiple units with the same weights - convolutional layers are able to learn repeated patterns in your data. For example, a unit could learn the pattern for an eye, or a face, or lower level features like edges.


In [1]:
import numpy as np
import time

import torch
#from torch import nn
from torch import optim
#import torch.nn.functional as F
import torch.utils.data as utils
from data_utils import load_CIFAR10
import math
import torch.nn as nn
import torch.nn.functional as func

import helper

In [2]:
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000, num_dev=500):
    """
    Load CIFAR-10 through ``load_CIFAR10`` and split it into a training part
    and a validation part.

    The first ``num_training`` samples become the training split; the
    ``num_validation`` samples that follow become the validation split.
    ``num_test`` and ``num_dev`` are accepted but not used here.

    Returns the tuple ``(X_train, y_train, X_val, y_val)``.
    """
    data_root = './data/cifar-10-batches-py'
    images, targets = load_CIFAR10(data_root)

    # Two consecutive, non-overlapping index ranges over the loaded samples.
    train_idx = np.arange(num_training)
    val_idx = np.arange(num_training, num_training + num_validation)

    return images[train_idx], targets[train_idx], images[val_idx], targets[val_idx]


# Fetch the train/validation arrays and report their shapes.
X_train, y_train, X_val, y_val = get_CIFAR10_data()
print('Train data shape: ', X_train.shape)
print('Train labels shape: ', y_train.shape)
print('Validation data shape: ', X_val.shape)
print('Validation labels shape: ', y_val.shape)

# Convert the arrays to tensors: float32 images, int64 class labels.
X_train = torch.from_numpy(X_train).float()
y_train = torch.from_numpy(y_train).long()
X_val = torch.from_numpy(X_val).float()
y_val = torch.from_numpy(y_val).long()

# Wrap each split in a dataset and a shuffling mini-batch loader.
traindataset = utils.TensorDataset(X_train, y_train)
valdataset = utils.TensorDataset(X_val, y_val)
trainloader = utils.DataLoader(traindataset, shuffle=True, batch_size=64)
valloader = utils.DataLoader(valdataset, shuffle=True, batch_size=64)

Files already downloaded and verified
Train data shape:  (49000, 3, 32, 32)
Train labels shape:  (49000,)
Validation data shape:  (1000, 3, 32, 32)
Validation labels shape:  (1000,)


In [3]:

class Bottleneck(nn.Module):
    """DenseNet bottleneck unit: BN-ReLU-1x1 conv, then BN-ReLU-3x3 conv.

    The unit computes ``growth_rate`` new feature maps and concatenates them
    in front of its input along the channel dimension, so the output carries
    ``growth_rate + in_planes`` channels at the input's spatial size.
    """

    def __init__(self, in_planes, growth_rate):
        super().__init__()
        # The 1x1 convolution expands to 4 * growth_rate channels before the 3x3.
        inner_planes = 4 * growth_rate
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, inner_planes, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(inner_planes)
        self.conv2 = nn.Conv2d(inner_planes, growth_rate, kernel_size=3, padding=1, bias=False)

    def forward(self, x):
        """Return the new feature maps concatenated with ``x`` on dim 1."""
        hidden = self.conv1(func.relu(self.bn1(x)))
        new_features = self.conv2(func.relu(self.bn2(hidden)))
        return torch.cat((new_features, x), dim=1)

class Transition(nn.Module):
    """DenseNet transition: BN-ReLU-1x1 conv followed by 2x2 average pooling.

    Maps ``in_planes`` channels to ``out_planes`` channels and halves the
    spatial resolution.
    """

    def __init__(self, in_planes, out_planes):
        super().__init__()
        self.bn = nn.BatchNorm2d(in_planes)
        self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False)

    def forward(self, x):
        """Normalize, activate, project channels, then pool with kernel 2."""
        projected = self.conv(func.relu(self.bn(x)))
        return func.avg_pool2d(projected, 2)

class DenseNet(nn.Module):
    """DenseNet classifier built from four dense stages and three transitions.

    Args:
        block: Module class used inside the dense stages. It is called as
            ``block(in_planes, growth_rate)`` and must output
            ``in_planes + growth_rate`` channels.
        num_block: Sequence of four ints, the number of ``block`` units in
            each dense stage.
        growth_rate: Number of channels each ``block`` adds.
        reduction: Channel compression factor applied at every transition
            (``out = floor(in * reduction)``).
        num_classes: Size of the final linear layer's output.

    NOTE: ``forward`` pools with a fixed 4x4 window and then flattens into a
    linear layer sized for a 1x1 map, so it expects the feature map to be 4x4
    after the three transitions (e.g. 32x32 inputs when each transition
    halves the resolution).
    """

    def __init__(self, block, num_block, growth_rate=12, reduction=0.5, num_classes=10):
        super(DenseNet, self).__init__()
        self.growth_rate = growth_rate

        # Stem: plain 3x3 convolution producing 2 * growth_rate channels.
        num_planes = 2 * growth_rate
        self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, padding=1, bias=False)

        # Stage 1 + transition 1.
        self.dense1 = self._make_dense_layers(block, num_planes, num_block[0])
        num_planes += num_block[0] * growth_rate
        out_planes = int(math.floor(num_planes * reduction))
        self.trans1 = Transition(num_planes, out_planes)
        num_planes = out_planes

        # Stage 2 + transition 2.
        self.dense2 = self._make_dense_layers(block, num_planes, num_block[1])
        num_planes += num_block[1] * growth_rate
        out_planes = int(math.floor(num_planes * reduction))
        self.trans2 = Transition(num_planes, out_planes)
        num_planes = out_planes

        # Stage 3 + transition 3.
        self.dense3 = self._make_dense_layers(block, num_planes, num_block[2])
        num_planes += num_block[2] * growth_rate
        out_planes = int(math.floor(num_planes * reduction))
        self.trans3 = Transition(num_planes, out_planes)
        num_planes = out_planes

        # Stage 4 has no transition; it feeds the classifier head directly.
        self.dense4 = self._make_dense_layers(block, num_planes, num_block[3])
        num_planes += num_block[3] * growth_rate

        self.bn = nn.BatchNorm2d(num_planes)
        self.linear = nn.Linear(num_planes, num_classes)

    def _make_dense_layers(self, block, in_planes, num_block):
        """Stack ``num_block`` units, each seeing ``growth_rate`` more channels."""
        layers = []
        for _ in range(num_block):
            layers.append(block(in_planes, self.growth_rate))
            in_planes += self.growth_rate
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the unnormalized class scores (logits) for a batch ``x``."""
        x = self.conv1(x)
        x = self.trans1(self.dense1(x))
        x = self.trans2(self.dense2(x))
        x = self.trans3(self.dense3(x))
        x = self.dense4(x)
        x = func.avg_pool2d(func.relu(self.bn(x)), 4)
        x = x.view(x.size(0), -1)
        x = self.linear(x)
        return x

    def predict(self, x):
        """Return class probabilities: softmax of the logits over dim 1."""
        # Call the module (not .forward) so registered hooks still run, and
        # use this cell's `func` alias with an explicit dim; the implicit-dim
        # form of softmax is deprecated.
        logits = self(x)
        return func.softmax(logits, dim=1)


In [4]:
def _weights_init(m):
    classname = m.__class__.__name__
    if isinstance(m, nn.Linear) or isinstance(m, nn.Conv2d):
        init.kaiming_normal(m.weight)

class LambdaLayer(nn.Module):
    """Module wrapper around an arbitrary callable.

    The callable is stored as ``self.lambd`` and invoked on the input in
    ``forward``; the layer itself owns no parameters.
    """

    def __init__(self, lambd):
        super().__init__()
        self.lambd = lambd

    def forward(self, x):
        """Return ``self.lambd(x)``."""
        fn = self.lambd
        return fn(x)

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init

class BasicBlock(nn.Module):
    """Two-convolution residual block (3x3 conv-BN-ReLU, 3x3 conv-BN, add, ReLU).

    Args:
        in_planes: Number of input channels.
        planes: Number of output channels.
        stride: Stride of the first convolution.
        option: How the shortcut is adapted when ``stride != 1`` or
            ``in_planes != planes``:

            * ``'A'`` - parameter-free: keep every second row/column of the
              input and zero-pad ``planes // 4`` channels on each side.
              NOTE: the subsampling step is always 2, independent of
              ``stride``.
            * ``'B'`` - 1x1 convolution with ``stride`` followed by batch norm.

    Raises:
        ValueError: If a shortcut adaptation is required and ``option`` is
            neither ``'A'`` nor ``'B'``.
    """

    expansion = 1

    def __init__(self, in_planes, planes, stride=1, option='A'):
        super().__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        # Identity shortcut unless the residual branch changes shape.
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            if option == 'A':
                pad = planes // 4
                # F.pad pads from the last dim backwards: (W, W, H, H, C, C).
                self.shortcut = LambdaLayer(
                    lambda x: F.pad(x[:, :, ::2, ::2], (0, 0, 0, 0, pad, pad), "constant", 0)
                )
            elif option == 'B':
                self.shortcut = nn.Sequential(
                    nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                    nn.BatchNorm2d(self.expansion * planes),
                )
            else:
                # Previously an unknown option silently kept the identity
                # shortcut and only failed later, inside forward().
                raise ValueError(
                    "option must be 'A' or 'B' when the shortcut has to change "
                    "shape, got {!r}".format(option)
                )

    def forward(self, x):
        """Return ``relu(residual(x) + shortcut(x))``."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

class ConvNet(nn.Module):
    """Three-stage residual network with 16/32/64 channels.

    Args:
        block: Residual block class. It is called as
            ``block(in_planes, planes, stride)`` and must expose an
            ``expansion`` class attribute.
        num_blocks: Sequence of three ints, the number of blocks per stage.
        num_classes: Size of the final linear layer's output.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        # The original called super(ResNet, self), but no class named ResNet
        # exists, so construction raised NameError.
        super().__init__()
        self.in_planes = 16
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2)
        self.linear = nn.Linear(64, num_classes)
        self.apply(_weights_init)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage; only its first block uses ``stride``, the rest use 1.

        Updates ``self.in_planes`` so the next block/stage sees the right
        input width.
        """
        layers = []
        for block_stride in [stride] + [1] * (num_blocks - 1):
            layers.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the unnormalized class scores (logits) for a batch ``x``."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        # Pool over the whole remaining feature map (kernel = its width).
        out = F.avg_pool2d(out, out.size()[3])
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

    def predict(self, x):
        """Return class probabilities: softmax of the logits over dim 1."""
        # Explicit dim: the implicit-dim form of softmax is deprecated.
        logits = self(x)
        return F.softmax(logits, dim=1)

In [6]:
net = ConvNet(BasicBlock, [3, 3, 3])

# Loss and optimizer: cross-entropy on the raw logits, minimized with
# SGD + momentum.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.01, momentum=0.9)

epochs = 15
steps = 0
running_loss = 0
print_every = 20
for e in range(epochs):
    start = time.time()
    for images, labels in trainloader:
        steps += 1

        # One optimization step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        output = net(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        if steps % print_every == 0:
            stop = time.time()

            # Evaluate on the validation loader. eval() makes BatchNorm use
            # its running statistics instead of updating them from validation
            # batches, and no_grad() skips building the autograd graph.
            net.eval()
            correct = 0
            total = 0
            with torch.no_grad():
                for val_images, val_labels in valloader:
                    predicted = net.predict(val_images).argmax(dim=1)
                    correct += (predicted == val_labels).sum().item()
                    total += val_labels.size(0)
            net.train()

            # Fraction of correctly classified samples over the whole loader;
            # averaging per-batch accuracies would over-weight a smaller
            # final batch.
            accuracy = correct / total if total else 0.0

            print("Epoch: {}/{}..".format(e+1, epochs),
                  "Loss: {:.4f}..".format(running_loss/print_every),
                  "Test accuracy: {:.4f}..".format(accuracy),
                  "{:.4f} s/batch".format((stop - start)/print_every)
                 )
            running_loss = 0
            start = time.time()

  """


Epoch: 1/30.. Loss: 2.2293.. Test accuracy: 9.0000.. 0.8902 s/batch
Epoch: 1/30.. Loss: 3.0759.. Test accuracy: 18.9000.. 0.8635 s/batch
Epoch: 1/30.. Loss: 2.1244.. Test accuracy: 26.0000.. 0.8440 s/batch
Epoch: 1/30.. Loss: 2.0706.. Test accuracy: 25.9000.. 0.7839 s/batch
Epoch: 1/30.. Loss: 2.0709.. Test accuracy: 22.6000.. 0.8078 s/batch
Epoch: 1/30.. Loss: 2.0041.. Test accuracy: 26.2000.. 0.8976 s/batch
Epoch: 1/30.. Loss: 1.9713.. Test accuracy: 26.3000.. 0.8550 s/batch
Epoch: 1/30.. Loss: 1.9803.. Test accuracy: 26.6000.. 0.7893 s/batch
Epoch: 1/30.. Loss: 1.9693.. Test accuracy: 27.7000.. 0.8343 s/batch
Epoch: 1/30.. Loss: 1.8686.. Test accuracy: 26.0000.. 0.9081 s/batch
Epoch: 1/30.. Loss: 1.8726.. Test accuracy: 28.5000.. 0.8839 s/batch
Epoch: 1/30.. Loss: 1.8881.. Test accuracy: 35.1000.. 0.8401 s/batch
Epoch: 1/30.. Loss: 1.8648.. Test accuracy: 36.2000.. 0.8626 s/batch
Epoch: 1/30.. Loss: 1.8740.. Test accuracy: 36.3000.. 0.8870 s/batch
Epoch: 1/30.. Loss: 1.7933.. Test a

Epoch: 7/30.. Loss: 0.9153.. Test accuracy: 67.5000.. 0.8467 s/batch
Epoch: 7/30.. Loss: 0.9178.. Test accuracy: 68.4000.. 0.8549 s/batch
Epoch: 7/30.. Loss: 0.8424.. Test accuracy: 63.0000.. 0.7946 s/batch
Epoch: 7/30.. Loss: 0.9236.. Test accuracy: 67.0000.. 0.7912 s/batch
Epoch: 7/30.. Loss: 0.9310.. Test accuracy: 65.7000.. 0.7968 s/batch
Epoch: 7/30.. Loss: 0.9010.. Test accuracy: 68.0000.. 0.8447 s/batch
Epoch: 7/30.. Loss: 0.9035.. Test accuracy: 65.1000.. 0.8727 s/batch
Epoch: 7/30.. Loss: 0.8796.. Test accuracy: 64.7000.. 0.9182 s/batch
Epoch: 7/30.. Loss: 0.9708.. Test accuracy: 65.8000.. 0.8677 s/batch
Epoch: 7/30.. Loss: 0.9372.. Test accuracy: 68.2000.. 0.7980 s/batch
Epoch: 7/30.. Loss: 0.8837.. Test accuracy: 68.5000.. 0.8294 s/batch
Epoch: 7/30.. Loss: 0.8742.. Test accuracy: 69.3000.. 0.7901 s/batch
Epoch: 7/30.. Loss: 0.8728.. Test accuracy: 67.4000.. 0.7994 s/batch
Epoch: 7/30.. Loss: 0.8736.. Test accuracy: 61.3000.. 0.8132 s/batch
Epoch: 7/30.. Loss: 0.8947.. Test 

Epoch: 13/30.. Loss: 0.6785.. Test accuracy: 70.6000.. 0.8437 s/batch
Epoch: 13/30.. Loss: 0.6433.. Test accuracy: 70.5000.. 0.8319 s/batch
Epoch: 13/30.. Loss: 0.7060.. Test accuracy: 70.7000.. 0.8360 s/batch
Epoch: 13/30.. Loss: 0.6879.. Test accuracy: 71.8000.. 0.7946 s/batch
Epoch: 13/30.. Loss: 0.6718.. Test accuracy: 71.0000.. 0.8345 s/batch
Epoch: 13/30.. Loss: 0.6638.. Test accuracy: 70.6000.. 0.8419 s/batch
Epoch: 13/30.. Loss: 0.7496.. Test accuracy: 71.3000.. 0.9326 s/batch
Epoch: 13/30.. Loss: 0.6806.. Test accuracy: 71.7000.. 0.8012 s/batch
Epoch: 13/30.. Loss: 0.6516.. Test accuracy: 69.8000.. 0.8669 s/batch
Epoch: 13/30.. Loss: 0.6863.. Test accuracy: 70.7000.. 0.8600 s/batch
Epoch: 13/30.. Loss: 0.7300.. Test accuracy: 67.8000.. 0.8696 s/batch
Epoch: 13/30.. Loss: 0.7204.. Test accuracy: 69.1000.. 0.8657 s/batch
Epoch: 13/30.. Loss: 0.7030.. Test accuracy: 70.7000.. 0.8059 s/batch
Epoch: 13/30.. Loss: 0.7057.. Test accuracy: 72.3000.. 0.8727 s/batch
Epoch: 13/30.. Loss:

Epoch: 19/30.. Loss: 0.5488.. Test accuracy: 70.0000.. 0.9304 s/batch
Epoch: 19/30.. Loss: 0.5604.. Test accuracy: 70.9000.. 0.8853 s/batch
Epoch: 19/30.. Loss: 0.5181.. Test accuracy: 71.6000.. 0.9030 s/batch
Epoch: 19/30.. Loss: 0.5482.. Test accuracy: 70.7000.. 0.8200 s/batch
Epoch: 19/30.. Loss: 0.6193.. Test accuracy: 71.0000.. 0.8252 s/batch
Epoch: 19/30.. Loss: 0.5579.. Test accuracy: 71.3000.. 0.8193 s/batch
Epoch: 19/30.. Loss: 0.5638.. Test accuracy: 71.6000.. 0.8325 s/batch
Epoch: 19/30.. Loss: 0.6178.. Test accuracy: 71.5000.. 0.8357 s/batch
Epoch: 19/30.. Loss: 0.6102.. Test accuracy: 71.5000.. 0.8484 s/batch
Epoch: 19/30.. Loss: 0.5950.. Test accuracy: 73.6000.. 0.8064 s/batch
Epoch: 19/30.. Loss: 0.5720.. Test accuracy: 71.0000.. 0.8144 s/batch
Epoch: 19/30.. Loss: 0.5388.. Test accuracy: 72.6000.. 0.8769 s/batch
Epoch: 19/30.. Loss: 0.5368.. Test accuracy: 71.9000.. 0.8629 s/batch
Epoch: 19/30.. Loss: 0.5360.. Test accuracy: 72.6000.. 0.8264 s/batch
Epoch: 19/30.. Loss:

Epoch: 25/30.. Loss: 0.5079.. Test accuracy: 73.7000.. 0.7750 s/batch
Epoch: 25/30.. Loss: 0.4807.. Test accuracy: 72.2000.. 0.7770 s/batch
Epoch: 25/30.. Loss: 0.4900.. Test accuracy: 74.5000.. 0.7746 s/batch
Epoch: 25/30.. Loss: 0.5451.. Test accuracy: 73.5000.. 0.7782 s/batch
Epoch: 25/30.. Loss: 0.4694.. Test accuracy: 71.8000.. 0.7715 s/batch
Epoch: 25/30.. Loss: 0.4510.. Test accuracy: 74.4000.. 0.7713 s/batch
Epoch: 25/30.. Loss: 0.4860.. Test accuracy: 74.3000.. 0.7776 s/batch
Epoch: 25/30.. Loss: 0.4727.. Test accuracy: 72.0000.. 0.7726 s/batch
Epoch: 25/30.. Loss: 0.5229.. Test accuracy: 72.7000.. 0.7760 s/batch
Epoch: 25/30.. Loss: 0.4816.. Test accuracy: 72.3000.. 0.7777 s/batch
Epoch: 25/30.. Loss: 0.5034.. Test accuracy: 71.7000.. 0.7758 s/batch
Epoch: 25/30.. Loss: 0.5195.. Test accuracy: 72.8000.. 0.7792 s/batch
Epoch: 25/30.. Loss: 0.5546.. Test accuracy: 73.9000.. 0.7776 s/batch
Epoch: 25/30.. Loss: 0.5008.. Test accuracy: 72.9000.. 0.7780 s/batch
Epoch: 25/30.. Loss:

Save the trained model's weights.

In [7]:
# Write only the network's state_dict (not the module object itself) to
# 'model_resnet.ckpt'. This is whatever state `net` holds when the cell runs;
# no best-checkpoint selection happens here.
torch.save(net.state_dict(), 'model_resnet.ckpt')