## Convolutional Networks

We'll check out how to build a **convolutional network** to classify CIFAR10 images. By using weight sharing - multiple units with the same weights - convolutional layers are able to learn repeated patterns in your data. For example, a unit could learn the pattern for an eye, or a face, or lower level features like edges.


In [3]:
import numpy as np
import time

import torch
#from torch import nn
from torch import optim
#import torch.nn.functional as F
import torch.utils.data as utils
from data_utils import load_CIFAR10
import math
import torch.nn as nn
import torch.nn.functional as func

import helper

In [4]:
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000, num_dev=500):
    """
    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
    it for the linear classifier.  
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = './data/cifar-10-batches-py'
    X_train, y_train = load_CIFAR10(cifar10_dir)
    
    # subsample the data
    mask = list(range(num_training, num_training + num_validation))
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = list(range(num_training))
    X_train = X_train[mask]
    y_train = y_train[mask]
    
    return X_train, y_train, X_val, y_val


# Invoke the above function to get our data.
X_train, y_train, X_val, y_val = get_CIFAR10_data()
print('Train data shape: ', X_train.shape)
print('Train labels shape: ', y_train.shape)
print('Validation data shape: ', X_val.shape)
print('Validation labels shape: ', y_val.shape)

X_train, y_train = torch.from_numpy(X_train).type(torch.FloatTensor), torch.from_numpy(y_train).type(torch.LongTensor)
X_val, y_val = torch.from_numpy(X_val).type(torch.FloatTensor), torch.from_numpy(y_val).type(torch.LongTensor)

traindataset = utils.TensorDataset(X_train, y_train)
trainloader = utils.DataLoader(traindataset, batch_size=64, shuffle=True)

valdataset = utils.TensorDataset(X_val, y_val)
valloader = utils.DataLoader(valdataset, batch_size=64, shuffle=True)

Files already downloaded and verified
Train data shape:  (49000, 3, 32, 32)
Train labels shape:  (49000,)
Validation data shape:  (1000, 3, 32, 32)
Validation labels shape:  (1000,)


In [5]:

class Bottleneck(nn.Module):
    def __init__(self, in_planes, growth_rate):
        super(Bottleneck, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, 4 * growth_rate, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(4 * growth_rate)
        self.conv2 = nn.Conv2d(4 * growth_rate, growth_rate, kernel_size=3, padding=1, bias=False)
    def forward(self, x):
        y = self.conv1(func.relu(self.bn1(x)))
        y = self.conv2(func.relu(self.bn2(y)))
        x = torch.cat([y, x], 1)
        return x

class Transition(nn.Module):
    def __init__(self, in_planes, out_planes):
        super(Transition, self).__init__()
        self.bn = nn.BatchNorm2d(in_planes)
        self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False)

    def forward(self, x):
        x = self.conv(func.relu(self.bn(x)))
        x = func.avg_pool2d(x, 2)
        return x

class DenseNet(nn.Module):
    def __init__(self, block, num_block, growth_rate=12, reduction=0.5, num_classes=10):
        super(DenseNet, self).__init__()
        self.growth_rate = growth_rate
        num_planes = 2 * growth_rate
        self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, padding=1, bias=False)
        self.dense1 = self._make_dense_layers(block, num_planes, num_block[0])
        num_planes += num_block[0] * growth_rate
        out_planes = int(math.floor(num_planes * reduction))
        self.trans1 = Transition(num_planes, out_planes)
        num_planes = out_planes
        self.dense2 = self._make_dense_layers(block, num_planes, num_block[1])
        num_planes += num_block[1] * growth_rate
        out_planes = int(math.floor(num_planes * reduction))
        self.trans2 = Transition(num_planes, out_planes)
        num_planes = out_planes
        self.dense3 = self._make_dense_layers(block, num_planes, num_block[2])
        num_planes += num_block[2] * growth_rate
        out_planes = int(math.floor(num_planes * reduction))
        self.trans3 = Transition(num_planes, out_planes)
        num_planes = out_planes
        self.dense4 = self._make_dense_layers(block, num_planes, num_block[3])
        num_planes += num_block[3] * growth_rate
        self.bn = nn.BatchNorm2d(num_planes)
        self.linear = nn.Linear(num_planes, num_classes)

    def _make_dense_layers(self, block, in_planes, num_block):
        layers = []
        for i in range(num_block):
            layers.append(block(in_planes, self.growth_rate))
            in_planes += self.growth_rate
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.trans1(self.dense1(x))
        x = self.trans2(self.dense2(x))
        x = self.trans3(self.dense3(x))
        x = self.dense4(x)
        x = func.avg_pool2d(func.relu(self.bn(x)), 4)
        x = x.view(x.size(0), -1)
        x = self.linear(x)
        return x   
    
    def predict(self, x):
        logits = self.forward(x)
        return F.softmax(logits)


In [6]:
def _weights_init(m):
    classname = m.__class__.__name__
    if isinstance(m, nn.Linear) or isinstance(m, nn.Conv2d):
        init.kaiming_normal(m.weight)

class LambdaLayer(nn.Module):
    def __init__(self, lambd):
        super(LambdaLayer, self).__init__()
        self.lambd = lambd
        
    def forward(self, x):
        return self.lambd(x)

In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init

class BasicBlock(nn.Module):
    expansion = 1
    def __init__(self, in_planes, planes, stride=1, option='A'):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            if option == 'A':
                self.shortcut = LambdaLayer(lambda x:
                                            F.pad(x[:, :, ::2, ::2], (0, 0, 0, 0, planes//4, planes//4), "constant", 0))
            elif option == 'B':
                self.shortcut = nn.Sequential(
                     nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                     nn.BatchNorm2d(self.expansion * planes)
                )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

class ConvNet(nn.Module):

    def __init__(self, block, num_blocks, num_classes=10):
        super(ConvNet, self).__init__()
        self.in_planes = 16
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2)
        self.linear = nn.Linear(64, num_classes)
        self.apply(_weights_init)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = F.avg_pool2d(out, out.size()[3])
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out   
    
    def predict(self, x):
        logits = self.forward(x)
        return F.softmax(logits)

In [11]:
net = ConvNet(BasicBlock, [3, 3, 3])
################################################################################
# TODO:                                                                        #
# Choose an Optimizer that will be used to minimize the loss function.         #
# Choose a critera that measures the loss                                      #
################################################################################
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.01,momentum=0.9)
#loss_func = torch.nn.CrossEntropyLoss()

epochs = 15
steps = 0
running_loss = 0
print_every = 20
for e in range(epochs):
    start = time.time()
    for images, labels in iter(trainloader):
        
        steps += 1
        ################################################################################
        # TODO:                                                                        #
        # Run the training process                                                     #
        #                                                                              #
        #                                                                              #
        ################################################################################
        #pass
        ################################################################################
        #                              END OF YOUR CODE                                #
        ################################################################################
        
        
        ################################################################################|
        # TODO:                                                                        #
        # Run the training process                                                     #
        #                                                                              #
        # HINT: Calculate the gradient and move one step further                       #
        ################################################################################
        #pass
        ################################################################################
        #                              END OF YOUR CODE                                #
        ################################################################################
        optimizer.zero_grad()
        
        output = net.forward(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()        
        #loss = criterion(output, labels)
        running_loss += loss.item()
        
        if steps % print_every == 0:
            stop = time.time()
            # Test accuracy
            #net.eval()
            #total = 0
            accuracy = 0
            for ii, (images, labels) in enumerate(valloader):
                outputs = net.predict(images)
                #predicted = net.predict(outputs)
                _, predicted = torch.max(outputs.data, 1)
                total = labels.size(0)
                accuracy += ((predicted == labels).sum().item())/total
                ################################################################################
                # TODO:                                                                        #
                # Calculate the accuracy                                                       #
                ################################################################################
                #pass
                ################################################################################
                #                              END OF YOUR CODE                                #
                ################################################################################
            
            print("Epoch: {}/{}..".format(e+1, epochs),
                  "Loss: {:.4f}..".format(running_loss/print_every),
                  "Test accuracy: {:.4f}..".format(accuracy/(ii+1)),
                  "{:.4f} s/batch".format((stop - start)/print_every)
                 )
            running_loss = 0
            start = time.time()

  after removing the cwd from sys.path.


Epoch: 1/15.. Loss: 2.3604.. Test accuracy: 0.2289.. 0.8436 s/batch
Epoch: 1/15.. Loss: 2.0345.. Test accuracy: 0.2844.. 0.8056 s/batch
Epoch: 1/15.. Loss: 1.9254.. Test accuracy: 0.3076.. 0.8137 s/batch
Epoch: 1/15.. Loss: 1.8201.. Test accuracy: 0.3604.. 0.8514 s/batch
Epoch: 1/15.. Loss: 1.7230.. Test accuracy: 0.3691.. 0.8257 s/batch
Epoch: 1/15.. Loss: 1.7025.. Test accuracy: 0.3809.. 0.8153 s/batch
Epoch: 1/15.. Loss: 1.6289.. Test accuracy: 0.3820.. 0.8248 s/batch
Epoch: 1/15.. Loss: 1.6287.. Test accuracy: 0.4221.. 0.8161 s/batch
Epoch: 1/15.. Loss: 1.6142.. Test accuracy: 0.3887.. 0.8113 s/batch
Epoch: 1/15.. Loss: 1.5640.. Test accuracy: 0.4377.. 0.8766 s/batch
Epoch: 1/15.. Loss: 1.6196.. Test accuracy: 0.4547.. 0.8730 s/batch
Epoch: 1/15.. Loss: 1.5390.. Test accuracy: 0.4170.. 0.8099 s/batch
Epoch: 1/15.. Loss: 1.5301.. Test accuracy: 0.4105.. 0.8063 s/batch
Epoch: 1/15.. Loss: 1.5271.. Test accuracy: 0.4383.. 0.8074 s/batch
Epoch: 1/15.. Loss: 1.4653.. Test accuracy: 0.46

Epoch: 4/15.. Loss: 0.8897.. Test accuracy: 0.6984.. 0.8067 s/batch
Epoch: 4/15.. Loss: 0.8639.. Test accuracy: 0.6809.. 0.8056 s/batch
Epoch: 4/15.. Loss: 0.7958.. Test accuracy: 0.7016.. 0.8021 s/batch
Epoch: 4/15.. Loss: 0.7900.. Test accuracy: 0.6652.. 0.8042 s/batch
Epoch: 4/15.. Loss: 0.9324.. Test accuracy: 0.6457.. 0.8021 s/batch
Epoch: 4/15.. Loss: 0.8180.. Test accuracy: 0.7051.. 0.8017 s/batch
Epoch: 4/15.. Loss: 0.8949.. Test accuracy: 0.7014.. 0.8032 s/batch
Epoch: 4/15.. Loss: 0.8577.. Test accuracy: 0.6893.. 0.8081 s/batch
Epoch: 4/15.. Loss: 0.8669.. Test accuracy: 0.6949.. 0.8088 s/batch
Epoch: 4/15.. Loss: 0.8514.. Test accuracy: 0.7010.. 0.8039 s/batch
Epoch: 4/15.. Loss: 0.8271.. Test accuracy: 0.6865.. 0.8049 s/batch
Epoch: 4/15.. Loss: 0.8790.. Test accuracy: 0.6771.. 0.8065 s/batch
Epoch: 4/15.. Loss: 0.8526.. Test accuracy: 0.6975.. 0.8074 s/batch
Epoch: 4/15.. Loss: 0.8696.. Test accuracy: 0.6873.. 0.8097 s/batch
Epoch: 4/15.. Loss: 0.8482.. Test accuracy: 0.69

Epoch: 7/15.. Loss: 0.5920.. Test accuracy: 0.7490.. 0.8086 s/batch
Epoch: 7/15.. Loss: 0.6487.. Test accuracy: 0.7596.. 0.8104 s/batch
Epoch: 7/15.. Loss: 0.5829.. Test accuracy: 0.7506.. 0.8046 s/batch
Epoch: 7/15.. Loss: 0.5729.. Test accuracy: 0.7609.. 0.8057 s/batch
Epoch: 7/15.. Loss: 0.6433.. Test accuracy: 0.7619.. 0.8066 s/batch
Epoch: 7/15.. Loss: 0.5622.. Test accuracy: 0.7582.. 0.8074 s/batch
Epoch: 7/15.. Loss: 0.6141.. Test accuracy: 0.7697.. 0.8021 s/batch
Epoch: 7/15.. Loss: 0.6154.. Test accuracy: 0.7561.. 0.8053 s/batch
Epoch: 7/15.. Loss: 0.6014.. Test accuracy: 0.7594.. 0.8050 s/batch
Epoch: 7/15.. Loss: 0.5998.. Test accuracy: 0.7842.. 0.8124 s/batch
Epoch: 7/15.. Loss: 0.6577.. Test accuracy: 0.7629.. 0.8063 s/batch
Epoch: 7/15.. Loss: 0.6165.. Test accuracy: 0.7568.. 0.8038 s/batch
Epoch: 7/15.. Loss: 0.6043.. Test accuracy: 0.7625.. 0.8056 s/batch
Epoch: 7/15.. Loss: 0.6156.. Test accuracy: 0.7533.. 0.8078 s/batch
Epoch: 7/15.. Loss: 0.5893.. Test accuracy: 0.76

Epoch: 10/15.. Loss: 0.5214.. Test accuracy: 0.7934.. 0.8092 s/batch
Epoch: 10/15.. Loss: 0.4480.. Test accuracy: 0.8004.. 0.8105 s/batch
Epoch: 10/15.. Loss: 0.4752.. Test accuracy: 0.7801.. 0.8135 s/batch
Epoch: 10/15.. Loss: 0.4492.. Test accuracy: 0.8049.. 0.8047 s/batch
Epoch: 10/15.. Loss: 0.4025.. Test accuracy: 0.7986.. 0.8063 s/batch
Epoch: 10/15.. Loss: 0.4400.. Test accuracy: 0.7770.. 0.8065 s/batch
Epoch: 10/15.. Loss: 0.4896.. Test accuracy: 0.7867.. 0.8048 s/batch
Epoch: 10/15.. Loss: 0.5103.. Test accuracy: 0.7740.. 0.8094 s/batch
Epoch: 10/15.. Loss: 0.4530.. Test accuracy: 0.7910.. 0.8044 s/batch
Epoch: 10/15.. Loss: 0.5063.. Test accuracy: 0.7746.. 0.8048 s/batch
Epoch: 10/15.. Loss: 0.5003.. Test accuracy: 0.7791.. 0.8065 s/batch
Epoch: 10/15.. Loss: 0.4977.. Test accuracy: 0.7918.. 0.8060 s/batch
Epoch: 10/15.. Loss: 0.4684.. Test accuracy: 0.7887.. 0.8087 s/batch
Epoch: 10/15.. Loss: 0.4886.. Test accuracy: 0.7875.. 0.8030 s/batch
Epoch: 10/15.. Loss: 0.5310.. Test

Epoch: 13/15.. Loss: 0.3497.. Test accuracy: 0.7951.. 0.8045 s/batch
Epoch: 13/15.. Loss: 0.3336.. Test accuracy: 0.7967.. 0.8030 s/batch
Epoch: 13/15.. Loss: 0.3685.. Test accuracy: 0.7898.. 0.8028 s/batch
Epoch: 13/15.. Loss: 0.3743.. Test accuracy: 0.8090.. 0.8050 s/batch
Epoch: 13/15.. Loss: 0.3525.. Test accuracy: 0.7877.. 0.8031 s/batch
Epoch: 13/15.. Loss: 0.3938.. Test accuracy: 0.7807.. 0.8128 s/batch
Epoch: 13/15.. Loss: 0.4196.. Test accuracy: 0.7908.. 0.8042 s/batch
Epoch: 13/15.. Loss: 0.3965.. Test accuracy: 0.7857.. 0.8024 s/batch
Epoch: 13/15.. Loss: 0.4056.. Test accuracy: 0.7969.. 0.8011 s/batch
Epoch: 13/15.. Loss: 0.3453.. Test accuracy: 0.7879.. 0.8051 s/batch
Epoch: 13/15.. Loss: 0.4119.. Test accuracy: 0.7977.. 0.8076 s/batch
Epoch: 13/15.. Loss: 0.4180.. Test accuracy: 0.7943.. 0.7955 s/batch
Epoch: 13/15.. Loss: 0.3912.. Test accuracy: 0.7984.. 0.8066 s/batch
Epoch: 13/15.. Loss: 0.3738.. Test accuracy: 0.8025.. 0.8025 s/batch
Epoch: 13/15.. Loss: 0.4013.. Test

Save best trained model.

In [12]:
torch.save(net.state_dict(), 'model_resnet.ckpt')