[VGG-net](https://arxiv.org/abs/1409.1556).

In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import torch
import torchvision
import torchvision.transforms as transforms

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import tools
import tests

In [2]:
data_dir = tools.select_data_dir()

The data directory is ../data


In [3]:
device = torch.device('cpu')

In [4]:
transform = transforms.Compose([
    transforms.ToTensor(),  # Transform to tensor
    transforms.Normalize((0.5,), (0.5,))  # Scale images to [-1, 1]
])

trainset = torchvision.datasets.FashionMNIST(root=data_dir, train=True, download=True, transform=transform)
testset = torchvision.datasets.FashionMNIST(root=data_dir, train=False, download=True, transform=transform)

classes = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal',
           'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=5, shuffle=False)

The architecture:
- A block of three convolutional layers with:
    - 3x3 kernel
    - 20 output channels
    - one pixel zero-pading on both sides
    - 2d batch normalization after each convolutional layer
    - ReLU nonlinearity after each 2d batch normalization layer
- Max pooling layer with 2x2 kernel and stride 2.
- A block of three convolutional layers with:
    - 3x3 kernel
    - 40 output channels
    - one pixel zero-pading on both sides
    - 2d batch normalization after each convolutional layer
    - ReLU nonlinearity after each 2d batch normalization layer
- Max pooling layer with 2x2 kernel and stride 2.
- One convolutional layer with:
    - 3x3 kernel
    - 60 output channels
    - *no padding*
    - 2d batch normalization after the convolutional layer
    - ReLU nonlinearity after the 2d batch normalization layer
- One convolutional layer with:
    - 1x1 kernel
    - 40 output channels
    - *no padding*
    - 2d batch normalization after the convolutional layer
    - ReLU nonlinearity after the 2d batch normalization layer
- One convolutional layer with:
    - 1x1 kernel
    - 20 output channels
    - *no padding*
    - 2d batch normalization after the convolutional layer
    - ReLU nonlinearity after the 2d batch normalization layer
- Global average pooling (compute the average value of each channel across all the input locations):
    - 5x5 kernel (the input of the layer should be 5x5)
- A fully-connected layer with 10 outputs (no nonlinearity)

In [38]:
class VGGNet(nn.Module):
    def __init__(self):
        super(VGGNet, self).__init__()
        # YOUR CODE HERE
        self.conv1 = nn.Conv2d(1, out_channels=20, kernel_size=3, padding=(1,1)) # block of three conv
        self.conv1_2 = nn.Conv2d(20, out_channels=20, kernel_size=3, padding=(1,1)) # block of three conv
        self.conv1_3 = nn.Conv2d(20, out_channels=20, kernel_size=3, padding=(1,1)) # block of three conv
        self.bn1 = nn.BatchNorm2d(num_features=20)
        self.bn1_2 = nn.BatchNorm2d(num_features=20)
        self.bn1_3 = nn.BatchNorm2d(num_features=20)
        self.conv2 = nn.Conv2d(20,out_channels=40, kernel_size=3,padding=(1,1)) # block of three conv
        self.conv2_2 = nn.Conv2d(40,out_channels=40, kernel_size=3,padding=(1,1)) # block of three conv
        self.conv2_3 = nn.Conv2d(40,out_channels=40, kernel_size=3,padding=(1,1)) # block of three conv
        self.bn2 = nn.BatchNorm2d(num_features=40)
        self.bn2_2 = nn.BatchNorm2d(num_features=40)
        self.bn2_3 = nn.BatchNorm2d(num_features=40)
        self.conv3 = nn.Conv2d(40,out_channels=60, kernel_size=3)
        self.bn3 = nn.BatchNorm2d(num_features=60)
        self.conv4 = nn.Conv2d(60,out_channels=40, kernel_size=1)
        self.bn4 = nn.BatchNorm2d(num_features=40)
        self.conv5 = nn.Conv2d(40,out_channels=20, kernel_size=1)
        self.bn5 = nn.BatchNorm2d(num_features=20)
        self.fc1 = nn.Linear(20 , 10) 

 
    def forward(self, x, verbose=False):
        """
        Args:
          x of shape (batch_size, 1, 28, 28): Input images.
          verbose: True if you want to print the shapes of the intermediate variables.
        
        Returns:
          y of shape (batch_size, 10): Outputs of the network.
        """
        # YOUR CODE HERE
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn1_2(self.conv1_2(x)))
        x = F.relu(self.bn1_3(self.conv1_3(x)))
        x = F.max_pool2d(x,2,2)
        
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn2_2(self.conv2_2(x)))
        x = F.relu(self.bn2_3(self.conv2_3(x)))
        x = F.max_pool2d(x,2,2)

        x = F.relu(self.bn3(self.conv3(x)))
        x = F.relu(self.bn4(self.conv4(x)))
        x = F.relu(self.bn5(self.conv5(x)))
        
        x = F.avg_pool2d(x,5,2)
        x = x.view(-1, self.num_flat_features(x)) # Flat   
        x = self.fc1(x)

        return x
    
    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        # print("num_features:",num_features) # 20
        return num_features

In [39]:
def test_VGGNet_shapes():
    net = VGGNet()
    net.to(device)

    # Feed a batch of images from the training data to test the network
    with torch.no_grad():
        images, labels = iter(trainloader).next()
        images = images.to(device)
        print('Shape of the input tensor:', images.shape)

        y = net(images, verbose=True)
        assert y.shape == torch.Size([trainloader.batch_size, 10]), f"Bad y.shape: {y.shape}"

    print('Success')

test_VGGNet_shapes()

Shape of the input tensor: torch.Size([32, 1, 28, 28])
Success


In [40]:
tests.test_vgg_net(VGGNet)

y: tensor([[ 10.0382,  10.0382,  10.0382,  10.0382,  10.0382, -10.0382, -10.0382,
         -10.0382, -10.0382, -10.0382]], grad_fn=<AddmmBackward>)
expected: tensor([ 10.0382,  10.0382,  10.0382,  10.0382,  10.0382, -10.0382, -10.0382,
        -10.0382, -10.0382, -10.0382])
Success


In [41]:
# This function computes the accuracy on the test dataset
def compute_accuracy(net, testloader):
    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct / total

In [42]:
net = VGGNet()

In [43]:
optimizer = optim.Adam(net.parameters(),lr=0.01)
loss_function = nn.CrossEntropyLoss()

EPOCHS = 10
BATCH_SIZE = 32

In [44]:
# Implement the training loop in this cell
for epoch in range(EPOCHS):
    for images, labels in trainloader:
        optimizer.zero_grad()
        output = net(images)
        loss = loss_function(output, labels)
        loss.backward()
        optimizer.step()
    print(f"Epoch: {epoch}. Loss: {loss}")

Epoch: 0. Loss: 0.10231073200702667
Epoch: 1. Loss: 0.41243377327919006
Epoch: 2. Loss: 0.10539892315864563
Epoch: 3. Loss: 0.389605849981308
Epoch: 4. Loss: 0.18096397817134857
Epoch: 5. Loss: 0.28332987427711487
Epoch: 6. Loss: 0.34474754333496094
Epoch: 7. Loss: 0.13344526290893555
Epoch: 8. Loss: 0.14712971448898315
Epoch: 9. Loss: 0.09031938016414642


In [45]:
# Compute the accuracy on the test set
accuracy = compute_accuracy(net, testloader)
print(f'Accuracy of the VGG net on the test images: {accuracy: .3f}')
assert accuracy > 0.89, 'Poor accuracy'
print('Success')

Accuracy of the VGG net on the test images:  0.914
Success


In [46]:
accuracy

0.9138