# EQ2425 - Analysis and Search of Visual Data

## Project 3: Image Classification using CNNs

**Author: Yage Hao (yage@kth.se)**

### Import packages

In [178]:
import numpy as np

import torch
import torch.nn.functional as F
import torch.nn as nn 
import torch.optim as optim 
import torchvision
import torchvision.transforms as transforms

from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader

import statistics 

from sklearn.metrics import recall_score

In [179]:
print('PyTorch version:', torch.__version__)
print('Torchvision version:', torchvision.__version__)

PyTorch version: 1.9.0+cu111
Torchvision version: 0.10.0+cu111


In [180]:
! nvidia-smi

Fri Oct 15 20:52:07 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.74       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   39C    P0    34W / 250W |   1311MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

### Data Pre-processing

In [181]:
normalize = transforms.Normalize(mean=[.5, .5, .5],std=[1 ,1, 1])

dataset_transform = transforms.Compose([
    transforms.ToTensor(),
    normalize
])

In [182]:
train_set = CIFAR10('./cifar10', train=True, download=True, transform=dataset_transform)
valid_set = CIFAR10('./cifar10', train=False, download=True, transform=dataset_transform)

print(train_set.data.shape)
print(valid_set.data.shape)

Files already downloaded and verified
Files already downloaded and verified
(50000, 32, 32, 3)
(10000, 32, 32, 3)


In [183]:
train_loader = DataLoader(train_set, batch_size=256, num_workers=0, shuffle=True)
valid_loader = DataLoader(valid_set, batch_size=256, num_workers=0, shuffle=False)

### Build the Default Model

In [184]:
class BasicCNN(nn.Module):

  def __init__(self, num_classes):
    super(BasicCNN, self).__init__()
    # self.conv1 = nn.Conv2d(3, 64, (5,5), stride=1, padding=0) 
    # self.conv2 = nn.Conv2d(64, 128, (3,3), stride=1, padding=0) 
    # self.conv3 = nn.Conv2d(128, 256, (3,3), stride=1, padding=0) 
    self.conv1 = nn.Sequential(
        nn.Conv2d(3, 64, (5,5), stride=1, padding=0),
        nn.BatchNorm2d(64))
    self.conv2 = nn.Sequential(
        nn.Conv2d(64, 128, (3,3), stride=1, padding=0),
        nn.BatchNorm2d(128))
    self.conv3 = nn.Sequential(
        nn.Conv2d(128, 256, (3,3), stride=1, padding=0),
        nn.BatchNorm2d(256))
    self.pool1 = nn.MaxPool2d((2,2), stride=2)
    self.pool2 = nn.MaxPool2d((2,2), stride=2)
    self.pool3 = nn.MaxPool2d((2,2), stride=2)
    
    # self.fc0 = nn.Linear(96, 2*2*96)
    self.fc1 = nn.Linear(2*2*256, 512)
    # self.fc_4a2 = nn.Linear(512, 128)
    self.fc2 = nn.Linear(512, 10)
    self.sm = nn.Softmax() 
  
  def forward(self, x):
    x = F.leaky_relu(self.conv1(x))
    x = self.pool1(x)

    x = F.leaky_relu(self.conv2(x))
    x = self.pool2(x)

    x = self.pool3(self.conv3(x))

    x = x.reshape(x.size(0), -1)
    # print(x.shape)
    # x = self.fc0(x)
    x = F.leaky_relu(self.fc1(x))
    # x = F.relu(self.fc_4a2(x))
    x = F.dropout(x, p=0.3)
    x = self.fc2(x)
    x = self.sm(x)
    return x 

In [185]:
cuda = torch.device('cuda')

model = BasicCNN(10)
model.to(cuda)

BasicCNN(
  (conv1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(5, 5), stride=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv2): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv3): Sequential(
    (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (pool1): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
  (pool2): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
  (pool3): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=1024, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=10, bias=True)
  (sm): Softmax(dim=None)
)

### Define functions for training

Optimizer: SGD

In [186]:
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

Train function

In [187]:
def train(model, loss_fn, optimizer):
  model.train()

  train_batch_losses = []

  for batch, labels in train_loader:
    batch = batch.to(cuda)
    labels = labels.to(cuda)

    optimizer.zero_grad()
    y_pred = model(batch)
    loss = loss_fn(y_pred, labels)
    loss.backward()
    optimizer.step()

    train_batch_losses.append(float(loss))

    mean_loss = statistics.mean(train_batch_losses)
  
  return mean_loss

Validation function

In [188]:
def validate(model, loss_fn, optimizer):
  model.eval()

  predictions = []

  with torch.no_grad():
    validation_batch_losses = []

    for batch, labels in valid_loader:
      batch = batch.to(cuda)
      labels = labels.to(cuda)

      labels_pred = model(batch)
      loss = loss_fn(labels_pred, labels)

      validation_batch_losses.append(float(loss))

      mean_loss = statistics.mean(validation_batch_losses)
  return mean_loss

Recall Rate


In [189]:
def recall(model, loader):
  model.eval()

  with torch.no_grad():
    batch_recall = []

    for batch, labels in loader:
      batch = batch.to(cuda)
      labels = labels.to(cuda)

      labels_pred = model(batch)
      _, predicted = torch.max(labels_pred.cpu().data, 1)

      ma_recall = recall_score(labels.cpu(), predicted.cpu(), average='macro')
      
      batch_recall.append(ma_recall)
      mean_recall = statistics.mean(batch_recall)

  return mean_recall

### Training

In [None]:
loss_fn = nn.CrossEntropyLoss()

epochs = 300

train_losses = []
valid_losses = []

train_recalls = []
valid_recalls = []

for epoch in range(1, epochs+1):
  print('Epoch:', epoch)

  train_loss = train(model, loss_fn, optimizer)
  train_recall = recall(model, train_loader)
  train_losses.append(train_loss)
  train_recalls.append(train_recall)

  print('Training loss:', train_loss)
  print('Training recall: {}%'.format(train_recall))

  valid_loss = validate(model, loss_fn, optimizer)
  valid_recall = recall(model, valid_loader)
  valid_losses.append(valid_loss)
  valid_recalls.append(valid_recall)

  print('Validation loss:', valid_loss)
  print('Validation recall: {}%'.format(valid_recall))

Epoch: 1




Training loss: 2.280154200232759
Training recall: 0.260710053383518%


  _warn_prf(average, modifier, msg_start, len(result))


Validation loss: 2.2504138350486755
Validation recall: 0.2568320097169397%
Epoch: 2
Training loss: 2.218662452941038
Training recall: 0.3158379792512576%
Validation loss: 2.18666473031044
Validation recall: 0.3173786325065762%
Epoch: 3
Training loss: 2.160900982058778
Training recall: 0.36464333656864745%
Validation loss: 2.134967750310898
Validation recall: 0.3619908019348045%
Epoch: 4
Training loss: 2.11576154280682
Training recall: 0.39934677062933754%
Validation loss: 2.096699261665344
Validation recall: 0.4040203832151648%
Epoch: 5
Training loss: 2.080798026250333
Training recall: 0.42956886595776367%
Validation loss: 2.065521961450577
Validation recall: 0.42356971004411736%
Epoch: 6
Training loss: 2.052567928421254
Training recall: 0.4510232101887597%
Validation loss: 2.0398734271526338
Validation recall: 0.4451428444245072%
Epoch: 7
Training loss: 2.0285887000512104
Training recall: 0.4735359199141161%
Validation loss: 2.0185975313186644
Validation recall: 0.46032573983804326%
E

In [None]:
print('Average training recall: {}%'.format(statistics.mean(train_recalls)))
print('Average validation recall: {}%'.format(statistics.mean(valid_recalls)))

Result Document: https://docs.google.com/spreadsheets/d/1ojYF_o1rZ-KGAufCUG6QpHWw0i8j_2-ur9olJV41dvo/edit?usp=sharing

ref:


*   https://colab.research.google.com/github/ccarpenterg/LearningPyTorch1.x/blob/master/04_cifar_10_challenging_convnets.ipynb#scrollTo=YczNCGVTJXp9
*   https://blog.csdn.net/u013347145/article/details/104332094

