<a href="https://colab.research.google.com/github/vinodnbhat/AIML-CEP-2021-Assignments/blob/main/ResNet_CIFAR10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Import required libraries
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data
import torch.optim.lr_scheduler as lr_scheduler
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import matplotlib.pyplot as plt
import numpy as np
import random
import time

In [2]:
SEED = 1234
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)

**CIFAR Dataset**

In [3]:
ROOT = '.data'

# Downloading Cifar10 dataset from torchvision datasets
train_data = datasets.CIFAR10(root = ROOT,
                              train = True,
                              download = True)

mean = train_data.data.mean() / 255
std = train_data.data.std() / 255

print(f'Calculated mean: {mean}')
print(f'Calculated std: {std}')

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to .data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting .data/cifar-10-python.tar.gz to .data
Calculated mean: 0.4733630004850899
Calculated std: 0.2515689250632208


In [4]:
print(train_data.data.shape)

(50000, 32, 32, 3)


In [5]:
train_transforms = transforms.Compose([
                                       transforms.ToTensor(),
                                       transforms.Normalize(mean = [mean], std = [std])
                                      ])

test_transforms = transforms.Compose([
                                       transforms.ToTensor(),
                                       transforms.Normalize(mean = [mean], std = [std])
                                      ])

In [6]:
train_set = datasets.CIFAR10(root = ROOT,
                             train = True,
                             download = True,
                             transform = train_transforms)

test_set = datasets.CIFAR10(root = ROOT,
                             train = False,
                             download = True,
                             transform = train_transforms)

Files already downloaded and verified
Files already downloaded and verified


In [7]:
print(train_set.data.shape)

(50000, 32, 32, 3)


In [8]:
print(test_set.data.shape)

(10000, 32, 32, 3)


In [9]:
print(f'Number of training samples: {len(train_set)}')
print(f'Number of testing samples: {len(test_set)}')

Number of training samples: 50000
Number of testing samples: 10000


In [10]:
batch_size = 64

train_loader = data.DataLoader(train_set,
                               shuffle = True,
                               batch_size = batch_size)

test_loader = data.DataLoader(test_set,
                              batch_size = batch_size)

In [11]:
# Checking the bacth dimensions
for images, labels in train_loader:
  print('Image batch dimensions:', images.shape)
  print('Label batch dimensions:', labels.shape)
  break

Image batch dimensions: torch.Size([64, 3, 32, 32])
Label batch dimensions: torch.Size([64])


In [12]:
### Model Settings ###

# Hyperparameters
learning_rate = 0.01

# Architecture
num_classes = 10

**RESNET**

In [24]:
class ResNet(torch.nn.Module):
  
  def __init__(self, num_classes = 10):
    super(ResNet, self).__init__()

    # 1st Residual Block #
    #########################################################################
    #32x32x3 => 16x16x32
    self.conv1 = torch.nn.Conv2d(in_channels = 3,
                                 out_channels = 32,
                                 kernel_size = (3, 3),
                                 stride = (2, 2),
                                 padding = 1)
    self.conv1_bn = torch.nn.BatchNorm2d(32)

    #16x16x32 => 16x16x64
    self.conv2 = torch.nn.Conv2d(in_channels = 32,
                                 out_channels = 64,
                                 kernel_size = (1, 1),
                                 stride = (1, 1),
                                 padding = 0)
    self.conv2_bn = torch.nn.BatchNorm2d(64)

    #32x32x3 => 16x16x64
    self.conv_shortcut1 = torch.nn.Conv2d(in_channels = 3,
                                 out_channels = 64,
                                 kernel_size = (1, 1),
                                 stride = (2, 2),
                                 padding = 0)
    self.conv_shortcut1_bn = torch.nn.BatchNorm2d(64)
    ############################################################################
    
    #2nd residual block#
    ###########################################################################

    #16x16x64 => 8x8x128
    self.conv3 = torch.nn.Conv2d(in_channels = 64,
                                 out_channels = 128,
                                 kernel_size = (3, 3),
                                 stride = (2, 2),
                                 padding = 1)
    self.conv3_bn = torch.nn.BatchNorm2d(128)

    #8x8x128 => 8x8x256
    self.conv4 = torch.nn.Conv2d(in_channels = 128,
                                 out_channels = 256,
                                 kernel_size = (1, 1),
                                 stride = (1, 1),
                                 padding = 0)
    self.conv4_bn = torch.nn.BatchNorm2d(256)

    #16x16x64 => 8x8x256
    self.conv_shortcut2 = torch.nn.Conv2d(in_channels = 64,
                                 out_channels = 256,
                                 kernel_size = (1, 1),
                                 stride = (2, 2),
                                 padding = 0)
    self.conv_shortcut2_bn = torch.nn.BatchNorm2d(256)
    #########################################################################

    # Fully Connected
    #########################################################################
    self.linear_1 = torch.nn.Linear(8 * 8 * 256, num_classes)

  def forward(self, x):

    ###############################
    # 1st Residual Block
    ###############################

    shortcut = x

    out = self.conv1(x)         # 32x32x3 => 16x16x32
    out = self.conv1_bn(out)
    out = F.relu(out)

    out = self.conv2(out)       # 16x16x32 => 16x16x64
    out = self.conv2_bn(out)

    # match up dimensions using a linear function (no relu)
    shortcut = self.conv_shortcut1(shortcut)
    shortcut = self.conv_shortcut1_bn(shortcut)

    out += shortcut
    out = F.relu(out)

    ###############################
    # 2nd Residual Block
    ###############################

    shortcut = out

    out = self.conv3(out)         # 16x16x64 => 8x8x128
    out = self.conv3_bn(out)
    out = F.relu(out)

    out = self.conv4(out)       # 8x8x128 => 8x8x256
    out = self.conv4_bn(out)

    # match up dimensions using a linear function (no relu)
    shortcut = self.conv_shortcut2(shortcut)
    shortcut = self.conv_shortcut2_bn(shortcut)

    out += shortcut
    out = F.relu(out)

    ###############################
    # Fully Connected
    ###############################

    logits = self.linear_1(out.view(-1, 8*8*256))
    probas = F.softmax(logits, dim = 1)
    return logits, probas

In [25]:
# Initialize the model
model = ResNet()

In [26]:
def count_parameters(model):
  return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 292,234 trainable parameters


In [27]:
optimizer = torch.optim.Adam(model.parameters(), lr =learning_rate)

In [28]:
lossfn = nn.CrossEntropyLoss()

In [29]:
if torch.cuda.is_available():
  print('cuda is available. Using cuda..')
else:
  print('cuda is not available. Using CPU..')

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

cuda is not available. Using CPU..


In [30]:
model = model.to(device)
lossfn = lossfn.to(device)

In [31]:
def epoch_time(start_time, end_time):
  elapsed_time = end_time - start_time
  elapsed_mins = int(elapsed_time / 60)
  elapsed_secs = elapsed_time - (elapsed_mins * 60)
  return elapsed_mins, elapsed_secs

In [32]:
# Compute the Accuracy
def compute_accuracy(model, data_loader):
  correct_pred, num_examples = 0, 0
  for i , (features, targets) in enumerate(data_loader):
    features = features.to(device)
    targets = targets.to(device)
    logits, probas = model(features)
    _, predicted_labels = torch.max(probas, 1)
    num_examples += targets.size(0)
    correct_pred += (predicted_labels == targets).sum()
  return correct_pred.float() / num_examples *100

In [33]:
def train(model, iterator, optimizer, criterion, device):
  epoch_loss = 0

  model.train()

  for (x, y) in iterator:

    x = x.to(device)
    y = y.to(device)

    optimizer.zero_grad()

    y_pred_logits, y_pred_probas = model(x)

    loss = lossfn(y_pred_logits, y)

    loss.backward()

    optimizer.step()

    epoch_loss += loss.item()

  return epoch_loss / len(iterator)

In [None]:
save_model = False
patience_early_stopping = 3 #training will stop if model performance does not improve for these many consecutive epochs
cnt = 0 #counter for checking patience level
EPOCHS = 100
prev_test_acc = 0 #initializing prev test accuracy for early stopping condition
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode = 'max', factor = 0.2, patience = 1) #learning rate scheduler, update learning rate by 
#factor of 0.2 if test accuracy does not improve for patience+1 consecutive epochs

for epoch in range(EPOCHS):
  print("current learning rate", optimizer.state_dict()['param_groups'][0]['lr'])
  start_time = time.perf_counter()

  train_loss = train(model, train_loader, optimizer, lossfn, device)
  train_acc = compute_accuracy(model, train_loader)

  if save_model:
    torch.save(model.state.dict(), 'resnet_model.pt')

  if epoch % 1 == 0: #for every epoch we shall compute the test accuracy
    test_acc = compute_accuracy(model, test_loader)

    if test_acc > prev_test_acc: #check if test accuracy for current epoch has improved compared to previous epoch
      cnt = 0 #if accuracy improves reset counter to 0
    else:
      cnt += 1 #otherwise increment current counter

    prev_test_acc = test_acc

  scheduler.step(test_acc) #updates learning rate

  end_time = time.perf_counter()

  epoch_mins, epoch_secs = epoch_time(start_time, end_time)

  print(f'Epoch: {epoch+1:2} | Epoch Time: {epoch_mins}m {epoch_secs}s')
  print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc:.2f}%')
  if epoch % 1 == 0:  #for every epoch we shall print the test loss and test accuracy 
    print(f'\tTest Acc: {test_acc:.2f}% \n')

  if cnt == patience_early_stopping:
    print(f'early stopping as accuracy did not improve for {patience_early_stopping} consecutive epochs')
    break



current learning rate 0.01
Epoch:  1 | Epoch Time: 3m 47.54912440499993s
	Train Loss: 1.919 | Train Acc: 55.57%
	Test Acc: 52.51% 

current learning rate 0.01
Epoch:  2 | Epoch Time: 3m 46.9268395729996s
	Train Loss: 1.247 | Train Acc: 61.90%
	Test Acc: 57.57% 

current learning rate 0.01
Epoch:  3 | Epoch Time: 3m 46.59267221100026s
	Train Loss: 1.109 | Train Acc: 63.02%
	Test Acc: 57.26% 

current learning rate 0.01
Epoch:  4 | Epoch Time: 3m 48.319731106999825s
	Train Loss: 1.017 | Train Acc: 67.87%
	Test Acc: 60.73% 

current learning rate 0.01
Epoch:  5 | Epoch Time: 3m 51.06576488200062s
	Train Loss: 0.950 | Train Acc: 68.90%
	Test Acc: 59.76% 

current learning rate 0.01
Epoch:  6 | Epoch Time: 3m 52.20495633900009s
	Train Loss: 0.902 | Train Acc: 72.29%
	Test Acc: 61.91% 

current learning rate 0.01
Epoch:  7 | Epoch Time: 3m 52.93059695700049s
	Train Loss: 0.856 | Train Acc: 75.01%
	Test Acc: 63.07% 

current learning rate 0.01
Epoch:  8 | Epoch Time: 3m 53.107227249999596s
	T