<a href="https://colab.research.google.com/github/yeb2Binfang/ECE-GY9143HPML/blob/main/Lab/Lab2/lab2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Load dataset

We will use CIFAR-10, which contains 60K 32 x 32 color images in 10 classes (50K training images and 10K test images).

In [18]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import random
import os
import argparse
%matplotlib inline

In [3]:
# Training-time preprocessing: random crop (with 4-pixel padding) and random
# horizontal flip for augmentation, then tensor conversion and normalization.
# The mean/std triples are presumably the CIFAR-10 channel statistics -- confirm.
_train_mean = (0.4914, 0.4822, 0.4465)
_train_std = (0.2023, 0.1994, 0.2010)
_train_steps = [
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(_train_mean, _train_std),
]
trainsform_train = transforms.Compose(_train_steps)

In [4]:
# Evaluation-time preprocessing: no augmentation, only tensor conversion and
# the same normalization constants as the training pipeline.
_test_mean = (0.4914, 0.4822, 0.4465)
_test_std = (0.2023, 0.1994, 0.2010)
_test_steps = [
    transforms.ToTensor(),
    transforms.Normalize(_test_mean, _test_std),
]
trainsform_test = transforms.Compose(_test_steps)

In [5]:
# CIFAR-10 training split, stored under ./data (downloaded if missing) and
# passed through the training transform pipeline.
train_set = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=trainsform_train,
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data


In [6]:
# CIFAR-10 test split, stored under ./data (downloaded if missing) and
# passed through the evaluation transform pipeline.
test_set = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=trainsform_test,
)

Files already downloaded and verified


In [7]:
# Mini-batch size and number of loader worker processes shared by both loaders.
batch_size = 128
num_workers = 2

# Training batches are reshuffled every epoch so SGD sees a different order each time.
train_loader = torch.utils.data.DataLoader(
    train_set, batch_size=batch_size, shuffle=True, num_workers=num_workers
)
# Evaluation does not benefit from shuffling; a fixed order keeps test runs
# reproducible and comparable batch-for-batch.
test_loader = torch.utils.data.DataLoader(
    test_set, batch_size=batch_size, shuffle=False, num_workers=num_workers
)

In [8]:
# Human-readable class names; presumably ordered by CIFAR-10 label index -- confirm.
classes = tuple('plane car bird cat deer dog frog horse ship truck'.split())

# Build model

Create ResNet18.
Specifically, the first convolutional layer should have 3 input channels, 64 output channels, and a 3x3 kernel, with stride=1 and padding=1. It is followed by 8 basic blocks in 4 sub-groups (i.e. 2 basic blocks in each sub-group):
1. The first sub-group contains a convolutional layer with 64 output channels, 3x3 kernel, stride=1, padding=1.
2. The second sub-group contains a convolutional layer with 128 output channels, 3x3 kernel, stride=2, padding=1.
3. The third sub-group contains a convolutional layer with 256 output channels, 3x3 kernel, stride=2, padding=1.
4. The fourth sub-group contains a convolutional layer with 512 output channels, 3x3 kernel, stride=2, padding=1.
5. The final linear layer has 10 output classes. For all convolutional layers, use ReLU activation functions, and use batch normalization layers to avoid covariate shift. Since batch-norm layers regularize the training, set the bias to 0 for all the convolutional layers. Use the SGD optimizer with 0.1 as the learning rate, momentum 0.9, and weight decay 5e-4. The loss function is cross-entropy.

For all convolutional layers, use ReLU activation functions, and use batch normalization layers to avoid covariate shift. Since batch-norm layers regularize the training, set the bias to 0 for all the convolutional layers.


In [13]:
class BasicBlock(nn.Module):
  """Residual block made of two 3x3 convolutions, each followed by batch norm.

  The block output is ``relu(main_path(x) + shortcut(x))``. The shortcut is an
  empty ``nn.Sequential`` (identity) unless the main path changes the spatial
  size (``stride != 1``) or the channel count, in which case a strided 1x1
  convolution plus batch norm projects ``x`` to the matching shape.

  Args:
    input_channels: Number of channels of the incoming feature map.
    out_channels: Number of channels produced by both convolutions.
    stride: Stride of the first convolution (and of the projection shortcut).
  """
  expansion = 1

  def __init__(self, input_channels, out_channels, stride = 1):
    super().__init__()
    block_out = self.expansion * out_channels

    # Main path; convolutions carry no bias because batch norm follows each one.
    self.conv1 = nn.Conv2d(input_channels, out_channels, kernel_size=3,
                           stride=stride, padding=1, bias=False)
    self.bn1 = nn.BatchNorm2d(out_channels)
    self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                           stride=1, padding=1, bias=False)
    self.bn2 = nn.BatchNorm2d(out_channels)

    # The residual sum needs both operands to have the same shape, so project
    # the input whenever the main path alters resolution or channel count.
    needs_projection = stride != 1 or input_channels != block_out
    if needs_projection:
      self.shortcut = nn.Sequential(
          nn.Conv2d(input_channels, block_out, kernel_size=1, stride=stride, bias=False),
          nn.BatchNorm2d(block_out),
      )
    else:
      self.shortcut = nn.Sequential()

  def forward(self, x):
    """Apply the two conv/batch-norm stages, add the shortcut, then ReLU."""
    hidden = F.relu(self.bn1(self.conv1(x)))
    hidden = self.bn2(self.conv2(hidden))
    hidden += self.shortcut(x)
    return F.relu(hidden)

In [14]:
class ResNet(nn.Module):
  """ResNet with a 3x3 stem and four stages of residual blocks.

  Args:
    block: Block class; must expose an ``expansion`` class attribute and accept
      ``(input_channels, out_channels, stride)`` in its constructor.
    num_blocks: Sequence of four ints, the number of blocks in each stage.
    num_classes: Size of the final linear layer's output.
  """

  def __init__(self, block, num_blocks, num_classes = 10):
    super(ResNet, self).__init__()
    # Running channel count fed to the next block; updated by _make_layer.
    self.input_channels = 64

    self.conv1 = nn.Conv2d(3, 64, kernel_size = 3, stride = 1, padding = 1, bias = False)
    self.bn1 = nn.BatchNorm2d(64)
    self.layer1 = self._make_layer(block, 64, num_blocks[0], stride = 1)
    self.layer2 = self._make_layer(block, 128, num_blocks[1], stride = 2)
    self.layer3 = self._make_layer(block, 256, num_blocks[2], stride = 2)
    self.layer4 = self._make_layer(block, 512, num_blocks[3], stride = 2)
    self.linear = nn.Linear(512 * block.expansion, num_classes)

  def _make_layer(self, block, out_channels, num_blocks, stride):
    """Build one stage: the first block uses ``stride``, the rest use stride 1."""
    strides = [stride] + [1] * (num_blocks - 1)
    layers = []
    # Distinct loop name so the `stride` argument is not shadowed.
    for block_stride in strides:
      layers.append(block(self.input_channels, out_channels, block_stride))
      self.input_channels = out_channels * block.expansion
    return nn.Sequential(*layers)

  def forward(self, x):
    """Return class logits of shape (batch, num_classes) for an image batch."""
    out = F.relu(self.bn1(self.conv1(x)))
    out = self.layer1(out)
    out = self.layer2(out)
    out = self.layer3(out)
    out = self.layer4(out)
    # Global average pooling. For 32x32 inputs the final map is 4x4, so this
    # matches the previous fixed avg_pool2d(out, 4), but it also works for
    # other input resolutions instead of breaking the linear layer's input size.
    out = F.adaptive_avg_pool2d(out, 1)
    out = out.view(out.size(0), -1)
    out = self.linear(out)
    return out



In [15]:
def ResNet18():
  """Build a ResNet from BasicBlock units with two blocks in each of its four stages."""
  blocks_per_stage = [2] * 4
  return ResNet(BasicBlock, blocks_per_stage)

In [16]:
# Instantiate the network with its default constructor arguments.
net = ResNet18()

In [17]:
# Print the module hierarchy to check the layer configuration.
print(net)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=

# C1 Train in Pytorch

Create a main function that creates the DataLoaders for the training set and the neural network, then runs 5 epochs with a complete training phase on all the mini-batches of the training set. Write the code to be device-agnostic, and use the ArgumentParser to read parameters from input, such as the use of CUDA, the data_path, the number of data loader workers, and the optimizer (as a string, e.g. ‘sgd’).

For each minibatch, calculate the training loss value and the top-1 training accuracy of the predictions, both measured on the training data.



In [20]:
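# NOTE(review): argument parsing is disabled in this notebook. Presumably this is
# because parse_args() reads sys.argv, which inside a Jupyter kernel holds the
# kernel's own launch arguments; parse.parse_args(args=[]) avoids that -- confirm.
# parse = argparse.ArgumentParser(description='ResNet training CIFAR10')
# args = parse.parse_args()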

In [22]:
# Use the GPU when PyTorch reports CUDA as available; otherwise run on the CPU.
if torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'
print(device)

cpu


In [23]:
# Move the model's parameters and buffers to the selected device.
net = net.to(device)

In [25]:
# Cross-entropy objective for the classification task.
loss_fn = nn.CrossEntropyLoss()

# SGD hyper-parameters: learning rate 0.1 and weight decay 5e-4, with momentum 0.9.
lr = 1e-1
weight_decay = 5e-4
optimizer = optim.SGD(
    net.parameters(),
    lr=lr,
    momentum=0.9,
    weight_decay=weight_decay,
)

## Train the model

In [27]:
epoch = 5  # presumably the number of epochs to run (the exercise asks for 5) -- confirm against the driver loop
def train(epoch):
  """Run one training pass over ``train_loader``, updating ``net`` in place.

  Relies on the notebook-level ``net``, ``train_loader``, ``optimizer``,
  ``loss_fn`` and ``device``. Progress is printed after every minibatch.

  Args:
    epoch: Epoch number; only used in the progress banner.

  Returns:
    A ``(train_loss_history, train_acc_history)`` tuple of lists with one entry
    per minibatch: the minibatch loss, and the running top-1 training accuracy
    (in percent) over all samples seen so far in this pass.
  """
  print('\nEpoch: %d' % epoch)
  net.train()
  train_loss = 0
  train_loss_history = []
  train_acc_history = []
  correct = 0
  total = 0
  for batch_idx, (inputs, targets) in enumerate(train_loader):
    inputs, targets = inputs.to(device), targets.to(device)
    optimizer.zero_grad()
    outputs = net(inputs)
    loss = loss_fn(outputs, targets)
    loss.backward()
    optimizer.step()

    # Read the scalar once; .item() forces a device-to-host sync.
    batch_loss = loss.item()
    train_loss += batch_loss
    train_loss_history.append(batch_loss)

    # Top-1 prediction is the index of the largest logit per sample.
    _, predicted = outputs.max(1)
    total += targets.size(0)
    correct += predicted.eq(targets).sum().item()
    running_acc = 100. * correct / total
    train_acc_history.append(running_acc)

    # Format specs need a ':' ({2:.3f}), and all four values must go to format().
    print("\nThe batch index: {0}, len of train loader: {1}, Loss: {2:.3f}, acc: {3:.3f}".format(
        batch_idx,
        len(train_loader),
        train_loss / (batch_idx + 1),
        running_acc,
    ))
  # Hand the per-minibatch metrics back to the caller instead of discarding them.
  return train_loss_history, train_acc_history

## Test the model