<a href="https://colab.research.google.com/github/tsai-praveen/era1-assignments/blob/main/S2/ERA_V1_S2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
!pip install torchsummary # Install torchsummary package

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [13]:
# Import packages such as torch, torchvision, torchsummary

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torchsummary import summary

In [14]:
# Check if CUDA (GPU) is available. Set the device accordingly
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
device

device(type='cuda')

In [15]:
# Set the batch size to 128, meaning there will be 128 images per batch
batch_size = 128

# Create the train loader, load the MNIST dataset (lazy load happens here)
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True, # Save to data folder, download train data
                    transform=transforms.Compose([
                        transforms.ToTensor(), # Normalize the data between 0 - 1
                        transforms.Normalize((0.1307,), (0.3081,)) # Standardize the data so that mean is 0 and standard deviation is 1
                    ])),
    batch_size=batch_size, shuffle=True)

# Create the train loader, load the MNIST test dataset (lazy load happens here)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False,  # Save to data folder, indicate test data
                        transform=transforms.Compose([
                        transforms.ToTensor(), # Normalize the data between 0 - 1
                        transforms.Normalize((0.1307,), (0.3081,)) # Standardize the data so that mean is 0 and standard deviation is 1
                    ])),
    batch_size=batch_size, shuffle=True)

# Some Notes on our naive model

We are going to write a network based on what we have learnt so far. 

The size of the input image is 28x28x1. We are going to add as many layers as required to reach RF = 32 "atleast". 

In [22]:
class FirstDNN(nn.Module):
  # Define various convolution layers
  def __init__(self):
    super(FirstDNN, self).__init__()
    # r_in:1, n_in:28, j_in:1, s:1, r_out:3, n_out:28, j_out:1
    self.conv1 = nn.Conv2d(1, 32, 3, padding=1)
    # r_in:3 , n_in:28 , j_in:1 , s:1 , r_out:5 , n_out:28 , j_out:1
    self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
    # r_in:5 , n_in:28 , j_in:1 , s:2 , r_out:6 , n_out:14 , j_out:2
    self.pool1 = nn.MaxPool2d(2, 2)
    # r_in:6 , n_in:14 , j_in:2 , s:1 , r_out:10 , n_out:14 , j_out:2
    self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
    # r_in:10 , n_in:14 , j_in:2 , s:1 , r_out:14 , n_out:14 , j_out:2
    self.conv4 = nn.Conv2d(128, 256, 3, padding = 1)
    # r_in:14 , n_in:14 , j_in:2 , s:2 , r_out:16 , n_out:7 , j_out:4
    self.pool2 = nn.MaxPool2d(2, 2)
    # r_in:16 , n_in:7 , j_in:4 , s:1 , r_out:24 , n_out:5, j_out:4
    self.conv5 = nn.Conv2d(256, 512, 3)
    # r_in:24 , n_in:5 , j_in:4 , s:1 , r_out:32 , n_out:3 , j_out:4
    self.conv6 = nn.Conv2d(512, 1024, 3)
    # r_in:32 , n_in:3 , j_in:4 , s:1 , r_out:40 , n_out:1 , j_out:4
    self.conv7 = nn.Conv2d(1024, 10, 3)

  def forward(self, x):
    # x is the input image. Feed it to conv1 layer, ReLU on top of it. After 2 such layers, add a maxpool layer on top
    x = self.pool1(F.relu(self.conv2(F.relu(self.conv1(x)))))
    # Repeat the steps again for conv3, conv4, pool2
    x = self.pool2(F.relu(self.conv4(F.relu(self.conv3(x)))))
    # Repeat for conv5 & 6
    x = F.relu(self.conv6(F.relu(self.conv5(x))))
    # The final output layer, ReLU should not be there.
    # x = F.relu(self.conv7(x))
    x = self.conv7(x)
    x = x.view(-1, 10) # reshape the data
    return F.log_softmax(x) # Return the log softmax of the value


In [23]:
# Instantiate the neural network and port it to GPU
model = FirstDNN().to(device)

In [24]:
# Print the model summary
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 28, 28]             320
            Conv2d-2           [-1, 64, 28, 28]          18,496
         MaxPool2d-3           [-1, 64, 14, 14]               0
            Conv2d-4          [-1, 128, 14, 14]          73,856
            Conv2d-5          [-1, 256, 14, 14]         295,168
         MaxPool2d-6            [-1, 256, 7, 7]               0
            Conv2d-7            [-1, 512, 5, 5]       1,180,160
            Conv2d-8           [-1, 1024, 3, 3]       4,719,616
            Conv2d-9             [-1, 10, 1, 1]          92,170
Total params: 6,379,786
Trainable params: 6,379,786
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 1.51
Params size (MB): 24.34
Estimated Total Size (MB): 25.85
-------------------------------------

  return F.log_softmax(x) # Return the log softmax of the value


In [25]:
from tqdm import tqdm

# Function for training
def train(model, device, train_loader, optimizer, epoch):
    model.train() # Set the model to train mode
    pbar = tqdm(train_loader) # Create a tqdm iterator
    for batch_idx, (data, target) in enumerate(pbar): # Run for every batch
        data, target = data.to(device), target.to(device) # Port data to GPU
        optimizer.zero_grad() # Reset any previous gradient to zero
        output = model(data) # Call the model
        loss = F.nll_loss(output, target) # Calculate the negative log likelihood
        loss.backward() # Compute the gradients
        optimizer.step() # Backpropogate the gradients
        pbar.set_description(desc= f'loss={loss.item()} batch_id={batch_idx}') # Prints out the status

# Function for testing
def test(model, device, test_loader):
    model.eval() # Set the model mode to eval
    test_loss = 0
    correct = 0
    with torch.no_grad(): # Turn off gradient accumulation
        for data, target in test_loader:
            data, target = data.to(device), target.to(device) # Port data to GPU
            output = model(data) # Call the model
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item() # Calculate how many of them are correct in that batch

    test_loss /= len(test_loader.dataset) # Get the average test loss

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [26]:
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9) # Create a Schocastic Gradient Descent optimizer with lr as 0.01 and momentum 0.9. MomentMomentum helps with early convergence.

for epoch in range(1, 2):
    # Train the model on the device for number of epochs. Get data using train_loader. Use optimizer
    train(model, device, train_loader, optimizer, epoch)
    # Test the model
    test(model, device, test_loader)

  return F.log_softmax(x) # Return the log softmax of the value
loss=0.28070756793022156 batch_id=468: 100%|██████████| 469/469 [00:30<00:00, 15.29it/s]



Test set: Average loss: 0.3318, Accuracy: 8656/10000 (87%)



In [21]:
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9) # Create a Schocastic Gradient Descent optimizer with lr as 0.01 and momentum 0.9. MomentMomentum helps with early convergence.

for epoch in range(1, 2):
    # Train the model on the device for number of epochs. Get data using train_loader. Use optimizer
    train(model, device, train_loader, optimizer, epoch)
    # Test the model
    test(model, device, test_loader)

  return F.log_softmax(x) # Return the log softmax of the value
loss=0.006621516775339842 batch_id=468: 100%|██████████| 469/469 [00:31<00:00, 14.67it/s]



Test set: Average loss: 0.0297, Accuracy: 9904/10000 (99%)



In [20]:
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9) # Create a Schocastic Gradient Descent optimizer with lr as 0.01 and momentum 0.9. MomentMomentum helps with early convergence.

for epoch in range(0, 4):
    # Train the model on the device for number of epochs. Get data using train_loader. Use optimizer
    train(model, device, train_loader, optimizer, epoch)
    # Test the model
    test(model, device, test_loader)

  return F.log_softmax(x) # Return the log softmax of the value
loss=0.06502588838338852 batch_id=468: 100%|██████████| 469/469 [00:29<00:00, 15.68it/s]



Test set: Average loss: 0.0537, Accuracy: 9825/10000 (98%)



loss=0.014189152978360653 batch_id=468: 100%|██████████| 469/469 [00:30<00:00, 15.61it/s]



Test set: Average loss: 0.0418, Accuracy: 9864/10000 (99%)



loss=0.009600267745554447 batch_id=468: 100%|██████████| 469/469 [00:30<00:00, 15.25it/s]



Test set: Average loss: 0.0284, Accuracy: 9905/10000 (99%)



loss=0.0068410951644182205 batch_id=468: 100%|██████████| 469/469 [00:29<00:00, 15.74it/s]



Test set: Average loss: 0.0291, Accuracy: 9901/10000 (99%)

