# Import Libraries

In [19]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

## Data Transformations

We start by defining our data transformations. We need to think about what our data is and how we can augment it so that it correctly represents images the model might not otherwise see.


In [20]:
# Training pipeline: tensor conversion first, then light colour and rotation
# augmentation, then per-channel normalisation with mean 0.5 / std 0.5
# (i.e. (x - 0.5) / 0.5, which is what maps values to [-1, 1]).
train_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.ColorJitter(brightness=0.10, contrast=0.1, saturation=0.10, hue=0.1),
        # Rotate by a random angle in [-7, 7] degrees; fill value 1 is used for
        # the area outside the rotated image.
        transforms.RandomRotation(degrees=(-7.0, 7.0), fill=(1,)),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)


# Evaluation pipeline: no augmentation, only tensor conversion and the same
# normalisation as the training pipeline.
test_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)


# Dataset and Creating Train/Test Split

In [21]:
# CIFAR-10 splits stored under ./data (download=True fetches them when missing).
# NOTE(review): the name `train` is rebound by `def train(...)` in the training
# cell further down; re-running the DataLoader cell after that cell would pass
# the function instead of this dataset.
train = datasets.CIFAR10(root='./data', train=True, download=True, transform=train_transform)
# `datastrain` is a second name bound to the same training dataset object.
datastrain = train
test = datasets.CIFAR10(root='./data', train=False, download=True, transform=test_transform)

Files already downloaded and verified
Files already downloaded and verified


# Dataloader Arguments & Test/Train Dataloaders


In [22]:
SEED = 1

# CUDA?
cuda = torch.cuda.is_available()
print("CUDA Available?", cuda)

# For reproducibility: seed the CPU generator, and the CUDA one when present.
torch.manual_seed(SEED)
if cuda:
    torch.cuda.manual_seed(SEED)

# Settings shared by both loaders. Pinned host memory is only requested when
# CUDA is actually available, so CPU-only runs do not ask for it needlessly.
dataloader_args = dict(batch_size=64, num_workers=0, pin_memory=cuda)

# Training batches are reshuffled every epoch; evaluation order stays fixed.
train_dataloader = torch.utils.data.DataLoader(train, shuffle=True, **dataloader_args)
test_dataloader = torch.utils.data.DataLoader(test, shuffle=False, **dataloader_args)

CUDA Available? True


# The model
Let's start with the model we first saw

In [37]:
import torch.nn.functional as F
# Default dropout probability used by Net when no explicit rate is passed.
dropout_value = 0.1


def _conv3x3_block(in_channels, out_channels, padding, norm_shape, dropout_rate):
    """Build a bias-free 3x3 Conv2d -> ReLU -> LayerNorm -> Dropout stage.

    The four modules always sit at Sequential indices 0..3, so parameter names
    have the form ``<block>.0.weight`` (conv) and ``<block>.2.weight/bias``
    (LayerNorm).

    Args:
        in_channels: channels of the incoming feature map.
        out_channels: channels produced by the convolution.
        padding: zero padding passed to the convolution.
        norm_shape: ``[C, H, W]`` of the convolution output, normalised as a whole.
        dropout_rate: probability passed to ``nn.Dropout``.
    """
    return nn.Sequential(
        nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                  kernel_size=(3, 3), padding=padding, bias=False),
        nn.ReLU(),
        nn.LayerNorm(norm_shape),
        nn.Dropout(dropout_rate),
    )


def _conv1x1(in_channels, out_channels):
    """Build a bias-free 1x1 convolution wrapped in a Sequential (index 0)."""
    return nn.Sequential(
        nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                  kernel_size=(1, 1), padding=0, bias=False),
    )


class Net(nn.Module):
    """Small LayerNorm CNN mapping 3x32x32 images to 10 log-probabilities.

    The LayerNorm shapes are fixed per stage, so the spatial input size must
    be 32x32 (the first convolution pads it to 34x34).

    Args:
        dropout_rate: dropout probability for every stage. ``None`` (default)
            reads the module-level ``dropout_value`` at construction time.
    """

    def __init__(self, dropout_rate=None):
        super().__init__()
        if dropout_rate is None:
            # Resolved here (not in the signature) so later changes to the
            # module-level value are picked up by newly built models.
            dropout_rate = dropout_value

        # Input block
        self.convblock1 = _conv3x3_block(3, 4, padding=2, norm_shape=[4, 34, 34],
                                         dropout_rate=dropout_rate)  # output_size = 34

        # Convolution block 1
        self.convblock2 = _conv3x3_block(4, 4, padding=0, norm_shape=[4, 32, 32],
                                         dropout_rate=dropout_rate)  # output_size = 32

        # Transition block 1
        self.convblock3 = _conv1x1(4, 10)
        self.pool1 = nn.MaxPool2d(2, 2)  # output_size = 16

        # Convolution block 2
        self.convblock4 = _conv3x3_block(10, 16, padding=1, norm_shape=[16, 16, 16],
                                         dropout_rate=dropout_rate)  # output_size = 16
        self.convblock5 = _conv3x3_block(16, 14, padding=0, norm_shape=[14, 14, 14],
                                         dropout_rate=dropout_rate)  # output_size = 14

        # Transition block 2
        self.convblock6 = _conv1x1(14, 10)
        self.pool2 = nn.MaxPool2d(2, 2)  # output_size = 7

        # Convolution block 3
        self.convblock7 = _conv3x3_block(10, 16, padding=1, norm_shape=[16, 7, 7],
                                         dropout_rate=dropout_rate)  # output_size = 7
        self.convblock8 = _conv3x3_block(16, 32, padding=0, norm_shape=[32, 5, 5],
                                         dropout_rate=dropout_rate)  # output_size = 5
        self.convblock9 = _conv3x3_block(32, 16, padding=0, norm_shape=[16, 3, 3],
                                         dropout_rate=dropout_rate)  # output_size = 3

        # Output block: average the 3x3 map, then project to the 10 classes.
        self.gap = nn.Sequential(
            nn.AvgPool2d(kernel_size=3)
        )  # output_size = 1
        self.convblock10 = _conv1x1(16, 10)

        # Not used in forward(); kept so the attribute remains available.
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        """Return log-softmax class scores of shape (N, 10) for input ``x``."""
        stages = (
            self.convblock1, self.convblock2, self.convblock3, self.pool1,
            self.convblock4, self.convblock5, self.convblock6, self.pool2,
            self.convblock7, self.convblock8, self.convblock9,
            self.gap, self.convblock10,
        )
        for stage in stages:
            x = stage(x)

        # (N, 10, 1, 1) -> (N, 10)
        x = x.view(-1, 10)
        return F.log_softmax(x, dim=-1)

# Model Params
We can't emphasize enough how important it is to view the model summary.
Unfortunately, PyTorch has no built-in model visualizer, so we have to rely on an external package.

In [38]:
# !pip install torchsummary
from torchsummary import summary

# Pick the GPU when one is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")
print(device)

# Build the network on that device and print its per-layer summary for a
# single 3x32x32 input.
model = Net().to(device)
summary(model, input_size=(3, 32, 32))

cuda
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 4, 34, 34]             108
              ReLU-2            [-1, 4, 34, 34]               0
         LayerNorm-3            [-1, 4, 34, 34]           9,248
           Dropout-4            [-1, 4, 34, 34]               0
            Conv2d-5            [-1, 4, 32, 32]             144
              ReLU-6            [-1, 4, 32, 32]               0
         LayerNorm-7            [-1, 4, 32, 32]           8,192
           Dropout-8            [-1, 4, 32, 32]               0
            Conv2d-9           [-1, 10, 32, 32]              40
        MaxPool2d-10           [-1, 10, 16, 16]               0
           Conv2d-11           [-1, 16, 16, 16]           1,440
             ReLU-12           [-1, 16, 16, 16]               0
        LayerNorm-13           [-1, 16, 16, 16]           8,192
          Dropout-14           [-1

# Training and Testing

All right, so we have about 49K params (49,280, summing the layers above), and roughly 70% of them are LayerNorm affine weights rather than convolution kernels. But the purpose of this notebook is to set things right for our future experiments.

Looking at logs can be boring, so we'll introduce **tqdm** progressbar to get cooler logs.

Let's write train and test functions

In [39]:
from tqdm import tqdm

# Metric histories filled in by train() and test().
train_losses = []  # one detached loss tensor per training batch
test_losses = []   # one average loss (float) per test() call
train_acc = []     # running training accuracy (%) after every batch
test_acc = []      # test accuracy (%) per test() call


def train(model, device, train_loader, optimizer, epoch):
    """Run one pass over ``train_loader``, updating ``model`` with ``optimizer``.

    Appends the batch loss to ``train_losses`` and the running accuracy (in
    percent, over the batches seen so far in this call) to ``train_acc`` after
    every batch, and shows both in the tqdm progress bar.

    Args:
        model: network whose output is fed to ``F.nll_loss``.
        device: device that each batch is moved to.
        train_loader: iterable of ``(data, target)`` batches.
        optimizer: optimizer stepping the model's parameters.
        epoch: accepted for call-site compatibility; not used in the body.
    """
    model.train()
    pbar = tqdm(train_loader)
    correct = 0
    processed = 0
    for batch_idx, (data, target) in enumerate(pbar):
        # get samples
        data, target = data.to(device), target.to(device)

        # PyTorch accumulates gradients across backward passes, so clear them
        # before computing this batch's gradients.
        optimizer.zero_grad()

        # Predict
        y_pred = model(data)

        # Calculate loss
        loss = F.nll_loss(y_pred, target)
        # Store a detached copy: keeping `loss` itself would keep a reference
        # to its autograd graph in the history list for every batch.
        train_losses.append(loss.detach())

        # Backpropagation
        loss.backward()
        optimizer.step()

        # Update running accuracy and the progress bar
        pred = y_pred.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
        correct += pred.eq(target.view_as(pred)).sum().item()
        processed += len(data)
        running_acc = 100*correct/processed

        pbar.set_description(desc= f'Loss={loss.item()} Batch_id={batch_idx} Accuracy={running_acc:0.2f}')
        train_acc.append(running_acc)

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    test_losses.append(test_loss)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

    test_acc.append(100. * correct / len(test_loader.dataset))

In [40]:
from torch.optim.lr_scheduler import StepLR

EPOCHS = 20

# Fresh model for the actual run, trained with SGD + momentum.
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
# Multiply the learning rate by 0.1 after every 6 scheduler steps (one step per epoch).
scheduler = StepLR(optimizer, step_size=6, gamma=0.1)

for epoch in range(EPOCHS):
    print("EPOCH:", epoch)
    # One training pass, then advance the LR schedule, then evaluate.
    train(model, device, train_dataloader, optimizer, epoch)
    scheduler.step()
    test(model, device, test_dataloader)

EPOCH: 0


Loss=1.6325328350067139 Batch_id=781 Accuracy=33.99: 100%|██████████| 782/782 [01:48<00:00,  7.22it/s]



Test set: Average loss: 1.5601, Accuracy: 4323/10000 (43.23%)

EPOCH: 1


Loss=1.4344699382781982 Batch_id=781 Accuracy=45.57: 100%|██████████| 782/782 [01:47<00:00,  7.29it/s]



Test set: Average loss: 1.3773, Accuracy: 5044/10000 (50.44%)

EPOCH: 2


Loss=1.0260025262832642 Batch_id=781 Accuracy=49.87: 100%|██████████| 782/782 [01:48<00:00,  7.19it/s]



Test set: Average loss: 1.2801, Accuracy: 5406/10000 (54.06%)

EPOCH: 3


Loss=1.077168345451355 Batch_id=781 Accuracy=53.24: 100%|██████████| 782/782 [01:47<00:00,  7.28it/s]



Test set: Average loss: 1.2599, Accuracy: 5572/10000 (55.72%)

EPOCH: 4


Loss=1.1708346605300903 Batch_id=781 Accuracy=55.76: 100%|██████████| 782/782 [01:49<00:00,  7.15it/s]



Test set: Average loss: 1.1080, Accuracy: 6030/10000 (60.30%)

EPOCH: 5


Loss=1.1539556980133057 Batch_id=781 Accuracy=57.59: 100%|██████████| 782/782 [01:47<00:00,  7.30it/s]



Test set: Average loss: 1.0911, Accuracy: 6170/10000 (61.70%)

EPOCH: 6


Loss=1.0679919719696045 Batch_id=781 Accuracy=60.85: 100%|██████████| 782/782 [01:47<00:00,  7.28it/s]



Test set: Average loss: 1.0239, Accuracy: 6425/10000 (64.25%)

EPOCH: 7


Loss=1.32744300365448 Batch_id=781 Accuracy=61.41: 100%|██████████| 782/782 [01:47<00:00,  7.27it/s]



Test set: Average loss: 1.0068, Accuracy: 6438/10000 (64.38%)

EPOCH: 8


Loss=1.2978899478912354 Batch_id=781 Accuracy=61.57: 100%|██████████| 782/782 [01:46<00:00,  7.33it/s]



Test set: Average loss: 0.9994, Accuracy: 6465/10000 (64.65%)

EPOCH: 9


Loss=0.9423808455467224 Batch_id=781 Accuracy=62.03: 100%|██████████| 782/782 [01:48<00:00,  7.24it/s]



Test set: Average loss: 0.9889, Accuracy: 6512/10000 (65.12%)

EPOCH: 10


Loss=0.8870269656181335 Batch_id=781 Accuracy=62.09: 100%|██████████| 782/782 [01:47<00:00,  7.27it/s]



Test set: Average loss: 0.9897, Accuracy: 6519/10000 (65.19%)

EPOCH: 11


Loss=1.4567840099334717 Batch_id=781 Accuracy=62.29: 100%|██████████| 782/782 [01:50<00:00,  7.07it/s]



Test set: Average loss: 0.9800, Accuracy: 6537/10000 (65.37%)

EPOCH: 12


Loss=0.7393615245819092 Batch_id=781 Accuracy=62.89: 100%|██████████| 782/782 [01:48<00:00,  7.20it/s]



Test set: Average loss: 0.9743, Accuracy: 6563/10000 (65.63%)

EPOCH: 13


Loss=1.0443243980407715 Batch_id=781 Accuracy=62.98: 100%|██████████| 782/782 [01:47<00:00,  7.25it/s]



Test set: Average loss: 0.9755, Accuracy: 6547/10000 (65.47%)

EPOCH: 14


Loss=1.0112364292144775 Batch_id=781 Accuracy=62.63: 100%|██████████| 782/782 [01:46<00:00,  7.32it/s]



Test set: Average loss: 0.9745, Accuracy: 6569/10000 (65.69%)

EPOCH: 15


Loss=0.7884402871131897 Batch_id=781 Accuracy=62.81: 100%|██████████| 782/782 [01:47<00:00,  7.27it/s]



Test set: Average loss: 0.9738, Accuracy: 6562/10000 (65.62%)

EPOCH: 16


Loss=0.876549482345581 Batch_id=781 Accuracy=62.82: 100%|██████████| 782/782 [01:46<00:00,  7.34it/s]



Test set: Average loss: 0.9727, Accuracy: 6556/10000 (65.56%)

EPOCH: 17


Loss=0.9851260185241699 Batch_id=781 Accuracy=62.95: 100%|██████████| 782/782 [01:47<00:00,  7.30it/s]



Test set: Average loss: 0.9726, Accuracy: 6563/10000 (65.63%)

EPOCH: 18


Loss=0.6351847052574158 Batch_id=781 Accuracy=63.10: 100%|██████████| 782/782 [01:46<00:00,  7.36it/s]



Test set: Average loss: 0.9725, Accuracy: 6556/10000 (65.56%)

EPOCH: 19


Loss=1.5454460382461548 Batch_id=781 Accuracy=62.83: 100%|██████████| 782/782 [01:47<00:00,  7.29it/s]



Test set: Average loss: 0.9726, Accuracy: 6552/10000 (65.52%)

