Setting up the imports

In [229]:
from typing import List, Optional, Tuple

import matplotlib.pyplot as plt
import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader

Setting up the device

In [230]:
# Run on the GPU when PyTorch reports one as available, otherwise on the CPU.
if torch.cuda.is_available():
    device: str = 'cuda'
else:
    device = 'cpu'

Setting up the CIFAR-10 dataset

In [231]:
transforms = torchvision.transforms

# Per-channel mean / std handed to Normalize in both pipelines below.
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2023, 0.1994, 0.2010)

# Training pipeline: random augmentations first, then tensor conversion and
# normalisation.
train_augmentations = transforms.Compose([
    transforms.RandomGrayscale(p=0.2),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.2),
    transforms.RandomRotation(degrees=30),
    # NOTE(review): unlike the transforms above, the first positional argument
    # here is the sharpness factor, not a probability (p keeps its default).
    # Confirm that a factor of 0.4 is what was intended.
    transforms.RandomAdjustSharpness(sharpness_factor=0.4),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
])

# Evaluation pipeline: no augmentation, only conversion and normalisation.
test_augmentations = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
])

# Both splits live under the same root and are downloaded on demand.
cifar10_options = dict(root='./data', download=True)

train_dataset = torchvision.datasets.CIFAR10(
    train=True, transform=train_augmentations, **cifar10_options
)

test_dataset = torchvision.datasets.CIFAR10(
    train=False, transform=test_augmentations, **cifar10_options
)

Files already downloaded and verified
Files already downloaded and verified


Setting up the DataLoader

In [232]:
batch_size = 8

# Settings shared by both loaders; only the shuffling differs between them.
loader_options = dict(batch_size=batch_size, num_workers=2)

# Training batches are drawn in a fresh random order every epoch.
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)

# Evaluation keeps the dataset order.
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

Preview the tensor shapes of one training batch

In [233]:
# Pull a single batch (if the loader yields any) and show the shapes of the
# image tensor and the label tensor.
first_batch = next(iter(train_loader), None)
if first_batch is not None:
    x, y = first_batch
    print(x.shape)
    print(y.shape)

torch.Size([8, 3, 32, 32])
torch.Size([8])


The Convolutional Neural Network class

In [234]:
class ConvNet(nn.Module):
    """Four convolutional stages followed by a three-layer classifier head.

    Each stage is Conv -> ReLU -> Conv -> ReLU -> 2x2 max-pool; the first three
    stages use 3x3 convolutions with padding 1 and end with channel dropout,
    the fourth uses 1x1 convolutions and has no dropout. Every pooling step
    halves the spatial size, so a 3x32x32 input reaches the classifier as a
    256x2x2 feature map, which is what the first linear layer is sized for.
    The head outputs 10 raw scores (logits) per sample.
    """

    # Shape (channels, height, width) the classifier head expects per sample.
    _FEATURE_SHAPE = (256, 2, 2)
    _FLAT_FEATURES = 2 * 2 * 256

    @staticmethod
    def _conv_block(in_channels, out_channels, kernel_size, padding, dropout=None):
        """Build one Conv-ReLU-Conv-ReLU-MaxPool stage, optionally ending in Dropout2d."""
        # Spatial size after a convolution:
        # output_shape = [(input_shape - kernel_size + 2*padding) / stride] + 1
        stage = [
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                      kernel_size=kernel_size, stride=1, padding=padding),
            nn.ReLU(),
            nn.Conv2d(in_channels=out_channels, out_channels=out_channels,
                      kernel_size=kernel_size, stride=1, padding=padding),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        if dropout is not None:
            stage.append(nn.Dropout2d(dropout))
        return nn.Sequential(*stage)

    def __init__(self):
        super().__init__()
        # Attribute names and the order of sub-modules are kept stable so that
        # state-dict keys (e.g. "layer1.0.weight") do not change.
        self.layer1 = self._conv_block(3, 32, kernel_size=3, padding=1, dropout=0.2)
        self.layer2 = self._conv_block(32, 64, kernel_size=3, padding=1, dropout=0.18)
        self.layer3 = self._conv_block(64, 128, kernel_size=3, padding=1, dropout=0.205)
        self.layer4 = self._conv_block(128, 256, kernel_size=1, padding=0)
        self.fully_connected = nn.Sequential(
            nn.Linear(self._FLAT_FEATURES, 256),
            nn.Tanh(),
            nn.Linear(256, 64),
            nn.Tanh(),
            nn.Linear(64, 10)
        )

    def forward(self, x):
        """Return class logits for ``x``.

        Raises:
            ValueError: if the convolutional stages do not produce a 256x2x2
                feature map per sample (i.e. the input is not 32x32). Without
                this check ``view(-1, ...)`` could silently fold the extra
                spatial positions into the batch dimension.
        """
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        if tuple(out.shape[-3:]) != self._FEATURE_SHAPE:
            raise ValueError(
                f'ConvNet expects inputs whose feature map is {self._FEATURE_SHAPE} '
                f'after the convolutional stages (3x32x32 images), got '
                f'{tuple(out.shape[-3:])} for input of shape {tuple(x.shape)}'
            )
        out = out.view(-1, self._FLAT_FEATURES)
        return self.fully_connected(out)

Define testing loop

In [235]:
@torch.no_grad()
def test(
        model: nn.Module,
        criterion: nn.CrossEntropyLoss,
        loader: torch.utils.data.DataLoader,
        device: torch.device
) -> Tuple[float, float]:
    model.eval()
    accuracy, test_loss = 0, 0
    for x, y in loader:
        x, y = x.to(device), y.to(device)
        y_hat = model(x)
        test_loss += criterion(y_hat, y).item() * x.shape[0]
        _, predicted = torch.max(y_hat, axis=1)
        accuracy += (predicted == y).sum().item()
    accuracy /= len(loader.dataset)
    test_loss /= len(loader.dataset)
    return accuracy, test_loss

Define training loop

In [236]:
def train(model: nn.Module,
          epochs: int,
          criterion: nn.CrossEntropyLoss,
          train_loader: torch.utils.data.DataLoader,
          test_loader: torch.utils.data.DataLoader,
          device: torch.device,
          optimizer: Optional[torch.optim.Optimizer] = None) -> None:
    """Train ``model`` and report/plot its progress.

    Before training, the model is evaluated once on ``test_loader``. Then, for
    each epoch, the model is optimised over ``train_loader`` and evaluated on
    ``test_loader`` with ``test``; losses and test accuracy are printed per
    epoch and plotted with matplotlib once all epochs are done.

    Args:
        model: network to optimise (updated in place).
        epochs: number of passes over ``train_loader``.
        criterion: loss applied to ``(model(x), y)``.
        train_loader: iterable of ``(x, y)`` training batches.
        test_loader: loader handed to ``test`` for evaluation.
        device: target passed to ``Tensor.to`` for inputs and labels.
        optimizer: optimiser whose ``step()`` applies the updates. When
            omitted, the module-level variable named ``optimizer`` is used so
            that existing calls without this argument keep working.

    Raises:
        NameError: if no optimizer is passed and no module-level ``optimizer``
            exists.
    """
    if optimizer is None:
        # Backward compatibility: older calls relied on a notebook-global
        # optimizer. Resolve it explicitly and fail early if it is missing.
        optimizer = globals().get('optimizer')
        if optimizer is None:
            raise NameError(
                "train() needs an optimizer: pass optimizer=... or define a "
                "module-level 'optimizer' before calling train()"
            )

    train_losses = []
    test_accuracies = []
    test_losses = []
    test_accuracy_untrained, test_loss_untrained = test(model, criterion, test_loader, device)
    print(f'On untrained model, test set: acc={test_accuracy_untrained*100:.2f}, loss={test_loss_untrained:.4f}')

    for epoch in range(1, epochs + 1):
        # test() leaves the model in eval mode, so re-enable training mode
        # (dropout active) at the start of every epoch.
        model.train()
        running_train_loss = 0
        for x, y in train_loader:
            x, y = x.to(device), y.to(device)
            y_hat = model(x)
            loss = criterion(y_hat, y)
            # Weight the batch loss by the batch size for a per-sample average.
            running_train_loss += loss.item() * x.shape[0]

            model.zero_grad()
            loss.backward()
            optimizer.step()
        train_losses.append(running_train_loss / len(train_loader.dataset))
        test_accuracy, test_loss = test(model, criterion, test_loader, device)
        print(f'Epoch {epoch}/{epochs}, train set: loss={train_losses[-1]:.4f}')
        print(f'Epoch {epoch}/{epochs}, test set: acc={test_accuracy*100:.2f}%, loss={test_loss:.4f}')
        test_losses.append(test_loss)
        test_accuracies.append(test_accuracy)

    x_epochs = list(range(1, epochs+1))
    plt.title('Loss curves')
    plt.plot(x_epochs, train_losses, 'red', label='Train loss')
    plt.plot(x_epochs, test_losses, 'green', label='Test loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend(loc='best')
    plt.show()

    plt.title('Test accuracy')
    plt.plot(x_epochs, test_accuracies)
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.show()

Define the model instance

In [237]:
# Build the network and move its parameters to the selected device
# (Module.to returns the module itself).
model = ConvNet().to(device)
# Last expression of the cell: display the module layout.
model

ConvNet(
  (layer1): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Dropout2d(p=0.2, inplace=False)
  )
  (layer2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Dropout2d(p=0.18, inplace=False)
  )
  (layer3): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Dropout2d(p=0.205, 

Print a summary of the model's layers and parameter counts

In [238]:
from torchsummary import summary

# summary() already prints the table. Without the trailing semicolon the
# notebook also echoes the returned object, so the table showed up twice.
summary(model);

Layer (type:depth-idx)                   Param #
├─Sequential: 1-1                        --
|    └─Conv2d: 2-1                       896
|    └─ReLU: 2-2                         --
|    └─Conv2d: 2-3                       9,248
|    └─ReLU: 2-4                         --
|    └─MaxPool2d: 2-5                    --
|    └─Dropout2d: 2-6                    --
├─Sequential: 1-2                        --
|    └─Conv2d: 2-7                       18,496
|    └─ReLU: 2-8                         --
|    └─Conv2d: 2-9                       36,928
|    └─ReLU: 2-10                        --
|    └─MaxPool2d: 2-11                   --
|    └─Dropout2d: 2-12                   --
├─Sequential: 1-3                        --
|    └─Conv2d: 2-13                      73,856
|    └─ReLU: 2-14                        --
|    └─Conv2d: 2-15                      147,584
|    └─ReLU: 2-16                        --
|    └─MaxPool2d: 2-17                   --
|    └─Dropout2d: 2-18                   --
├─Sequ

Layer (type:depth-idx)                   Param #
├─Sequential: 1-1                        --
|    └─Conv2d: 2-1                       896
|    └─ReLU: 2-2                         --
|    └─Conv2d: 2-3                       9,248
|    └─ReLU: 2-4                         --
|    └─MaxPool2d: 2-5                    --
|    └─Dropout2d: 2-6                    --
├─Sequential: 1-2                        --
|    └─Conv2d: 2-7                       18,496
|    └─ReLU: 2-8                         --
|    └─Conv2d: 2-9                       36,928
|    └─ReLU: 2-10                        --
|    └─MaxPool2d: 2-11                   --
|    └─Dropout2d: 2-12                   --
├─Sequential: 1-3                        --
|    └─Conv2d: 2-13                      73,856
|    └─ReLU: 2-14                        --
|    └─Conv2d: 2-15                      147,584
|    └─ReLU: 2-16                        --
|    └─MaxPool2d: 2-17                   --
|    └─Dropout2d: 2-18                   --
├─Sequ

In [239]:
# Number of training epochs and the SGD settings.
epochs = 100
lr, momentum, weight_decay = 0.00215, 0.88, 0.0001

# Classification loss on the raw scores produced by the model.
criterion = nn.CrossEntropyLoss()

# SGD with momentum and weight decay over all model parameters.
optimizer = torch.optim.SGD(
    params=model.parameters(),
    lr=lr,
    momentum=momentum,
    weight_decay=weight_decay,
)

In [None]:
# Run the full training schedule; progress is printed per epoch and the
# loss / accuracy curves are plotted at the end.
train(
    model=model,
    epochs=epochs,
    criterion=criterion,
    train_loader=train_loader,
    test_loader=test_loader,
    device=device,
)

On untrained model, test set: acc=10.00, loss=2.3066
Epoch 1/100, train set: loss=2.3035
Epoch 1/100, test set: acc=13.04%, loss=2.3038
Epoch 2/100, train set: loss=2.2574
Epoch 2/100, test set: acc=22.47%, loss=1.9940
Epoch 3/100, train set: loss=2.0127
Epoch 3/100, test set: acc=28.88%, loss=1.8241
Epoch 4/100, train set: loss=1.8969
Epoch 4/100, test set: acc=36.65%, loss=1.7156


Experiment notes. Configuration: layer 1, 2, 3, 128x3x3 + data augmentation
epochs = 100
lr = 0.00205
momentum = 0.86
weight_decay = 0.0011
Result: 80% accuracy