In [1]:
import time
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader

In [2]:
########################
# Settings
########################

# Device: use the first CUDA GPU when one is available, otherwise the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Hyperparameters
random_seed = 1        # passed to torch.manual_seed before the model is built
learning_rate = 0.001  # learning rate handed to the Adam optimizer
num_epochs = 20        # number of passes over the training loader
batch_size = 128       # mini-batch size used by both data loaders

# Architecture
# CIFAR-10 images are 3 x 32 x 32 (see the "Image Dimensions" check in the
# data-loading cell), i.e. 3072 values per example. The previous value, 784,
# is the size of a 28 x 28 single-channel image and did not match this data.
num_features = 3 * 32 * 32
num_classes = 10

In [3]:
#####################
# CIFAR-10
#####################

# Both splits are converted to tensors with the same transform.
to_tensor = transforms.ToTensor()

# Only the training split requests a download; the test split reads from the
# same root directory.
train_dataset = datasets.CIFAR10(root="data", train=True, transform=to_tensor, download=True)
test_dataset = datasets.CIFAR10(root="data", train=False, transform=to_tensor)

# Training batches are reshuffled on every pass; test batches keep dataset order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Checking the dataset: pull a single batch and report its tensor shapes.
images, labels = next(iter(train_loader))
print("Image Dimensions: ", images.shape)
print("Label Dimensions: ", labels.shape)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/cifar-10-python.tar.gz to data
Image Dimensions:  torch.Size([128, 3, 32, 32])
Label Dimensions:  torch.Size([128])


In [4]:
######################
# Model
######################

class VGG19(nn.Module):
    """VGG-19-style convolutional classifier for 3-channel images.

    The network stacks five convolutional blocks with 2, 2, 4, 4 and 4
    conv layers and 64, 128, 256, 512 and 512 output channels. Every conv
    is 3x3 with stride 1 and padding 1 and is followed by a ReLU; every
    block ends in a 2x2 max-pool with stride 2, so the spatial size is
    halved five times. The classifier's first layer takes exactly 512
    features per example, which is what a 32x32 input produces
    (32 / 2**5 = 1 pixel x 512 channels).

    All Conv2d and Linear weights are drawn from a normal distribution with
    mean 0 and standard deviation 0.05; all biases are set to zero.

    Args:
        num_features: Not read by the network; kept in the signature so
            existing call sites keep working.
        num_classes: Number of output logits.
    """

    def __init__(self, num_features, num_classes):
        super(VGG19, self).__init__()

        # Blocks are created in the same order, with the same layer layout,
        # as a hand-written stack, so parameter names (e.g. "block_3.6.weight")
        # and random-number consumption during construction are unchanged.
        self.block_1 = self._make_block(3, 64, num_convs=2)
        self.block_2 = self._make_block(64, 128, num_convs=2)
        self.block_3 = self._make_block(128, 256, num_convs=4)
        self.block_4 = self._make_block(256, 512, num_convs=4)
        self.block_5 = self._make_block(512, 512, num_convs=4)

        self.classifier = nn.Sequential(
            nn.Linear(512, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes)
        )

        # Overwrite the default initialisation of every conv / linear layer.
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                nn.init.normal_(m.weight, mean=0.0, std=0.05)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

    @staticmethod
    def _make_block(in_channels, out_channels, num_convs):
        """Build `num_convs` 3x3 conv + ReLU pairs followed by one 2x2 max-pool."""
        layers = []
        for _ in range(num_convs):
            layers.append(nn.Conv2d(in_channels=in_channels,
                                    out_channels=out_channels,
                                    kernel_size=(3, 3),
                                    stride=(1, 1),
                                    padding=1))
            layers.append(nn.ReLU())
            # Every conv after the first maps out_channels -> out_channels.
            in_channels = out_channels
        layers.append(nn.MaxPool2d(kernel_size=(2, 2),
                                   stride=(2, 2)))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the network on a batch of images.

        Args:
            x: Image batch with 3 channels. The classifier only accepts the
                512 features per example that 32x32 inputs produce.

        Returns:
            A tuple ``(logits, probas)``: the raw class scores and their
            softmax over the class dimension.

        Raises:
            RuntimeError: If the flattened feature size per example is not
                512 (for instance, for inputs that are not 32x32).
        """
        x = self.block_1(x)
        x = self.block_2(x)
        x = self.block_3(x)
        x = self.block_4(x)
        x = self.block_5(x)

        # Flatten per example instead of `view(-1, 512)`: with a different
        # input resolution the old reshape silently moved spatial positions
        # into the batch dimension; now the size mismatch surfaces as an
        # error in the first Linear layer.
        flat = torch.flatten(x, start_dim=1)
        logits = self.classifier(flat)
        probas = F.softmax(logits, dim=1)

        return logits, probas

# Seed first so that the layer initialisation below is repeatable.
torch.manual_seed(random_seed)

# Build the network and move it to the selected device in one expression.
model = VGG19(num_features=num_features, num_classes=num_classes).to(device)

# Adam over every model parameter, using the configured learning rate.
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)




In [5]:
########################
# Training
########################

def compute_accuracy(model, dataloader, device):
    """Return the classification accuracy of `model` over `dataloader`, in percent.

    The model is switched to evaluation mode (and left in it). Autograd is
    disabled inside the function, so callers do not need to wrap the call in
    a no-grad context themselves.

    Args:
        model: Module whose forward call returns ``(logits, probas)``.
        dataloader: Iterable yielding ``(features, targets)`` batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        A 0-dim float tensor holding ``100 * correct / total``.

    Raises:
        ValueError: If `dataloader` yields no examples.
    """
    model.eval()
    correct_pred, num_examples = 0, 0

    # Evaluation never needs gradients; disabling them here avoids building
    # autograd graphs when a caller forgets to do so.
    with torch.no_grad():
        for features, targets in dataloader:
            features = features.to(device)
            targets = targets.to(device)

            _, probas = model(features)
            # Predicted class = index of the largest probability per row.
            predicted_labels = torch.argmax(probas, dim=1)
            num_examples += targets.size(0)
            correct_pred += (predicted_labels == targets).sum()

    if num_examples == 0:
        # Previously this surfaced as an AttributeError on `int.float()`.
        raise ValueError("dataloader yielded no examples; accuracy is undefined")

    return correct_pred.float()/num_examples * 100

def compute_epoch_loss(model, dataloader, device):
    """Return the mean cross-entropy loss of `model` over `dataloader`.

    The model is put into evaluation mode and gradients are disabled while
    the batches are processed. Per-batch losses are summed (not averaged)
    and divided by the total number of examples at the end, so batches of
    different sizes are weighted correctly.

    Args:
        model: Module whose forward call returns ``(logits, probas)``.
        dataloader: Iterable yielding ``(features, targets)`` batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        The summed loss divided by the number of examples seen.
    """
    model.eval()
    total_loss = 0.
    seen = 0

    with torch.no_grad():
        for batch_x, batch_y in dataloader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            logits, _ = model(batch_x)
            batch_loss = F.cross_entropy(logits, batch_y, reduction="sum")
            seen += batch_y.size(0)
            total_loss = total_loss + batch_loss

    return total_loss / seen

start_time = time.time()

for epoch in range(num_epochs):
    # ---- optimisation phase: one pass over the training loader ----
    model.train()

    for batch_idx, (features, targets) in enumerate(train_loader):
        features, targets = features.to(device), targets.to(device)

        # Forward pass and mini-batch loss
        logits, probas = model(features)
        cost = F.cross_entropy(logits, targets)

        # Clear stale gradients, back-propagate, then update the parameters
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        # LOGGING: report the current mini-batch cost every 50 batches
        if batch_idx % 50 == 0:
            print("Epoch: %03d/%03d || Batch: %04d/%04d || Cost: %.4f" % (epoch+1, num_epochs, batch_idx, len(train_loader), cost))

    # ---- evaluation phase: accuracy first, then loss, on the training set ----
    model.eval()
    with torch.no_grad():
        train_accuracy = compute_accuracy(model, train_loader, device)
        train_loss = compute_epoch_loss(model, train_loader, device)
        print("Epoch: %03d/%03d || Train: %.3f%% | Loss: %.3f" % (epoch+1, num_epochs, train_accuracy, train_loss))

    print("Time elapsed: %.2f min" % ((time.time()-start_time)/60))

print("Total Training Time: %.2f min" % ((time.time()-start_time)/60))

Epoch: 001/020 || Batch: 0000/0391 || Cost: 1061.4156
Epoch: 001/020 || Batch: 0050/0391 || Cost: 2.3025
Epoch: 001/020 || Batch: 0100/0391 || Cost: 1.9715
Epoch: 001/020 || Batch: 0150/0391 || Cost: 1.9078
Epoch: 001/020 || Batch: 0200/0391 || Cost: 1.8211
Epoch: 001/020 || Batch: 0250/0391 || Cost: 1.8317
Epoch: 001/020 || Batch: 0300/0391 || Cost: 1.8077
Epoch: 001/020 || Batch: 0350/0391 || Cost: 1.6687
Epoch: 001/020 || Train: 33.396% | Loss: 1.695
Time elapsed: 0.63 min
Epoch: 002/020 || Batch: 0000/0391 || Cost: 1.7488
Epoch: 002/020 || Batch: 0050/0391 || Cost: 1.6875
Epoch: 002/020 || Batch: 0100/0391 || Cost: 1.5090
Epoch: 002/020 || Batch: 0150/0391 || Cost: 1.5868
Epoch: 002/020 || Batch: 0200/0391 || Cost: 1.4560
Epoch: 002/020 || Batch: 0250/0391 || Cost: 1.4756
Epoch: 002/020 || Batch: 0300/0391 || Cost: 1.6276
Epoch: 002/020 || Batch: 0350/0391 || Cost: 1.4148
Epoch: 002/020 || Train: 46.072% | Loss: 1.450
Time elapsed: 1.26 min
Epoch: 003/020 || Batch: 0000/0391 || Cos

Epoch: 018/020 || Batch: 0100/0391 || Cost: 0.6566
Epoch: 018/020 || Batch: 0150/0391 || Cost: 0.3629
Epoch: 018/020 || Batch: 0200/0391 || Cost: 0.3451
Epoch: 018/020 || Batch: 0250/0391 || Cost: 0.3916
Epoch: 018/020 || Batch: 0300/0391 || Cost: 0.4990
Epoch: 018/020 || Batch: 0350/0391 || Cost: 0.6505
Epoch: 018/020 || Train: 87.032% | Loss: 0.374
Time elapsed: 11.18 min
Epoch: 019/020 || Batch: 0000/0391 || Cost: 0.3114
Epoch: 019/020 || Batch: 0050/0391 || Cost: 0.5267
Epoch: 019/020 || Batch: 0100/0391 || Cost: 0.4429
Epoch: 019/020 || Batch: 0150/0391 || Cost: 0.3149
Epoch: 019/020 || Batch: 0200/0391 || Cost: 0.4357
Epoch: 019/020 || Batch: 0250/0391 || Cost: 0.5742
Epoch: 019/020 || Batch: 0300/0391 || Cost: 0.5342
Epoch: 019/020 || Batch: 0350/0391 || Cost: 0.4330
Epoch: 019/020 || Train: 87.090% | Loss: 0.381
Time elapsed: 11.78 min
Epoch: 020/020 || Batch: 0000/0391 || Cost: 0.3811
Epoch: 020/020 || Batch: 0050/0391 || Cost: 0.3974
Epoch: 020/020 || Batch: 0100/0391 || Cost

In [6]:
# Score the trained model on the held-out test loader with gradients disabled.
with torch.no_grad():
    test_accuracy = compute_accuracy(model, test_loader, device)
    print("Test Accuracy: %.2f%%" % (test_accuracy))

Test Accuracy: 76.49%


In [None]:
# Shell escape: list the installed packages whose `pip freeze` line contains "torch".
! pip freeze | grep "torch"