In [1]:
import time
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader

# Only touch the cuDNN backend when a CUDA device is actually available.
# Setting the `deterministic` flag trades some speed for run-to-run
# repeatability of cuDNN-backed ops (see the PyTorch reproducibility notes).
if torch.cuda.is_available():
    torch.backends.cudnn.deterministic = True

In [2]:
##################
# Settings
#################

# Use the first CUDA GPU when one is visible; otherwise run on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Hyperparameters
random_seed = 1         # fed to torch.manual_seed before the model is built
learning_rate = 0.001   # step size handed to the Adam optimizer
num_epochs = 10         # full passes over the training set
batch_size = 128        # examples per mini-batch for both data loaders

# Architecture
# The data cell reports batches of shape [128, 3, 32, 32], i.e. 3*32*32 = 3072
# values per image. The earlier value 784 is the size of a 28x28 single-channel
# image and did not describe this dataset. VGG16 accepts this argument but does
# not read it, so the correction does not alter the network.
num_features = 3 * 32 * 32
num_classes = 10

In [3]:
#####################
# CIFAR10
#####################

# One transform instance is shared by both splits.
to_tensor = transforms.ToTensor()

# Only the training split requests a download; the test split is then read
# from the same "data" root.
train_dataset = datasets.CIFAR10(root="data", train=True, transform=to_tensor, download=True)
test_dataset = datasets.CIFAR10(root="data", train=False, transform=to_tensor)

# Options common to both loaders; only the training loader shuffles.
loader_options = {"batch_size": batch_size, "num_workers": 4}
train_loader = DataLoader(dataset=train_dataset, shuffle=True, **loader_options)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, **loader_options)

# Sanity check: pull a single batch and show its tensor shapes.
images, labels = next(iter(train_loader))
print("Image Dimensions: ", images.shape)
print("Label Dimensions: ", labels.shape)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/cifar-10-python.tar.gz to data




Image Dimensions:  torch.Size([128, 3, 32, 32])
Label Dimensions:  torch.Size([128])


In [4]:
class VGG16(nn.Module):
    """VGG16-style convolutional classifier for 3-channel images.

    Five convolutional stages with 2, 2, 3, 3 and 3 conv layers and
    64, 128, 256, 512 and 512 output channels, each ending in a 2x2 max-pool,
    feed a three-layer fully connected classifier. The first linear layer
    takes 512 inputs, so the five poolings must reduce the feature map to
    1x1 -- which holds for the 32x32 images this notebook loads.

    Args:
        num_classes: width of the last linear layer (number of logits).
        num_features: accepted for call compatibility; not read by the model.
    """

    def __init__(self, num_classes, num_features):
        super().__init__()

        # Arguments are (in_channels, out_channels, number of conv layers).
        self.block_1 = self._conv_stage(3, 64, 2)
        self.block_2 = self._conv_stage(64, 128, 2)
        self.block_3 = self._conv_stage(128, 256, 3)
        self.block_4 = self._conv_stage(256, 512, 3)
        self.block_5 = self._conv_stage(512, 512, 3)

        self.classifier = nn.Sequential(
            nn.Linear(512, 4096),
            nn.ReLU(True),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Linear(4096, num_classes),
        )

        # Re-initialise every conv/linear layer: Kaiming-uniform weights
        # (fan-in, ReLU gain) and all-zero biases.
        for layer in self.modules():
            if not isinstance(layer, (nn.Conv2d, nn.Linear)):
                continue
            nn.init.kaiming_uniform_(layer.weight, mode="fan_in", nonlinearity="relu")
            if layer.bias is not None:
                nn.init.zeros_(layer.bias)

    @staticmethod
    def _conv_stage(in_channels, out_channels, num_convs):
        """Build ``num_convs`` x (3x3 conv + ReLU) followed by a 2x2 max-pool."""
        layers = []
        channels = in_channels
        for _ in range(num_convs):
            # padding=1 keeps the spatial size for a 3x3 kernel at stride 1:
            # (1 * (32 - 1) - 32 + 3) / 2 = 1
            layers.append(nn.Conv2d(in_channels=channels,
                                    out_channels=out_channels,
                                    kernel_size=(3, 3),
                                    stride=(1, 1),
                                    padding=1))
            layers.append(nn.ReLU())
            channels = out_channels
        layers.append(nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return ``(logits, probas)``; ``probas`` is the softmax of ``logits`` over dim 1."""
        stages = (self.block_1, self.block_2, self.block_3, self.block_4, self.block_5)
        for stage in stages:
            x = stage(x)
        # Flatten everything except the batch dimension for the linear layers.
        flat = x.view(x.size(0), -1)
        logits = self.classifier(flat)
        return logits, F.softmax(logits, dim=1)


# Seed first so the random weight initialisation inside VGG16 is repeatable.
torch.manual_seed(random_seed)

# Build the network and move it to the selected device in one step.
model = VGG16(num_classes=num_classes, num_features=num_features).to(device)

# Adam over all model parameters with the configured learning rate.
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [5]:
# Training
def compute_accuracy(model, data_loader):
    """Return the classification accuracy of ``model`` over ``data_loader``, in percent.

    The model is switched to evaluation mode (and left in it), and the forward
    passes run under ``torch.no_grad()`` so no autograd graph is built even
    when the caller has not disabled gradients.

    Args:
        model: callable module returning ``(logits, probas)`` for a batch.
        data_loader: iterable of ``(features, targets)`` batches.

    Returns:
        A 0-dim float tensor: ``100 * correct / total``.

    Raises:
        ValueError: if ``data_loader`` yields no examples (accuracy undefined).
    """
    model.eval()
    correct_pred, num_examples = 0, 0
    with torch.no_grad():
        for features, targets in data_loader:
            # `device` is the notebook-level torch.device chosen in the settings cell.
            features = features.to(device)
            targets = targets.to(device)

            _, probas = model(features)
            # The predicted class is the one with the highest probability.
            predicted_labels = torch.argmax(probas, dim=1)
            num_examples += targets.size(0)
            correct_pred += (predicted_labels == targets).sum()

    if num_examples == 0:
        raise ValueError("compute_accuracy received no examples from data_loader")
    return correct_pred.float() / num_examples * 100

def compute_epoch_loss(model, data_loader):
    """Return the mean per-example cross-entropy of ``model`` over ``data_loader``.

    Losses are summed per batch and divided once by the total number of
    examples, so batches of different sizes are weighted correctly. The model
    is put in evaluation mode and gradients are disabled during the pass.

    Args:
        model: callable module returning ``(logits, probas)`` for a batch.
        data_loader: iterable of ``(features, targets)`` batches.

    Returns:
        A 0-dim tensor holding the average loss.
    """
    model.eval()
    total_loss = 0.
    seen = 0
    with torch.no_grad():
        for inputs, labels in data_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            logits, _ = model(inputs)
            # reduction="sum" gives the batch total rather than the batch mean.
            total_loss = total_loss + F.cross_entropy(logits, labels, reduction="sum")
            seen += labels.size(0)
    return total_loss / seen

start_time = time.time()
num_batches = len(train_loader)

for epoch in range(num_epochs):
    epoch_num = epoch + 1  # 1-based epoch number for the log lines

    # ---- optimisation pass over the training set ----
    model.train()
    for batch_idx, (features, targets) in enumerate(train_loader):
        features, targets = features.to(device), targets.to(device)

        # Forward pass and mini-batch loss
        logits, probas = model(features)
        cost = F.cross_entropy(logits, targets)

        # Clear stale gradients, back-propagate, then update the parameters
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        # Log every 50th mini-batch
        if batch_idx % 50 == 0:
            print("Epoch: %03d/%03d || Batch: %03d/%03d || Cost: %.4f" % (epoch_num, num_epochs, batch_idx, num_batches, cost))

    # ---- end-of-epoch metrics on the training set, gradients disabled ----
    model.eval()
    with torch.no_grad():
        train_acc = compute_accuracy(model, train_loader)
        train_loss = compute_epoch_loss(model, train_loader)
    print("Epoch: %03d/%03d || Train: %.3f%% || Loss: %.3f" % (epoch_num, num_epochs, train_acc, train_loss))

    print("Time Elapsed: %.2f min" % ((time.time() - start_time) / 60))

print("Total Training Time: %.2f min" % ((time.time() - start_time) / 60))

Epoch: 001/010 || Batch: 000/391 || Cost: 2.4443
Epoch: 001/010 || Batch: 050/391 || Cost: 2.1414
Epoch: 001/010 || Batch: 100/391 || Cost: 2.0844
Epoch: 001/010 || Batch: 150/391 || Cost: 1.9001
Epoch: 001/010 || Batch: 200/391 || Cost: 1.9626
Epoch: 001/010 || Batch: 250/391 || Cost: 1.7157
Epoch: 001/010 || Batch: 300/391 || Cost: 1.7874
Epoch: 001/010 || Batch: 350/391 || Cost: 1.7342
Epoch: 001/010 || Train: 37.350% || Loss: 1.603
Time Elapsed: 0.71 min
Epoch: 002/010 || Batch: 000/391 || Cost: 1.6161
Epoch: 002/010 || Batch: 050/391 || Cost: 1.6217
Epoch: 002/010 || Batch: 100/391 || Cost: 1.5814
Epoch: 002/010 || Batch: 150/391 || Cost: 1.4906
Epoch: 002/010 || Batch: 200/391 || Cost: 1.2700
Epoch: 002/010 || Batch: 250/391 || Cost: 1.4568
Epoch: 002/010 || Batch: 300/391 || Cost: 1.2954
Epoch: 002/010 || Batch: 350/391 || Cost: 1.2199
Epoch: 002/010 || Train: 53.450% || Loss: 1.295
Time Elapsed: 1.40 min
Epoch: 003/010 || Batch: 000/391 || Cost: 1.3097
Epoch: 003/010 || Batch: 

In [6]:
 with torch.set_grad_enabled(False):
        print("Test accuracy: %.2f%%" % (compute_accuracy(model, test_loader)))

Test accuracy: 76.04%


In [7]:
# Shell escape: list the `pip freeze` lines that contain "torch" to record the
# installed versions. This relies on a `grep` executable being available.
! pip freeze | grep "torch"

UsageError: Line magic function `%watermark` not found.
