In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torch.utils.data as data
import torch.nn.functional as F

import torch.optim as optim
from torch.optim.lr_scheduler import _LRScheduler, ReduceLROnPlateau
import torchvision.datasets as datasets

from sklearn import decomposition
from sklearn import manifold
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import f1_score, precision_score, recall_score
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import copy
import random
import time

In [2]:
SEED = 1234

# Seed every random number generator the notebook draws from (Python's
# `random`, NumPy, PyTorch CPU and PyTorch CUDA) with the same value.
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
    _seed_fn(SEED)

# Restrict cuDNN to deterministic algorithms.
torch.backends.cudnn.deterministic = True

### Model Creation

Taken from https://github.com/kuangliu/pytorch-cifar (the link given in the assignment).

In [3]:
class BasicBlock(nn.Module):
    """Residual block: two 3x3 conv + batch-norm stages added to a shortcut branch.

    Args:
        in_planes: number of channels of the block input.
        planes: number of channels produced by the block.
        stride: stride of the first convolution (and of the shortcut projection).
    """

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()

        # Main branch: only the first convolution applies `stride`.
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        # Shortcut branch: an empty Sequential (identity) unless the spatial
        # size or channel count changes, in which case a 1x1 conv + BN
        # projects the input to the shape of the main branch.
        projection = []
        if stride != 1 or in_planes != planes:
            projection = [
                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes),
            ]
        self.shortcut = nn.Sequential(*projection)

    def forward(self, x):
        """Return relu(main_branch(x) + shortcut(x))."""
        features = self.conv1(x)
        features = F.relu(self.bn1(features))
        features = self.bn2(self.conv2(features))
        features += self.shortcut(x)
        return F.relu(features)

In [4]:
class ResNet(nn.Module):
    """Four-stage residual network with a configurable width and depth per stage.

    Args:
        block: block class, called as ``block(in_planes, planes, stride)``.
        ip_planes: output channel count of each of the four stages.
        num_blocks: number of blocks in each of the four stages.
        kernel_size: kernel size of the stem convolution.
        num_classes: size of the final linear layer's output.
    """

    def __init__(self, block, ip_planes, num_blocks, kernel_size=3, num_classes=10):
        super(ResNet, self).__init__()
        # Channel count fed to the next block; updated by _make_layer.
        self.in_planes = 64

        # NOTE(review): padding stays at 1 for every kernel_size, so kernels
        # other than 3 change the spatial size of the stem output.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=kernel_size,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, ip_planes[0], num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, ip_planes[1], num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, ip_planes[2], num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, ip_planes[3], num_blocks[3], stride=2)
        self.linear = nn.Linear(ip_planes[3], num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage: the first block uses `stride`, the rest use stride 1."""
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes)."""
        out = F.relu(self.bn1(self.conv1(x)), inplace=True)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        # Global average pooling. For the 4x4 map produced from 32x32 inputs
        # this equals the previous fixed avg_pool2d(out, 4), but it also keeps
        # the flattened size equal to ip_planes[3] for other input resolutions.
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

In [5]:
def build_model(ip_planes, blocks, kernel_sz=3):
    """Create a ResNet of BasicBlocks with the given stage widths, depths and stem kernel size."""
    network = ResNet(
        block=BasicBlock,
        ip_planes=ip_planes,
        num_blocks=blocks,
        kernel_size=kernel_sz,
    )
    return network

In [7]:
def calculate_accuracy(y_pred, y):
    """Compute accuracy and micro-averaged precision/recall/F1 for one batch.

    Args:
        y_pred: per-class scores; the predicted class is the argmax over dim 1.
        y: ground-truth class indices, one per row of `y_pred`.

    Returns:
        Tuple ``(acc, prec, rec, f1)``. `acc` is a tensor on the device of the
        inputs; the other three are the values returned by scikit-learn.
    """
    top_pred = y_pred.argmax(1)
    correct = top_pred.eq(y.view_as(top_pred)).sum()
    acc = correct.float() / y.shape[0]

    # scikit-learn works on host memory: copy each tensor to the CPU once
    # instead of once per metric.
    y_true_cpu = y.cpu()
    y_pred_cpu = top_pred.cpu()

    prec = precision_score(y_true_cpu, y_pred_cpu, average='micro', zero_division = 0)
    rec = recall_score(y_true_cpu, y_pred_cpu, average='micro', zero_division = 0)
    f1 = f1_score(y_true_cpu, y_pred_cpu, average='micro', zero_division = 0)

    return acc, prec, rec, f1

In [8]:
def epoch_time(start_time, end_time):
    """Split the interval between two timestamps (in seconds) into whole minutes and seconds.

    Returns:
        Tuple ``(minutes, seconds)`` of ints, both truncated toward zero.
    """
    duration = end_time - start_time
    whole_minutes = int(duration / 60)
    leftover_seconds = int(duration - 60 * whole_minutes)
    return whole_minutes, leftover_seconds

In [9]:
def train(model, iterator, optimizer, criterion, device):
    """Run one optimisation pass over `iterator`.

    Args:
        model: network to optimise; put into training mode here.
        iterator: iterable of ``(inputs, targets)`` batches.
        optimizer: optimizer stepped once per batch.
        criterion: loss function called as ``criterion(predictions, targets)``;
            assumed to return the mean loss over the batch.
        device: device the batches are moved to.

    Returns:
        Tuple ``(mean_loss, accuracy)`` of Python floats, averaged over samples
        so that a smaller final batch is not over-weighted.

    Raises:
        ValueError: if `iterator` yields no samples.
    """
    total_loss = 0.0
    total_correct = 0
    total_samples = 0

    model.train()

    for (x, y) in iterator:

        x = x.to(device)
        y = y.to(device)

        optimizer.zero_grad()

        y_pred = model(x)
        loss = criterion(y_pred, y)

        loss.backward()
        optimizer.step()

        # Accuracy is computed inline: only the hit count is needed here, so
        # the extra per-batch metrics of calculate_accuracy are not computed.
        top_pred = y_pred.detach().argmax(1)
        batch_size = y.shape[0]

        total_loss += loss.item() * batch_size
        total_correct += top_pred.eq(y.view_as(top_pred)).sum().item()
        total_samples += batch_size

    if total_samples == 0:
        raise ValueError("train() received an iterator with no samples")

    return total_loss / total_samples, total_correct / total_samples

In [10]:

def evaluate(model, iterator, criterion, device):
    """Measure loss and accuracy of `model` over `iterator` without updating it.

    Args:
        model: network to evaluate; put into eval mode here.
        iterator: iterable of ``(inputs, targets)`` batches.
        criterion: loss function called as ``criterion(predictions, targets)``;
            assumed to return the mean loss over the batch.
        device: device the batches are moved to.

    Returns:
        Tuple ``(mean_loss, accuracy)`` of Python floats, averaged over samples
        so that a smaller final batch is not over-weighted.

    Raises:
        ValueError: if `iterator` yields no samples.
    """
    total_loss = 0.0
    total_correct = 0
    total_samples = 0

    model.eval()

    with torch.no_grad():

        for (x, y) in iterator:

            x = x.to(device)
            y = y.to(device)

            y_pred = model(x)
            loss = criterion(y_pred, y)

            # Accuracy is computed inline: only the hit count is needed here,
            # so the extra per-batch metrics of calculate_accuracy are skipped.
            top_pred = y_pred.argmax(1)
            batch_size = y.shape[0]

            total_loss += loss.item() * batch_size
            total_correct += top_pred.eq(y.view_as(top_pred)).sum().item()
            total_samples += batch_size

    if total_samples == 0:
        raise ValueError("evaluate() received an iterator with no samples")

    return total_loss / total_samples, total_correct / total_samples

In [11]:
# Per-channel statistics passed to transforms.Normalize by every CIFAR-10
# pipeline in this notebook.
mean, std = (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)

In [12]:
def prepare_and_train(epochs, train_dataloader, test_dataloader, valid_dataloader, model, criterion, optimizer, scheduler, path, aug=False, augVal=-1):
    """Train `model`, checkpoint it on every validation-loss improvement, and log metrics.

    Reads the notebook-level names `device`, `mean`, `std`, `test_transforms`,
    `n_train_examples` and `n_valid_examples`.

    Args:
        epochs: number of training epochs.
        train_dataloader: training batches (replaced when `aug` is true).
        test_dataloader: test batches, evaluated each time a checkpoint is saved.
        valid_dataloader: validation batches (replaced when `aug` is true).
        model: network to train.
        criterion: loss function.
        optimizer: optimizer for `model`.
        scheduler: stepped once per epoch with the validation loss.
        path: prefix for the output files ``<path>model.pt`` and ``<path>Metrics.csv``.
        aug: when true, rebuild the train/validation loaders with the
            augmentation pipeline selected by `augVal`.
        augVal: augmentation pipeline id; only ``1`` is defined.

    Returns:
        Tuple ``(train_loss, train_acc, val_loss, val_acc)`` of per-epoch lists.

    Raises:
        ValueError: if `aug` is true and `augVal` does not name a defined pipeline.
    """
    if aug:
        if augVal != 1:
            # Previously this fell through to an unbound local variable.
            raise ValueError("aug=True requires augVal=1; got augVal=%r" % (augVal,))

        aug_transforms = transforms.Compose(
            [
                transforms.RandomRotation(5),
                transforms.RandomCrop((32,32), padding=4),
                transforms.RandomHorizontalFlip(p=0.5),
                transforms.RandomAffine(0, shear=10, scale=(0.8,1.2)),
                transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
                transforms.ToTensor(),
                transforms.Normalize(mean=mean, std=std)
            ])

        # The two subsets returned by random_split share one underlying dataset
        # object, so overwriting `.dataset.transform` for validation would also
        # strip the augmentation from the training subset. Use a second dataset
        # instance (same images, evaluation transforms) for validation instead.
        augmented_set = torchvision.datasets.CIFAR10(root='./data', train=True, transform=aug_transforms, download=True)
        plain_set = torchvision.datasets.CIFAR10(root='./data', train=True, transform=test_transforms, download=True)
        train_data, valid_split = data.random_split(augmented_set, [n_train_examples, n_valid_examples])
        valid_data = data.Subset(plain_set, valid_split.indices)
        valid_dataloader = data.DataLoader(valid_data, batch_size=256, shuffle=True)
        train_dataloader = DataLoader(train_data, batch_size=256, shuffle=True, num_workers=2)

    validation_loss_min = float('inf')
    model_path = path+"model.pt"
    train_loss = []
    train_acc = []
    val_loss = []
    val_acc = []

    for epoch in range(epochs):
        start = time.time()
        training_loss, training_accuracy = train(model, train_dataloader, optimizer, criterion, device)
        validation_loss, validation_accuracy = evaluate(model, valid_dataloader, criterion, device)
        scheduler.step(validation_loss)
        end = time.time()

        train_loss.append(training_loss)
        train_acc.append(training_accuracy)
        val_loss.append(validation_loss)
        val_acc.append(validation_accuracy)

        mins, secs = epoch_time(start, end)
        print('epoch: %d | train_loss: %.4f| train_accuracy: %.4f | valid_loss: %.4f | valid_accuracy: %.4f | epoch time: %s mins %s secs'%(epoch,training_loss,training_accuracy,validation_loss,validation_accuracy, mins, secs))

        if validation_loss < validation_loss_min:
            # New best validation loss: checkpoint the whole model object and
            # report how it does on the test set.
            print('Saving model ...')
            validation_loss_min = validation_loss
            torch.save(model, model_path)
            testing_loss, testing_accuracy = evaluate(model, test_dataloader, criterion, device)
            print("testing loss: %.4f | testing accuracy: %.4f"%(testing_loss, testing_accuracy))

    # Persist the per-epoch curves next to the checkpoint.
    df = pd.DataFrame({
        'Val_Loss': val_loss,
        'Val_Acc': val_acc,
        'Train_Loss': train_loss,
        'Train_Acc': train_acc,
    })
    df.to_csv(path+'Metrics.csv')

    return train_loss, train_acc, val_loss, val_acc

In [13]:
# Evaluation pipeline: tensor conversion followed by per-channel normalisation.
test_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

# Training pipeline: small rotation, padded random crop and horizontal flip
# ahead of the same tensor conversion and normalisation.
train_transforms = transforms.Compose([
    transforms.RandomRotation(degrees=5),
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

# CIFAR-10 train and test splits, downloaded into ./data when missing.
train_data = torchvision.datasets.CIFAR10('./data', train=True, transform=train_transforms, download=True)
test_data = torchvision.datasets.CIFAR10('./data', train=False, transform=test_transforms, download=True)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [14]:
# NOTE: despite its name, this is the fraction of the training set kept for
# training; the remaining 10% becomes the validation set.
VALID_RATIO = 0.9

n_train_examples = int(len(train_data) * VALID_RATIO)
n_valid_examples = len(train_data) - n_train_examples

train_data, valid_split = data.random_split(train_data,  [n_train_examples, n_valid_examples])

# Both subsets returned by random_split point at the same dataset object, so
# assigning `valid_split.dataset.transform = test_transforms` would also remove
# the augmentation from `train_data`. Wrap the validation indices around a
# second dataset instance that uses the evaluation transforms instead.
valid_source = torchvision.datasets.CIFAR10(root='./data', train=True, transform=test_transforms, download=True)
valid_data = data.Subset(valid_source, valid_split.indices)

valid_dataloader = data.DataLoader(valid_data, batch_size=256, shuffle=True)
train_dataloader = DataLoader(train_data, batch_size=256, shuffle=True, num_workers=2)
test_dataloader = DataLoader(test_data, batch_size=256, shuffle=False, num_workers=2)

In [None]:
#Defaults
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)

# One entry per architecture: stage widths and the matching blocks per stage.
# (A missing comma between the 2nd and 3rd width lists previously made this
# line index a list with a tuple and raise TypeError.)
planes = [[64,64,128,128], [64,64,256,256], [64,128,256,512]]
input_blocks = [[2, 2, 8, 8], [2, 2, 2, 2], [1, 1, 1, 1]]
assert len(planes) == len(input_blocks), "each width list needs a matching block list"

opt='SGD'
ksz=3
train_losses = []
train_accuracies = []
val_losses = []
val_accuracies = []
epochs = 500

for plane, blocks in zip(planes, input_blocks):
    # Pass ksz explicitly so the kernel size in the output path is the one used.
    model = build_model(plane, blocks, ksz)
    model = model.to(device)
    optimizer = optim.SGD(model.parameters(), lr = 0.1, momentum=0.9, weight_decay=0.0001)
    scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.1)
    model_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('Model Parameters ',model_params)
    # Output prefix: "<widths><optimizer><kernel size><parameter count>sch".
    path = ' '.join(str(width) for width in plane)+opt+str(ksz)+str(model_params)+'sch'
    train_loss, train_acc, val_loss, val_acc = prepare_and_train(epochs, train_dataloader,test_dataloader, valid_dataloader, model, criterion, optimizer, scheduler, path)
    train_losses.append(train_loss)
    train_accuracies.append(train_acc)
    val_losses.append(val_loss)
    val_accuracies.append(val_acc)

The cells below compare the default augmentation pipeline against the additional augmentation pipeline.

In [None]:
#Defaults
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)

opt='SGD'
ksz=3
train_losses = []
train_accuracies = []
val_losses = []
val_accuracies = []
epochs = 50

# (output prefix, aug, augVal) for each run being compared.
augmentation_runs = [
    ('defaultaug', False, -1),
    ('additionalaug', True, 1),
]

for path, use_aug, aug_val in augmentation_runs:
    # Every run gets its own freshly initialised model, optimizer and
    # scheduler. Reusing one model would let the second run start from weights
    # already trained by the first, which makes the curves incomparable.
    # Re-seeding gives both runs the same initial weights.
    torch.manual_seed(SEED)
    model = build_model([64,128,256,512], [1, 1, 1, 1])
    model = model.to(device)
    optimizer = optim.SGD(model.parameters(), lr = 0.1, momentum=0.9, weight_decay=0.0001)
    scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.1)
    model_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('Model Parameters ',model_params)

    train_loss, train_acc, val_loss, val_acc = prepare_and_train(epochs, train_dataloader,test_dataloader, valid_dataloader, model, criterion, optimizer, scheduler, path, aug=use_aug, augVal=aug_val)
    train_losses.append(train_loss)
    train_accuracies.append(train_acc)
    val_losses.append(val_loss)
    val_accuracies.append(val_acc)

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Read the per-epoch metrics written by the two augmentation runs
df1 = pd.read_csv('additionalaugMetrics.csv')
df2 = pd.read_csv('defaultaugMetrics.csv')

val_acc1 = df1['Val_Acc']
val_acc2 = df2['Val_Acc']

# Plot each curve against its own epoch index, so the x-axis always matches
# the number of rows in the CSV and the integer `epochs` used by the training
# cells is not overwritten.
fig, ax = plt.subplots()
ax.plot(range(len(val_acc1)), val_acc1, 'g', label='Additional Augmentation')
ax.plot(range(len(val_acc2)), val_acc2, 'b', label='Default Augmentation')
ax.set_title('Validation Accuracy')
ax.set_xlabel('Epochs')
ax.set_ylabel('Valid Accuracy')
ax.legend()
plt.show()

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# map_location lets checkpoints saved from a CUDA run load on a CPU-only machine.
# NOTE(review): these files hold whole pickled model objects, so loading them
# executes pickle code; only load checkpoints produced by this notebook.
# Recent PyTorch releases may also need weights_only=False here - confirm
# against the installed version.
model1 = torch.load('additionalaugmodel.pt', map_location=device)
model2 = torch.load('defaultaugmodel.pt', map_location=device)

model1 = model1.to(device)
model2 = model2.to(device)
models = [model1, model2]
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)

# One result list per loaded model.
test_accs = [[] for _ in models]
for i, model in enumerate(models):
    _, acc = evaluate(model, test_dataloader, criterion, device)
    test_accs[i].append(acc)

print('Testing Accuracy for Additional Augmentation %0.4f, Default Augmentation %.4f'  %(test_accs[0][0],test_accs[1][0]))