# Imports

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from torchvision.transforms import Compose,ToTensor, Normalize, RandomHorizontalFlip, RandomAffine
from torchvision.datasets.cifar import CIFAR10,CIFAR100
import torchvision.transforms as transforms
import numpy as np
from torchsummary import summary
import matplotlib.pyplot as plt
import math
import pdb
import copy
import random
from random import randint
from itertools import combinations
from google.colab import files
from google.colab import drive
# Mount Google Drive into the Colab filesystem at /content/drive.
drive.mount('/content/drive')
# Ask PyTorch's CUDA caching allocator to release its unused cached memory
# (presumably to reclaim memory left over from earlier cell runs).
torch.cuda.empty_cache()


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Model Class:

In [18]:
# Problem-size constants.
# NOTE(review): num_train is not referenced anywhere else in the visible code.
num_train = 50000
num_classes = 100

# Detect once whether a CUDA device is usable; later cells branch on this flag.
use_cuda = torch.cuda.is_available()
print('Use GPU?', use_cuda)

# VGG-16-style network: 13 convolutional blocks followed by 3 linear layers
class model(nn.Module):
    """VGG-16-style image classifier.

    The network stacks thirteen ``Conv2d -> BatchNorm2d -> ReLU`` blocks
    (``layer1`` .. ``layer13``). ``layer7`` ends with a 4x4 average pool and
    ``layer13`` with a 2x2 max pool, so the spatial size is reduced by a
    factor of 8 overall. The classifier head (``fc``, ``fc1``, ``fc2``)
    expects ``num_filters * 8 * 16`` flattened features, i.e. 512 channels
    on a 4x4 map, which corresponds to 32x32 inputs.

    Args:
        num_classes: Size of the output logit vector.
    """

    def __init__(self, num_classes=100):
        super().__init__()
        self.num_filters = 64
        width = self.num_filters

        # 64-channel stage
        self.layer1 = self._conv_block(3, width)
        self.layer2 = self._conv_block(width, width)
        # 128-channel stage
        self.layer3 = self._conv_block(width, width * 2)
        self.layer4 = self._conv_block(width * 2, width * 2)
        # 256-channel stage, closed by the only average pool (32x32 -> 8x8)
        self.layer5 = self._conv_block(width * 2, width * 4)
        self.layer6 = self._conv_block(width * 4, width * 4)
        self.layer7 = self._conv_block(
            width * 4, width * 4, pool=nn.AvgPool2d(kernel_size=4, stride=4))
        # 512-channel stage, closed by a max pool (8x8 -> 4x4)
        self.layer8 = self._conv_block(width * 4, width * 8)
        self.layer9 = self._conv_block(width * 8, width * 8)
        self.layer10 = self._conv_block(width * 8, width * 8)
        self.layer11 = self._conv_block(width * 8, width * 8)
        self.layer12 = self._conv_block(width * 8, width * 8)
        self.layer13 = self._conv_block(
            width * 8, width * 8, pool=nn.MaxPool2d(kernel_size=2, stride=2))

        # Classifier head; each part stays wrapped in a Sequential so that
        # parameter names keep the "<name>.0.weight" form.
        self.fc = nn.Sequential(nn.Linear(width * 8 * 16, 4096), nn.ReLU())
        self.fc1 = nn.Sequential(nn.Linear(4096, 4096), nn.ReLU())
        # The final projection has no bias term.
        self.fc2 = nn.Sequential(nn.Linear(4096, num_classes, bias=False))

    @staticmethod
    def _conv_block(in_channels, out_channels, pool=None):
        """Return a 3x3 same-padding Conv -> BatchNorm -> ReLU stack, optionally followed by ``pool``."""
        stages = [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        ]
        if pool is not None:
            stages.append(pool)
        return nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through all conv blocks and the classifier head; return the logits."""
        features = x
        # layer1 .. layer13 are applied strictly in numeric order.
        for index in range(1, 14):
            features = getattr(self, 'layer{}'.format(index))(features)
        # Collapse everything except the batch dimension before the linear layers.
        flat = features.reshape(features.size(0), -1)
        hidden = self.fc1(self.fc(flat))
        return self.fc2(hidden)



# Training-run settings used by later cells.
num_epochs = 200
minibatch_size = 100
criterion = nn.CrossEntropyLoss()

# Build the network, placing it on the GPU when one is available.
my_model = model().cuda() if use_cuda else model()

# Print a per-layer table of output shapes and parameter counts for a 3x32x32 input.
summary(my_model, (3, 32, 32))




Use GPU? True
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]           1,792
       BatchNorm2d-2           [-1, 64, 32, 32]             128
              ReLU-3           [-1, 64, 32, 32]               0
            Conv2d-4           [-1, 64, 32, 32]          36,928
       BatchNorm2d-5           [-1, 64, 32, 32]             128
              ReLU-6           [-1, 64, 32, 32]               0
            Conv2d-7          [-1, 128, 32, 32]          73,856
       BatchNorm2d-8          [-1, 128, 32, 32]             256
              ReLU-9          [-1, 128, 32, 32]               0
           Conv2d-10          [-1, 128, 32, 32]         147,584
      BatchNorm2d-11          [-1, 128, 32, 32]             256
             ReLU-12          [-1, 128, 32, 32]               0
           Conv2d-13          [-1, 256, 32, 32]         295,168
      BatchNorm2d-14     

# Data preprocessing:

In [8]:
batch_size = 100
num_workers = 8

# Per-channel statistics shared by the train and test pipelines.
channel_mean = [0.5, 0.5, 0.5]
channel_std = [0.5, 0.5, 0.5]

# Training pipeline: tensor conversion and normalisation first, then random
# horizontal flip and random translation of up to 12.5% per axis.
# NOTE(review): the augmentations run on the already-normalised tensor, so any
# padding RandomAffine introduces is in normalised units - confirm this is intended.
train_transform = Compose([
    ToTensor(),
    Normalize(mean=channel_mean, std=channel_std),
    RandomHorizontalFlip(p=0.5),
    RandomAffine(degrees=0, translate=(0.125, 0.125)),
])

# Test pipeline: tensor conversion and the same normalisation, no augmentation.
test_transform = Compose([
    ToTensor(),
    Normalize(mean=channel_mean, std=channel_std),
])

# CIFAR-100 train and test splits, downloaded into "dataset/" when missing.
train_dataset = CIFAR100(root="dataset/", train=True, transform=train_transform, download=True)
test_dataset = CIFAR100(root="dataset/", train=False, transform=test_transform, download=True)

# Both loaders share batch size, worker count and pinned memory; only the
# training loader shuffles.
loader_options = dict(batch_size=batch_size, num_workers=num_workers, pin_memory=True)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)


Files already downloaded and verified
Files already downloaded and verified


# Main

In [None]:
# Start from a freshly initialised network for this run.
my_model = model()
if use_cuda:
    my_model = my_model.cuda()


# The fully-connected head gets its own optimizer settings; every other
# parameter uses the optimizer-level defaults.
# NOTE(review): 'fc2.0.bias' matches no parameter because fc2 is created with
# bias=False; the entry is harmless and kept as-is.
my_list = ['fc.0.weight', 'fc.0.bias', 'fc1.0.weight', 'fc1.0.bias', 'fc2.0.weight', 'fc2.0.bias']
new_params = [param for name, param in my_model.named_parameters() if name in my_list]
base_params = [param for name, param in my_model.named_parameters() if name not in my_list]
optimizer = optim.SGD(
    [
        {'params': base_params},
        {'params': new_params, 'lr': 0.007, 'momentum': 0.982, 'weight_decay': 0.00135},
    ],
    lr=0.011,
    momentum=0.98,
    weight_decay=0.00115,
    nesterov=True,
)


for epoch in range(num_epochs):
    # Step decay: every 20 epochs the learning rate of group 0 (base_params)
    # is multiplied by 0.6. The head group's learning rate is never changed.
    # NOTE(review): the condition also holds at epoch 0, so group 0 effectively
    # starts at 0.011 * 0.6 - confirm this is intended.
    if epoch % 20 == 0:
        optimizer.param_groups[0]['lr'] *= 0.6

    # ---- one pass over the training set ----
    my_model.train()
    for images, labels in train_loader:
        if use_cuda:
            images = images.cuda()
            labels = labels.cuda()

        # Forward pass, then clear stale gradients before backpropagating.
        output = my_model(images)
        optimizer.zero_grad()
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()

    # ---- top-1 accuracy on the test set ----
    my_model.eval()
    total = 0
    correct = 0
    for images, labels in test_loader:
        if use_cuda:
            images = images.cuda()
            labels = labels.cuda()
        with torch.no_grad():
            outputs = my_model(images)

        # Predicted class = index of the largest logit in each row.
        predicted = torch.max(outputs, 1).indices
        correct += (predicted == labels).sum()
        total += labels.size(0)

    accuracy = float(correct) / total
    print('Epoch: {}, Test accuracy soft: {:.4f}' .format(epoch + 1, accuracy))

